Implement grouped convolutions (#116)
* Add groups_of template parameter

* Add test data for grouped convolutions

* Fix data generation script

* Initial working version for grouped convolutions

* Specialize forward method when groups_of is 1

* Remove extra stuff in CMakeLists.txt

* Clean up conv1d.h

* Restore conditional for dynamic state

* Test different configuration

* Merge column copying logic

* Implement microtcn layer in PyTorch

* Try to implement microtcn

* Make first half of microtcn work

* Add more group tests

* Add .venv to gitignore

* New definition for causal_crop

This definition fixes an apparent off-by-one error by making the
`start` index the difference between the current length and the
target length.

For example, given a tensor with 1000 elements and a target of 970,
the function now crops with `x[..., 30:]`, rather than the previous
behaviour of `x[..., 29:999]`.

This seems more correct: the previous behaviour took items from index
29 (inclusive) to index 999 (exclusive, so up to 998), while the new
behaviour takes items from index 30 (inclusive) through the end of
the tensor, which is index 999. (A sketch of the new behaviour
follows this list of changes.)

* TCNBlock works!!!

* Fix loadLayer to use groups_of

* Bring back old tests

* Tweaks for testing code

* Fixes, Conv1D with groups works for everything except Eigen backend

* Adding Eigen implementation and fixing channel indexing

* Re-add groups test

* Bring back MicroTCN test

* Rename groups_of -> groups

* Also rename groups_of -> groups
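
For reference, here is a minimal Python sketch of the `causal_crop` behaviour described above; the function name comes from the commit message, but the exact signature used in the PR's scripts is an assumption:

    import torch

    def causal_crop(x: torch.Tensor, length: int) -> torch.Tensor:
        """Keep the trailing `length` samples along the last axis."""
        # start is the current length minus the target length, so the
        # crop always runs through the final element
        start = x.shape[-1] - length
        return x[..., start:]

    x = torch.arange(1000)
    assert torch.equal(causal_crop(x, 970), x[..., 30:])  # not x[..., 29:999]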

---------

Co-authored-by: jatin <[email protected]>
purefunctor and jatinchowdhury18 authored Nov 29, 2023
1 parent 6a2b7b8 commit 1e81449
Showing 38 changed files with 2,922 additions and 485 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ __pycache__
 /docs
 .DS_Store
 /.idea
+/.venv
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.5)
 project(RTNeural VERSION 1.0.0)
 include(cmake/CXXStandard.cmake)
15 changes: 8 additions & 7 deletions RTNeural/ModelT.h
@@ -102,20 +102,21 @@ namespace modelt_detail
     }
 }

-template <typename T, int in_size, int out_size, int kernel_size, int dilation_rate, bool dynamic_state>
-void loadLayer(Conv1DT<T, in_size, out_size, kernel_size, dilation_rate, dynamic_state>& conv, int& json_stream_idx, const nlohmann::json& l,
+template <typename T, int in_size, int out_size, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void loadLayer(Conv1DT<T, in_size, out_size, kernel_size, dilation_rate, groups, dynamic_state>& conv, int& json_stream_idx, const nlohmann::json& l,
     const std::string& type, int layerDims, bool debug)
 {
     using namespace json_parser;

     debug_print("Layer: " + type, debug);
     debug_print(" Dims: " + std::to_string(layerDims), debug);
-    const auto& weights = l["weights"];
-    const auto kernel = l["kernel_size"].back().get<int>();
-    const auto dilation = l["dilation"].back().get<int>();
+    const auto& l_weights = l["weights"];
+    const auto l_kernel = l["kernel_size"].back().get<int>();
+    const auto l_dilation = l["dilation"].back().get<int>();
+    const auto l_groups = l.value("groups", 1);

-    if(checkConv1D<T>(conv, type, layerDims, kernel, dilation, debug))
-        loadConv1D<T>(conv, kernel, dilation, weights);
+    if(checkConv1D<T>(conv, type, layerDims, l_kernel, l_dilation, l_groups, debug))
+        loadConv1D<T>(conv, l_kernel, l_dilation, l_weights);

     if(!l.contains("activation"))
     {
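
For context, `loadLayer` above reads an optional "groups" field from the layer's JSON entry, defaulting to 1 when absent. The hypothetical layer entry sketched in Python below shows only the fields this loader references; the surrounding model-file structure and the "type" tag are assumptions, not taken from this diff:

    import json

    conv1d_layer = {
        "type": "conv1d",  # assumed type tag
        "kernel_size": [3],
        "dilation": [2],
        "groups": 2,  # optional: l.value("groups", 1) falls back to 1
        "activation": "",
        # kernel data nested as ws[out_size][in_size / groups][kernel_size],
        # matching the setWeights() loops in conv1d.tpp below
        "weights": [[[0.0] * 3] * 4] * 8,
    }
    print(json.dumps(conv1d_layer, indent=2))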
113 changes: 95 additions & 18 deletions RTNeural/conv1d/conv1d.h
@@ -37,7 +37,7 @@ class Conv1D final : public Layer<T>
      * @param kernel_size: the size of the convolution kernel
      * @param dilation: the dilation rate to use for dilated convolution
      */
-    Conv1D(int in_size, int out_size, int kernel_size, int dilation);
+    Conv1D(int in_size, int out_size, int kernel_size, int dilation, int groups = 1);
     Conv1D(std::initializer_list<int> sizes);
     Conv1D(const Conv1D& other);
     Conv1D& operator=(const Conv1D& other);
@@ -58,23 +58,50 @@ class Conv1D final : public Layer<T>
         // set state pointers to particular columns of the buffer
         setStatePointers();

-        // copy selected columns to a helper variable
-        for(int k = 0; k < kernel_size; ++k)
+        if (groups == 1)
         {
-            const auto& col = state[state_ptrs[k]];
-            std::copy(col, col + Layer<T>::in_size, state_cols[k]);
+            // copy selected columns to a helper variable
+            for(int k = 0; k < kernel_size; ++k)
+            {
+                const auto& col = state[state_ptrs[k]];
+                std::copy(col, col + Layer<T>::in_size, state_cols[k]);
+            }
+
+            // perform multi-channel convolution
+            for(int i = 0; i < Layer<T>::out_size; ++i)
+            {
+                h[i] = bias[i];
+                for(int k = 0; k < kernel_size; ++k)
+                    h[i] = std::inner_product(
+                        weights[i][k],
+                        weights[i][k] + filters_per_group,
+                        state_cols[k],
+                        h[i]);
+            }
         }
-
-        // perform multi-channel convolution
-        for(int i = 0; i < Layer<T>::out_size; ++i)
+        else
         {
-            h[i] = bias[i];
-            for(int k = 0; k < kernel_size; ++k)
-                h[i] = std::inner_product(
-                    weights[i][k],
-                    weights[i][k] + Layer<T>::in_size,
-                    state_cols[k],
-                    h[i]);
+            // perform multi-channel convolution
+            for(int i = 0; i < Layer<T>::out_size; ++i)
+            {
+                h[i] = bias[i];
+                const auto ii = ((i / channels_per_group) * filters_per_group);
+                for(int k = 0; k < kernel_size; ++k)
+                {
+                    // copy selected columns to a helper variable
+                    const auto& column = state[state_ptrs[k]];
+
+                    const auto column_begin = column + ii;
+                    const auto column_end = column_begin + filters_per_group;
+                    std::copy(column_begin, column_end, state_cols[k]);
+
+                    h[i] = std::inner_product(
+                        weights[i][k],
+                        weights[i][k] + filters_per_group,
+                        state_cols[k],
+                        h[i]);
+                }
+            }
         }

         state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
@@ -100,10 +127,16 @@ class Conv1D final : public Layer<T>
     /** Returns the convolution dilation rate. */
     int getDilationRate() const noexcept { return dilation_rate; }

+    /** Returns the number of "groups" in the convolution. */
+    int getGroups() const noexcept { return groups; }
+
 private:
     const int dilation_rate;
     const int kernel_size;
     const int state_size;
+    const int groups;
+    const int filters_per_group;
+    const int channels_per_group;

     T*** weights;
     T* bias;
@@ -138,15 +171,20 @@
  * @param kernel_size: the size of the convolution kernel
  * @param dilation_rate: the dilation rate to use for dilated convolution
  * @param dynamic_state: use dynamically allocated layer state
+ * @param groups: controls connections between inputs and outputs
  */
-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state = false>
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups = 1, bool dynamic_state = false>
 class Conv1DT
 {
+    static_assert((in_sizet % groups == 0) && (out_sizet % groups == 0), "in_size and out_size must be divisible by groups!");
+
     static constexpr auto state_size = (kernel_size - 1) * dilation_rate + 1;

 public:
     static constexpr auto in_size = in_sizet;
     static constexpr auto out_size = out_sizet;
+    static constexpr auto filters_per_group = in_size / groups;
+    static constexpr auto channels_per_group = out_size / groups;

     Conv1DT();

@@ -159,6 +197,7 @@ class Conv1DT
     /** Resets the layer state. */
     void reset();

+    template<int _groups = groups, std::enable_if_t<_groups == 1, bool> = true>
     /** Performs forward propagation for this layer. */
     inline void forward(const T (&ins)[in_size]) noexcept
     {
@@ -190,10 +229,45 @@ class Conv1DT
         state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
     }

+    template<int _groups = groups, std::enable_if_t<_groups != 1, bool> = true>
+    /** Performs forward propagation for this layer. */
+    inline void forward(const T (&ins)[in_size]) noexcept
+    {
+        // insert input into a circular buffer
+        std::copy(std::begin(ins), std::end(ins), state[state_ptr].begin());
+
+        // set state pointers to particular columns of the buffer
+        setStatePointers();
+
+        // perform multi-channel convolution
+        for(int i = 0; i < out_size; ++i)
+        {
+            outs[i] = bias[i];
+
+            const auto ii = ((i / channels_per_group) * filters_per_group);
+            for(int k = 0; k < kernel_size; ++k)
+            {
+                // copy selected columns to a helper variable
+                const auto& column = state[state_ptrs[k]];
+                const auto column_begin = column.begin() + ii;
+                const auto column_end = column_begin + filters_per_group;
+                std::copy(column_begin, column_end, state_cols[k].begin());
+
+                outs[i] = std::inner_product(
+                    weights[i][k].begin(),
+                    weights[i][k].end(),
+                    state_cols[k].begin(),
+                    outs[i]);
+            }
+        }
+
+        state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
+    }
+
     /**
      * Sets the layer weights.
      *
-     * The weights vector must have size weights[out_size][in_size][kernel_size * dilation]
+     * The weights vector must have size weights[out_size][group_count][kernel_size * dilation]
      */
     void setWeights(const std::vector<std::vector<std::vector<T>>>& weights);

@@ -210,6 +284,9 @@ class Conv1DT
     /** Returns the convolution dilation rate. */
     int getDilationRate() const noexcept { return dilation_rate; }

+    /** Returns the number of "groups" in the convolution. */
+    int getGroups() const noexcept { return groups; }
+
     T outs alignas(RTNEURAL_DEFAULT_ALIGNMENT)[out_size];

 private:
@@ -223,7 +300,7 @@ class Conv1DT
     typename std::enable_if<!DS, void>::type resize_state() { }

     using state_type = typename std::conditional<dynamic_state, std::vector<std::array<T, in_size>>, std::array<std::array<T, in_size>, state_size>>::type;
-    using weights_type = std::array<std::array<T, in_size>, kernel_size>;
+    using weights_type = std::array<std::array<T, filters_per_group>, kernel_size>;

     alignas(RTNEURAL_DEFAULT_ALIGNMENT) state_type state;
     alignas(RTNEURAL_DEFAULT_ALIGNMENT) weights_type state_cols;
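
As a sanity check on the grouped indexing above (`ii = (i / channels_per_group) * filters_per_group`), here is a short PyTorch sketch, with arbitrary sizes, that reproduces each output channel of a grouped convolution from only its group's slice of input channels:

    import torch
    import torch.nn.functional as F

    in_ch, out_ch, groups, kernel = 8, 4, 2, 3
    conv = torch.nn.Conv1d(in_ch, out_ch, kernel, groups=groups, bias=False)

    filters_per_group = in_ch // groups    # input channels seen by each group
    channels_per_group = out_ch // groups  # output channels produced by each group

    x = torch.randn(1, in_ch, 16)
    y = conv(x)

    for i in range(out_ch):
        # start of the input-channel slice that output channel i reads
        ii = (i // channels_per_group) * filters_per_group
        w_i = conv.weight[i : i + 1]                 # (1, filters_per_group, kernel)
        x_slice = x[:, ii : ii + filters_per_group]  # the only inputs channel i sees
        assert torch.allclose(y[:, i : i + 1], F.conv1d(x_slice, w_i), atol=1e-6)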
37 changes: 20 additions & 17 deletions RTNeural/conv1d/conv1d.tpp
@@ -6,20 +6,23 @@ namespace RTNEURAL_NAMESPACE
 #if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD

 template <typename T>
-Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation)
+Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation, int num_groups)
     : Layer<T>(in_size, out_size)
     , dilation_rate(dilation)
     , kernel_size(kernel_size)
     , state_size((kernel_size - 1) * dilation + 1)
+    , groups(num_groups)
+    , filters_per_group(in_size / groups)
+    , channels_per_group(out_size / groups)
 {
     weights = new T**[out_size];
     for(int i = 0; i < out_size; ++i)
     {
         weights[i] = new T*[kernel_size];
         for(int k = 0; k < kernel_size; ++k)
         {
-            weights[i][k] = new T[in_size];
-            std::fill(weights[i][k], weights[i][k] + in_size, (T)0);
+            weights[i][k] = new T[filters_per_group];
+            std::fill(weights[i][k], weights[i][k] + filters_per_group, (T)0);
         }
     }

@@ -31,7 +34,7 @@ Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation)
     state_cols = new T*[kernel_size];
     for(int k = 0; k < kernel_size; ++k)
-        state_cols[k] = new T[in_size];
+        state_cols[k] = new T[filters_per_group];

     state_ptrs = new int[kernel_size];
 }
@@ -89,7 +92,7 @@ void Conv1D<T>::reset()
         std::fill(state[k], state[k] + Layer<T>::in_size, (T)0);

     for(int k = 0; k < kernel_size; ++k)
-        std::fill(state_cols[k], state_cols[k] + Layer<T>::in_size, (T)0);
+        std::fill(state_cols[k], state_cols[k] + filters_per_group, (T)0);

     for(int k = 0; k < kernel_size; ++k)
         state_ptrs[k] = 0;
@@ -101,7 +104,7 @@ template <typename T>
 void Conv1D<T>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
 {
     for(int i = 0; i < Layer<T>::out_size; ++i)
-        for(int k = 0; k < Layer<T>::in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             for(int j = 0; j < kernel_size; ++j)
                 weights[i][j][k] = ws[i][k][j];
 }
@@ -114,12 +117,12 @@ void Conv1D<T>::setBias(const std::vector<T>& biasVals)
 }

 //====================================================
-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::Conv1DT()
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::Conv1DT()
 {
     for(int i = 0; i < out_size; ++i)
         for(int j = 0; j < kernel_size; ++j)
-            for(int k = 0; k < in_size; ++k)
+            for(int k = 0; k < filters_per_group; ++k)
                 weights[i][j][k] = (T)0.0;

     for(int i = 0; i < out_size; ++i)
@@ -132,33 +135,33 @@ Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::Conv
     reset();
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::reset()
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::reset()
 {
     for(int i = 0; i < state_size; ++i)
         for(int k = 0; k < in_size; ++k)
             state[i][k] = (T)0.0;

     for(int i = 0; i < kernel_size; ++i)
-        for(int k = 0; k < in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             state_cols[i][k] = (T)0.0;

     state_ptr = 0;
     for(int i = 0; i < kernel_size; ++i)
         state_ptrs[i] = 0;
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
 {
     for(int i = 0; i < out_size; ++i)
-        for(int k = 0; k < in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             for(int j = 0; j < kernel_size; ++j)
                 weights[i][j][k] = ws[i][k][j];
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::setBias(const std::vector<T>& biasVals)
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::setBias(const std::vector<T>& biasVals)
 {
     for(int i = 0; i < out_size; ++i)
         bias[i] = biasVals[i];
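
One practical note on the `setWeights` layout: the loops above read `ws[i][k][j]` with `i` over output channels, `k` over the group's `filters_per_group` input channels, and `j` over kernel taps. That nesting matches how PyTorch stores a grouped Conv1d weight, (out_channels, in_channels // groups, kernel_size), so a hypothetical exporter could pass the weight tensor through directly; whether the project's own export scripts do exactly this is not shown in this diff:

    import torch

    conv = torch.nn.Conv1d(8, 4, kernel_size=3, groups=2)

    # (out_channels, in_channels // groups, kernel_size) == ws[i][k][j]
    ws = conv.weight.detach().tolist()
    bias = conv.bias.detach().tolist()
    assert len(ws) == 4 and len(ws[0]) == 8 // 2 and len(ws[0][0]) == 3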