Implement grouped convolutions (#116)
* Add groups_of template parameter

* Add test data for grouped convolutions

* Fix data generation script

* Initial working version for grouped convolutions

* Specialize forward method when groups_of is 1

* Remove extra stuff in CMakeLists.txt

* Clean up conv1d.h

* Restore conditional for dynamic state

* Test different configuration

* Merge column copying logic

* Implement microtcn layer in PyTorch

* Try to implement microtcn

* Make first half of microtcn work

* Add more group tests

* Add .venv to gitignore

* New definition for causal_crop

This definition fixes an apparent off-by-one error by making the
`start` index the difference between the current length and the
target length.

For example, given a tensor with 1000 elements and a target of 970,
the function now crops with `x[..., 30:]`, rather than the previous
behaviour of `x[..., 29:999]`.

This seems more correct: the previous behaviour took items from index
29 (inclusive) to index 999 (exclusive, so up to 998), while the new
behaviour takes items from index 30 (inclusive) through the end of
the tensor, which is index 999. (A sketch of the new behaviour
follows this list of changes.)

* TCNBlock works!!!

* Fix loadLayer to use groups_of

* Bring back old tests

* Tweaks for testing code

* Fixes, Conv1D with groups works for everything except Eigen backend

* Adding Eigen implementation and fixing channel indexing

* Re-add groups test

* Bring back MicroTCN test

* Rename groups_of -> groups

* Also rename groups_of -> groups
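
For reference, here is a minimal Python sketch of the `causal_crop` behaviour described above; the function name comes from the commit message, but the exact signature used in the PR's scripts is an assumption:

    import torch

    def causal_crop(x: torch.Tensor, length: int) -> torch.Tensor:
        """Keep the trailing `length` samples along the last axis."""
        # start is the current length minus the target length, so the
        # crop always runs through the final element
        start = x.shape[-1] - length
        return x[..., start:]

    x = torch.arange(1000)
    assert torch.equal(causal_crop(x, 970), x[..., 30:])  # not x[..., 29:999]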

---------

Co-authored-by: jatin <[email protected]>
purefunctor and jatinchowdhury18 authored Nov 29, 2023
1 parent 6a2b7b8 commit 1e81449
Showing 38 changed files with 2,922 additions and 485 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ __pycache__
 /docs
 .DS_Store
 /.idea
+/.venv
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.5)
 project(RTNeural VERSION 1.0.0)
 include(cmake/CXXStandard.cmake)
15 changes: 8 additions & 7 deletions RTNeural/ModelT.h
@@ -102,20 +102,21 @@ namespace modelt_detail
     }
 }

-template <typename T, int in_size, int out_size, int kernel_size, int dilation_rate, bool dynamic_state>
-void loadLayer(Conv1DT<T, in_size, out_size, kernel_size, dilation_rate, dynamic_state>& conv, int& json_stream_idx, const nlohmann::json& l,
+template <typename T, int in_size, int out_size, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void loadLayer(Conv1DT<T, in_size, out_size, kernel_size, dilation_rate, groups, dynamic_state>& conv, int& json_stream_idx, const nlohmann::json& l,
     const std::string& type, int layerDims, bool debug)
 {
     using namespace json_parser;

     debug_print("Layer: " + type, debug);
     debug_print(" Dims: " + std::to_string(layerDims), debug);
-    const auto& weights = l["weights"];
-    const auto kernel = l["kernel_size"].back().get<int>();
-    const auto dilation = l["dilation"].back().get<int>();
+    const auto& l_weights = l["weights"];
+    const auto l_kernel = l["kernel_size"].back().get<int>();
+    const auto l_dilation = l["dilation"].back().get<int>();
+    const auto l_groups = l.value("groups", 1);

-    if(checkConv1D<T>(conv, type, layerDims, kernel, dilation, debug))
-        loadConv1D<T>(conv, kernel, dilation, weights);
+    if(checkConv1D<T>(conv, type, layerDims, l_kernel, l_dilation, l_groups, debug))
+        loadConv1D<T>(conv, l_kernel, l_dilation, l_weights);

     if(!l.contains("activation"))
     {
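
For context, `loadLayer` above reads an optional "groups" field from the layer's JSON entry, defaulting to 1 when absent. The hypothetical layer entry sketched in Python below shows only the fields this loader references; the surrounding model-file structure and the "type" tag are assumptions, not taken from this diff:

    import json

    conv1d_layer = {
        "type": "conv1d",  # assumed type tag
        "kernel_size": [3],
        "dilation": [2],
        "groups": 2,  # optional: l.value("groups", 1) falls back to 1
        "activation": "",
        # kernel data nested as ws[out_size][in_size / groups][kernel_size],
        # matching the setWeights() loops in conv1d.tpp below
        "weights": [[[0.0] * 3] * 4] * 8,
    }
    print(json.dumps(conv1d_layer, indent=2))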
113 changes: 95 additions & 18 deletions RTNeural/conv1d/conv1d.h
@@ -37,7 +37,7 @@ class Conv1D final : public Layer<T>
      * @param kernel_size: the size of the convolution kernel
      * @param dilation: the dilation rate to use for dilated convolution
      */
-    Conv1D(int in_size, int out_size, int kernel_size, int dilation);
+    Conv1D(int in_size, int out_size, int kernel_size, int dilation, int groups = 1);
     Conv1D(std::initializer_list<int> sizes);
     Conv1D(const Conv1D& other);
     Conv1D& operator=(const Conv1D& other);
@@ -58,23 +58,50 @@ class Conv1D final : public Layer<T>
         // set state pointers to particular columns of the buffer
         setStatePointers();

-        // copy selected columns to a helper variable
-        for(int k = 0; k < kernel_size; ++k)
+        if (groups == 1)
         {
-            const auto& col = state[state_ptrs[k]];
-            std::copy(col, col + Layer<T>::in_size, state_cols[k]);
+            // copy selected columns to a helper variable
+            for(int k = 0; k < kernel_size; ++k)
+            {
+                const auto& col = state[state_ptrs[k]];
+                std::copy(col, col + Layer<T>::in_size, state_cols[k]);
+            }
+
+            // perform multi-channel convolution
+            for(int i = 0; i < Layer<T>::out_size; ++i)
+            {
+                h[i] = bias[i];
+                for(int k = 0; k < kernel_size; ++k)
+                    h[i] = std::inner_product(
+                        weights[i][k],
+                        weights[i][k] + filters_per_group,
+                        state_cols[k],
+                        h[i]);
+            }
         }
-
-        // perform multi-channel convolution
-        for(int i = 0; i < Layer<T>::out_size; ++i)
+        else
         {
-            h[i] = bias[i];
-            for(int k = 0; k < kernel_size; ++k)
-                h[i] = std::inner_product(
-                    weights[i][k],
-                    weights[i][k] + Layer<T>::in_size,
-                    state_cols[k],
-                    h[i]);
+            // perform multi-channel convolution
+            for(int i = 0; i < Layer<T>::out_size; ++i)
+            {
+                h[i] = bias[i];
+                const auto ii = ((i / channels_per_group) * filters_per_group);
+                for(int k = 0; k < kernel_size; ++k)
+                {
+                    // copy selected columns to a helper variable
+                    const auto& column = state[state_ptrs[k]];
+
+                    const auto column_begin = column + ii;
+                    const auto column_end = column_begin + filters_per_group;
+                    std::copy(column_begin, column_end, state_cols[k]);
+
+                    h[i] = std::inner_product(
+                        weights[i][k],
+                        weights[i][k] + filters_per_group,
+                        state_cols[k],
+                        h[i]);
+                }
+            }
         }

         state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
@@ -100,10 +127,16 @@ class Conv1D final : public Layer<T>
     /** Returns the convolution dilation rate. */
     int getDilationRate() const noexcept { return dilation_rate; }

+    /** Returns the number of "groups" in the convolution. */
+    int getGroups() const noexcept { return groups; }
+
 private:
     const int dilation_rate;
     const int kernel_size;
     const int state_size;
+    const int groups;
+    const int filters_per_group;
+    const int channels_per_group;

     T*** weights;
     T* bias;
@@ -138,15 +171,20 @@
  * @param kernel_size: the size of the convolution kernel
  * @param dilation_rate: the dilation rate to use for dilated convolution
  * @param dynamic_state: use dynamically allocated layer state
+ * @param groups: controls connections between inputs and outputs
  */
-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state = false>
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups = 1, bool dynamic_state = false>
 class Conv1DT
 {
+    static_assert((in_sizet % groups == 0) && (out_sizet % groups == 0), "in_size and out_size must be divisible by groups!");
+
     static constexpr auto state_size = (kernel_size - 1) * dilation_rate + 1;

 public:
     static constexpr auto in_size = in_sizet;
     static constexpr auto out_size = out_sizet;
+    static constexpr auto filters_per_group = in_size / groups;
+    static constexpr auto channels_per_group = out_size / groups;

     Conv1DT();

@@ -159,6 +197,7 @@ class Conv1DT
     /** Resets the layer state. */
     void reset();

+    template<int _groups = groups, std::enable_if_t<_groups == 1, bool> = true>
     /** Performs forward propagation for this layer. */
     inline void forward(const T (&ins)[in_size]) noexcept
     {
@@ -190,10 +229,45 @@ class Conv1DT
         state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
     }

+    template<int _groups = groups, std::enable_if_t<_groups != 1, bool> = true>
+    /** Performs forward propagation for this layer. */
+    inline void forward(const T (&ins)[in_size]) noexcept
+    {
+        // insert input into a circular buffer
+        std::copy(std::begin(ins), std::end(ins), state[state_ptr].begin());
+
+        // set state pointers to particular columns of the buffer
+        setStatePointers();
+
+        // perform multi-channel convolution
+        for(int i = 0; i < out_size; ++i)
+        {
+            outs[i] = bias[i];
+
+            const auto ii = ((i / channels_per_group) * filters_per_group);
+            for(int k = 0; k < kernel_size; ++k)
+            {
+                // copy selected columns to a helper variable
+                const auto& column = state[state_ptrs[k]];
+                const auto column_begin = column.begin() + ii;
+                const auto column_end = column_begin + filters_per_group;
+                std::copy(column_begin, column_end, state_cols[k].begin());
+
+                outs[i] = std::inner_product(
+                    weights[i][k].begin(),
+                    weights[i][k].end(),
+                    state_cols[k].begin(),
+                    outs[i]);
+            }
+        }
+
+        state_ptr = (state_ptr == state_size - 1 ? 0 : state_ptr + 1); // iterate state pointer forwards
+    }
+
     /**
      * Sets the layer weights.
      *
-     * The weights vector must have size weights[out_size][in_size][kernel_size * dilation]
+     * The weights vector must have size weights[out_size][group_count][kernel_size * dilation]
      */
     void setWeights(const std::vector<std::vector<std::vector<T>>>& weights);

@@ -210,6 +284,9 @@ class Conv1DT
     /** Returns the convolution dilation rate. */
     int getDilationRate() const noexcept { return dilation_rate; }

+    /** Returns the number of "groups" in the convolution. */
+    int getGroups() const noexcept { return groups; }
+
     T outs alignas(RTNEURAL_DEFAULT_ALIGNMENT)[out_size];

 private:
@@ -223,7 +300,7 @@ class Conv1DT
     typename std::enable_if<!DS, void>::type resize_state() { }

     using state_type = typename std::conditional<dynamic_state, std::vector<std::array<T, in_size>>, std::array<std::array<T, in_size>, state_size>>::type;
-    using weights_type = std::array<std::array<T, in_size>, kernel_size>;
+    using weights_type = std::array<std::array<T, filters_per_group>, kernel_size>;

     alignas(RTNEURAL_DEFAULT_ALIGNMENT) state_type state;
     alignas(RTNEURAL_DEFAULT_ALIGNMENT) weights_type state_cols;
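
As a sanity check on the grouped indexing above (`ii = (i / channels_per_group) * filters_per_group`), here is a short PyTorch sketch, with arbitrary sizes, that reproduces each output channel of a grouped convolution from only its group's slice of input channels:

    import torch
    import torch.nn.functional as F

    in_ch, out_ch, groups, kernel = 8, 4, 2, 3
    conv = torch.nn.Conv1d(in_ch, out_ch, kernel, groups=groups, bias=False)

    filters_per_group = in_ch // groups    # input channels seen by each group
    channels_per_group = out_ch // groups  # output channels produced by each group

    x = torch.randn(1, in_ch, 16)
    y = conv(x)

    for i in range(out_ch):
        # start of the input-channel slice that output channel i reads
        ii = (i // channels_per_group) * filters_per_group
        w_i = conv.weight[i : i + 1]                 # (1, filters_per_group, kernel)
        x_slice = x[:, ii : ii + filters_per_group]  # the only inputs channel i sees
        assert torch.allclose(y[:, i : i + 1], F.conv1d(x_slice, w_i), atol=1e-6)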
37 changes: 20 additions & 17 deletions RTNeural/conv1d/conv1d.tpp
@@ -6,20 +6,23 @@ namespace RTNEURAL_NAMESPACE
 #if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD

 template <typename T>
-Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation)
+Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation, int num_groups)
     : Layer<T>(in_size, out_size)
     , dilation_rate(dilation)
     , kernel_size(kernel_size)
     , state_size((kernel_size - 1) * dilation + 1)
+    , groups(num_groups)
+    , filters_per_group(in_size / groups)
+    , channels_per_group(out_size / groups)
 {
     weights = new T**[out_size];
     for(int i = 0; i < out_size; ++i)
     {
         weights[i] = new T*[kernel_size];
         for(int k = 0; k < kernel_size; ++k)
         {
-            weights[i][k] = new T[in_size];
-            std::fill(weights[i][k], weights[i][k] + in_size, (T)0);
+            weights[i][k] = new T[filters_per_group];
+            std::fill(weights[i][k], weights[i][k] + filters_per_group, (T)0);
         }
     }

@@ -31,7 +34,7 @@ Conv1D<T>::Conv1D(int in_size, int out_size, int kernel_size, int dilation)
     state_cols = new T*[kernel_size];
     for(int k = 0; k < kernel_size; ++k)
-        state_cols[k] = new T[in_size];
+        state_cols[k] = new T[filters_per_group];

     state_ptrs = new int[kernel_size];
 }
@@ -89,7 +92,7 @@ void Conv1D<T>::reset()
         std::fill(state[k], state[k] + Layer<T>::in_size, (T)0);

     for(int k = 0; k < kernel_size; ++k)
-        std::fill(state_cols[k], state_cols[k] + Layer<T>::in_size, (T)0);
+        std::fill(state_cols[k], state_cols[k] + filters_per_group, (T)0);

     for(int k = 0; k < kernel_size; ++k)
         state_ptrs[k] = 0;
@@ -101,7 +104,7 @@ template <typename T>
 void Conv1D<T>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
 {
     for(int i = 0; i < Layer<T>::out_size; ++i)
-        for(int k = 0; k < Layer<T>::in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             for(int j = 0; j < kernel_size; ++j)
                 weights[i][j][k] = ws[i][k][j];
 }
@@ -114,12 +117,12 @@ void Conv1D<T>::setBias(const std::vector<T>& biasVals)
 }

 //====================================================
-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::Conv1DT()
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::Conv1DT()
 {
     for(int i = 0; i < out_size; ++i)
         for(int j = 0; j < kernel_size; ++j)
-            for(int k = 0; k < in_size; ++k)
+            for(int k = 0; k < filters_per_group; ++k)
                 weights[i][j][k] = (T)0.0;

     for(int i = 0; i < out_size; ++i)
@@ -132,33 +135,33 @@ Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::Conv
     reset();
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::reset()
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::reset()
 {
     for(int i = 0; i < state_size; ++i)
         for(int k = 0; k < in_size; ++k)
             state[i][k] = (T)0.0;

     for(int i = 0; i < kernel_size; ++i)
-        for(int k = 0; k < in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             state_cols[i][k] = (T)0.0;

     state_ptr = 0;
     for(int i = 0; i < kernel_size; ++i)
         state_ptrs[i] = 0;
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::setWeights(const std::vector<std::vector<std::vector<T>>>& ws)
 {
     for(int i = 0; i < out_size; ++i)
-        for(int k = 0; k < in_size; ++k)
+        for(int k = 0; k < filters_per_group; ++k)
             for(int j = 0; j < kernel_size; ++j)
                 weights[i][j][k] = ws[i][k][j];
 }

-template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, bool dynamic_state>
-void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, dynamic_state>::setBias(const std::vector<T>& biasVals)
+template <typename T, int in_sizet, int out_sizet, int kernel_size, int dilation_rate, int groups, bool dynamic_state>
+void Conv1DT<T, in_sizet, out_sizet, kernel_size, dilation_rate, groups, dynamic_state>::setBias(const std::vector<T>& biasVals)
 {
     for(int i = 0; i < out_size; ++i)
         bias[i] = biasVals[i];
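
One practical note on the `setWeights` layout: the loops above read `ws[i][k][j]` with `i` over output channels, `k` over the group's `filters_per_group` input channels, and `j` over kernel taps. That nesting matches how PyTorch stores a grouped Conv1d weight, (out_channels, in_channels // groups, kernel_size), so a hypothetical exporter could pass the weight tensor through directly; whether the project's own export scripts do exactly this is not shown in this diff:

    import torch

    conv = torch.nn.Conv1d(8, 4, kernel_size=3, groups=2)

    # (out_channels, in_channels // groups, kernel_size) == ws[i][k][j]
    ws = conv.weight.detach().tolist()
    bias = conv.bias.detach().tolist()
    assert len(ws) == 4 and len(ws[0]) == 8 // 2 and len(ws[0][0]) == 3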