From f87ead3d882cfae96d16564e9f6de2ac44e30257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Sat, 11 Jan 2025 22:19:30 +0800 Subject: [PATCH 01/14] init flip --- docs/developer-guide/operators.md | 2307 ++++++++++++----------- src/CMakeLists.txt | 1 + src/layer/flip.cpp | 41 + src/layer/flip.h | 37 + tools/pnnx/src/CMakeLists.txt | 1 + tools/pnnx/src/pass_ncnn/torch_flip.cpp | 56 + 6 files changed, 1349 insertions(+), 1094 deletions(-) create mode 100644 src/layer/flip.cpp create mode 100644 src/layer/flip.h create mode 100644 tools/pnnx/src/pass_ncnn/torch_flip.cpp diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md index 10fe1f03f0f..745043e4789 100644 --- a/docs/developer-guide/operators.md +++ b/docs/developer-guide/operators.md @@ -1,168 +1,177 @@ - -* [AbsVal](#absval) -* [ArgMax](#argmax) -* [BatchNorm](#batchnorm) -* [Bias](#bias) -* [BinaryOp](#binaryop) -* [BNLL](#bnll) -* [Cast](#cast) -* [CELU](#celu) -* [Clip](#clip) -* [Concat](#concat) -* [Convolution](#convolution) -* [Convolution1D](#convolution1d) -* [Convolution3D](#convolution3d) -* [ConvolutionDepthWise](#convolutiondepthwise) -* [ConvolutionDepthWise1D](#convolutiondepthwise1d) -* [ConvolutionDepthWise3D](#convolutiondepthwise3d) -* [CopyTo](#copyto) -* [Crop](#crop) -* [CumulativeSum](#cumulativesum) -* [Deconvolution](#deconvolution) -* [Deconvolution1D](#deconvolution1d) -* [Deconvolution3D](#deconvolution3d) -* [DeconvolutionDepthWise](#deconvolutiondepthwise) -* [DeconvolutionDepthWise1D](#deconvolutiondepthwise1d) -* [DeconvolutionDepthWise3D](#deconvolutiondepthwise3d) -* [DeformableConv2D](#deformableconv2d) -* [Dequantize](#dequantize) -* [Diag](#diag) -* [Dropout](#dropout) -* [Eltwise](#eltwise) -* [ELU](#elu) -* [Embed](#embed) -* [Exp](#exp) -* [Flatten](#flatten) -* [Fold](#fold) -* [GELU](#gelu) -* [GLU](#glu) -* [Gemm](#gemm) -* [GridSample](#gridsample) -* [GroupNorm](#groupnorm) -* [GRU](#gru) -* [HardSigmoid](#hardsigmoid) -* [HardSwish](#hardswish) -* [InnerProduct](#innerproduct) -* [Input](#input) -* [InstanceNorm](#instancenorm) -* [Interp](#interp) -* [InverseSpectrogram](#inversespectrogram) -* [LayerNorm](#layernorm) -* [Log](#log) -* [LRN](#lrn) -* [LSTM](#lstm) -* [MemoryData](#memorydata) -* [Mish](#mish) -* [MultiHeadAttention](#multiheadattention) -* [MVN](#mvn) -* [Noop](#noop) -* [Normalize](#normalize) -* [Packing](#packing) -* [Padding](#padding) -* [Permute](#permute) -* [PixelShuffle](#pixelshuffle) -* [Pooling](#pooling) -* [Pooling1D](#pooling1d) -* [Pooling3D](#pooling3d) -* [Power](#power) -* [PReLU](#prelu) -* [Quantize](#quantize) -* [Reduction](#reduction) -* [ReLU](#relu) -* [Reorg](#reorg) -* [Requantize](#requantize) -* [Reshape](#reshape) -* [RMSNorm](#rmsnorm) -* [RNN](#rnn) -* [Scale](#scale) -* [SELU](#selu) -* [Shrink](#shrink) -* [ShuffleChannel](#shufflechannel) -* [Sigmoid](#sigmoid) -* [Slice](#slice) -* [Softmax](#softmax) -* [Softplus](#softplus) -* [Spectrogram](#spectrogram) -* [Split](#split) -* [Swish](#swish) -* [TanH](#tanh) -* [Threshold](#threshold) -* [Tile](#tile) -* [UnaryOp](#unaryop) -* [Unfold](#unfold) +- [AbsVal](#absval) +- [ArgMax](#argmax) +- [BatchNorm](#batchnorm) +- [Bias](#bias) +- [BinaryOp](#binaryop) +- [BNLL](#bnll) +- [Cast](#cast) +- [CELU](#celu) +- [Clip](#clip) +- [Concat](#concat) +- [Convolution](#convolution) +- [Convolution1D](#convolution1d) +- [Convolution3D](#convolution3d) +- [ConvolutionDepthWise](#convolutiondepthwise) +- 
[ConvolutionDepthWise1D](#convolutiondepthwise1d) +- [ConvolutionDepthWise3D](#convolutiondepthwise3d) +- [CopyTo](#copyto) +- [Crop](#crop) +- [CumulativeSum](#cumulativesum) +- [Deconvolution](#deconvolution) +- [Deconvolution1D](#deconvolution1d) +- [Deconvolution3D](#deconvolution3d) +- [DeconvolutionDepthWise](#deconvolutiondepthwise) +- [DeconvolutionDepthWise1D](#deconvolutiondepthwise1d) +- [DeconvolutionDepthWise3D](#deconvolutiondepthwise3d) +- [DeformableConv2D](#deformableconv2d) +- [Dequantize](#dequantize) +- [Diag](#diag) +- [Dropout](#dropout) +- [Eltwise](#eltwise) +- [ELU](#elu) +- [Embed](#embed) +- [Exp](#exp) +- [Flatten](#flatten) +- [Flip](#flip) +- [Fold](#fold) +- [GELU](#gelu) +- [GLU](#glu) +- [Gemm](#gemm) +- [GridSample](#gridsample) +- [GroupNorm](#groupnorm) +- [GRU](#gru) +- [HardSigmoid](#hardsigmoid) +- [HardSwish](#hardswish) +- [InnerProduct](#innerproduct) +- [Input](#input) +- [InstanceNorm](#instancenorm) +- [Interp](#interp) +- [InverseSpectrogram](#inversespectrogram) +- [LayerNorm](#layernorm) +- [Log](#log) +- [LRN](#lrn) +- [LSTM](#lstm) +- [MemoryData](#memorydata) +- [Mish](#mish) +- [MultiHeadAttention](#multiheadattention) +- [MVN](#mvn) +- [Noop](#noop) +- [Normalize](#normalize) +- [Packing](#packing) +- [Padding](#padding) +- [Permute](#permute) +- [PixelShuffle](#pixelshuffle) +- [Pooling](#pooling) +- [Pooling1D](#pooling1d) +- [Pooling3D](#pooling3d) +- [Power](#power) +- [PReLU](#prelu) +- [Quantize](#quantize) +- [Reduction](#reduction) +- [ReLU](#relu) +- [Reorg](#reorg) +- [Requantize](#requantize) +- [Reshape](#reshape) +- [RMSNorm](#rmsnorm) +- [RNN](#rnn) +- [Scale](#scale) +- [SELU](#selu) +- [Shrink](#shrink) +- [ShuffleChannel](#shufflechannel) +- [Sigmoid](#sigmoid) +- [Slice](#slice) +- [Softmax](#softmax) +- [Softplus](#softplus) +- [Spectrogram](#spectrogram) +- [Split](#split) +- [Swish](#swish) +- [TanH](#tanh) +- [Threshold](#threshold) +- [Tile](#tile) +- [UnaryOp](#unaryop) +- [Unfold](#unfold) # AbsVal + ``` y = abs(x) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # ArgMax + ``` y = argmax(x, out_max_val, topk) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | out_max_val | int | 0 | | -| 1 | topk | int | 1 | | +| param id | name | type | default | description | +| -------- | ----------- | ---- | ------- | ----------- | +| 0 | out_max_val | int | 0 | | +| 1 | topk | int | 1 | | # BatchNorm + ``` y = (x - mean) / sqrt(var + eps) * slope + bias ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | channels | int | 0 | | -| 1 | eps | float | 0.f | | +| param id | name | type | default | description | +| -------- | -------- | ----- | ------- | ----------- | +| 0 | channels | int | 0 | | +| 1 | eps | float | 0.f | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| slope_data | float | [channels] | -| mean_data | float | [channels] | -| var_data | float | [channels] | -| bias_data | float | [channels] | +| weight | type | shape | +| ---------- | ----- | ---------- | +| slope_data | float | [channels] | +| mean_data | float | [channels] | +| var_data | float | [channels] | +| bias_data | float | [channels] | # Bias + ``` y = x + bias ``` -* one_blob_only -* support_inplace +- one_blob_only +- 
support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | bias_data_size| int | 0 | | +| param id | name | type | default | description | +| -------- | -------------- | ---- | ------- | ----------- | +| 0 | bias_data_size | int | 0 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| bias_data | float | [channels] | +| weight | type | shape | +| --------- | ----- | ---------- | +| bias_data | float | [channels] | # BinaryOp - This operation is used for binary computation, and the calculation rule depends on the [broadcasting rule](https://github.com/Tencent/ncnn/wiki/binaryop-broadcasting). + +This operation is used for binary computation, and the calculation rule depends on the [broadcasting rule](https://github.com/Tencent/ncnn/wiki/binaryop-broadcasting). + ``` C = binaryop(A, B) ``` + if with_scalar = 1: + - one_blob_only - support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | op_type | int | 0 | Operation type as follows | -| 1 | with_scalar | int | 0 | with_scalar=0 B is a matrix, with_scalar=1 B is a scalar | -| 2 | b | float | 0.f | When B is a scalar, B = b | +| param id | name | type | default | description | +| -------- | ----------- | ----- | ------- | -------------------------------------------------------- | +| 0 | op_type | int | 0 | Operation type as follows | +| 1 | with_scalar | int | 0 | with_scalar=0 B is a matrix, with_scalar=1 B is a scalar | +| 2 | b | float | 0.f | When B is a scalar, B = b | Operation type: + - 0 = ADD - 1 = SUB - 2 = MUL @@ -177,28 +186,31 @@ Operation type: - 11 = RATAN2 # BNLL + ``` y = log(1 + e^(-x)) , x > 0 y = log(1 + e^x), x < 0 ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # Cast + ``` y = cast(x) ``` -* one_blob_only -* support_packing +- one_blob_only +- support_packing -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | type_from | int | 0 | | -| 1 | type_to | int | 0 | | +| param id | name | type | default | description | +| -------- | --------- | ---- | ------- | ----------- | +| 0 | type_from | int | 0 | | +| 1 | type_to | int | 0 | | Element type: + - 0 = auto - 1 = float32 - 2 = float16 @@ -206,293 +218,304 @@ Element type: - 4 = bfloat16 # CELU + ``` if x < 0 y = (exp(x / alpha) - 1.f) * alpha else y = x ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 1.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | alpha | float | 1.f | | # Clip + ``` y = clamp(x, min, max) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | min | float | -FLT_MAX | | -| 1 | max | float | FLT_MAX | | +| param id | name | type | default | description | +| -------- | ---- | ----- | -------- | ----------- | +| 0 | min | float | -FLT_MAX | | +| 1 | max | float | FLT_MAX | | # Concat + ``` y = concat(x0, x1, x2, ...) 
by axis ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | axis | int | 0 | | +| param id | name | type | default | description | +| -------- | ---- | ---- | ------- | ----------- | +| 0 | axis | int | 0 | | # Convolution + ``` x2 = pad(x, pads, pad_value) x3 = conv(x2, weight, kernel, stride, dilation) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 8 | int8_scale_term| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 18 | pad_value | float | 0.f | | -| 19 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input, num_output] | -| bias_data | float | [num_output] | -| weight_data_int8_scales| float | [num_output] | -| bottom_blob_int8_scales| float | [1] | -| top_blob_int8_scales| float | [1] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 8 | int8_scale_term | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 18 | pad_value | float | 0.f | | +| 19 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------------------- | --------------- | ------------------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input, num_output] | +| bias_data | float | [num_output] | +| weight_data_int8_scales | float | [num_output] | +| bottom_blob_int8_scales | float | [1] | +| top_blob_int8_scales | float | [1] | # Convolution1D + ``` x2 = pad(x, pads, pad_value) x3 = conv1d(x2, weight, kernel, stride, dilation) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 15 | pad_right | int | pad_left | | -| 18 | pad_value | float | 0.f | | -| 19 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, 
num_input, num_output] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | -------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 15 | pad_right | int | pad_left | | +| 18 | pad_value | float | 0.f | | +| 19 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | --------------- | --------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, num_input, num_output] | +| bias_data | float | [num_output] | # Convolution3D + ``` x2 = pad(x, pads, pad_value) x3 = conv3d(x2, weight, kernel, stride, dilation) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 17 | pad_behind | int | pad_front | | -| 18 | pad_value | float | 0.f | | -| 21 | kernel_d | int | kernel_w | | -| 22 | dilation_d | int | dilation_w | | -| 23 | stride_d | int | stride_w | | -| 24 | pad_front | int | pad_left | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, kernel_h, kernel_d, num_input, num_output] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 17 | pad_behind | int | pad_front | | +| 18 | pad_value | float | 0.f | | +| 21 | kernel_d | int | kernel_w | | +| 22 | dilation_d | int | dilation_w | | +| 23 | stride_d | int | stride_w | | +| 24 | pad_front | int | pad_left | | + +| weight | type | shape | +| ----------- | --------------- | ----------------------------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, kernel_h, kernel_d, num_input, num_output] | +| bias_data | float | [num_output] | # ConvolutionDepthWise + ``` x2 = pad(x, pads, pad_value) x3 = conv(x2, weight, kernel, stride, dilation, group) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | 
----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 8 | int8_scale_term| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 18 | pad_value | float | 0.f | | -| 19 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | -| weight_data_int8_scales| float | [group] | -| bottom_blob_int8_scales| float | [1] | -| top_blob_int8_scales| float | [1] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 8 | int8_scale_term | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 18 | pad_value | float | 0.f | | +| 19 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------------------- | --------------- | ------------------------------------------------------------------ | +| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | +| weight_data_int8_scales | float | [group] | +| bottom_blob_int8_scales | float | [1] | +| top_blob_int8_scales | float | [1] | # ConvolutionDepthWise1D + ``` x2 = pad(x, pads, pad_value) x3 = conv1d(x2, weight, kernel, stride, dilation, group) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 15 | pad_right | int | pad_left | | -| 18 | pad_value | float | 0.f | | -| 19 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | -------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | 
+| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 15 | pad_right | int | pad_left | | +| 18 | pad_value | float | 0.f | | +| 19 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | --------------- | -------------------------------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | # ConvolutionDepthWise3D + ``` x2 = pad(x, pads, pad_value) x3 = conv3d(x2, weight, kernel, stride, dilation, group) + bias y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 17 | pad_behind | int | pad_front | | -| 18 | pad_value | float | 0.f | | -| 21 | kernel_d | int | kernel_w | | -| 22 | dilation_d | int | dilation_w | | -| 23 | stride_d | int | stride_w | | -| 24 | pad_front | int | pad_left | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, kernel_h, kernel_d, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 17 | pad_behind | int | pad_front | | +| 18 | pad_value | float | 0.f | | +| 21 | kernel_d | int | kernel_w | | +| 22 | dilation_d | int | dilation_w | | +| 23 | stride_d | int | stride_w | | +| 24 | pad_front | int | pad_left | | + +| weight | type | shape | +| ----------- | --------------- | ---------------------------------------------------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, kernel_h, kernel_d, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | # CopyTo + ``` self[offset] = src ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | woffset | int | 0 | | -| 1 | hoffset | int | 0 | | -| 13 | doffset | int | 0 | | -| 2 | coffset | int | 0 | | -| 9 | starts | array | [ ] | | -| 11 | axes | array | [ ] | | +| param id | name | type | default | description | +| -------- | ------- | ----- | ------- | 
----------- | +| 0 | woffset | int | 0 | | +| 1 | hoffset | int | 0 | | +| 13 | doffset | int | 0 | | +| 2 | coffset | int | 0 | | +| 9 | starts | array | [ ] | | +| 11 | axes | array | [ ] | | # Crop + ``` y = crop(x) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | woffset | int | 0 | | -| 1 | hoffset | int | 0 | | -| 13 | doffset | int | 0 | | -| 2 | coffset | int | 0 | | -| 3 | outw | int | 0 | | -| 4 | outh | int | 0 | | -| 14 | outd | int | 0 | | -| 5 | outc | int | 0 | | -| 6 | woffset2 | int | 0 | | -| 7 | hoffset2 | int | 0 | | -| 15 | doffset2 | int | 0 | | -| 8 | coffset2 | int | 0 | | -| 9 | starts | array | [ ] | | -| 10 | ends | array | [ ] | | -| 11 | axes | array | [ ] | | +- one_blob_only + +| param id | name | type | default | description | +| -------- | -------- | ----- | ------- | ----------- | +| 0 | woffset | int | 0 | | +| 1 | hoffset | int | 0 | | +| 13 | doffset | int | 0 | | +| 2 | coffset | int | 0 | | +| 3 | outw | int | 0 | | +| 4 | outh | int | 0 | | +| 14 | outd | int | 0 | | +| 5 | outc | int | 0 | | +| 6 | woffset2 | int | 0 | | +| 7 | hoffset2 | int | 0 | | +| 15 | doffset2 | int | 0 | | +| 8 | coffset2 | int | 0 | | +| 9 | starts | array | [ ] | | +| 10 | ends | array | [ ] | | +| 11 | axes | array | [ ] | | # CumulativeSum @@ -500,408 +523,433 @@ If axis < 0, we use axis = x.dims + axis It implements https://pytorch.org/docs/stable/generated/torch.cumsum.html -* one_blob_only -* support_inplace - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | axis | int | 0 | | +- one_blob_only +- support_inplace +| param id | name | type | default | description | +| -------- | ---- | ---- | ------- | ----------- | +| 0 | axis | int | 0 | | # Deconvolution + ``` x2 = deconv(x, weight, kernel, stride, dilation) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 18 | output_pad_right| int | 0 | | -| 19 | output_pad_bottom| int | output_pad_right | | -| 20 | output_w | int | 0 | | -| 21 | output_h | int | output_w | | -| 28 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, kernel_h, num_input, num_output] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | 
kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 18 | output_pad_right | int | 0 | | +| 19 | output_pad_bottom | int | output_pad_right | | +| 20 | output_w | int | 0 | | +| 21 | output_h | int | output_w | | +| 28 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | ---------- | ------------------------------------------- | +| weight_data | float/fp16 | [kernel_w, kernel_h, num_input, num_output] | +| bias_data | float | [num_output] | # Deconvolution1D + ``` x2 = deconv1d(x, weight, kernel, stride, dilation) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 15 | pad_right | int | pad_left | | -| 18 | output_pad_right| int | 0 | | -| 20 | output_w | int | 0 | | -| 28 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, num_input, num_output] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | -------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 15 | pad_right | int | pad_left | | +| 18 | output_pad_right | int | 0 | | +| 20 | output_w | int | 0 | | +| 28 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | ---------- | --------------------------------- | +| weight_data | float/fp16 | [kernel_w, num_input, num_output] | +| bias_data | float | [num_output] | # Deconvolution3D + ``` x2 = deconv3d(x, weight, kernel, stride, dilation) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 17 | pad_behind | int | pad_front | | -| 18 | output_pad_right| int | 0 | | -| 19 | output_pad_bottom| int | output_pad_right | | -| 20 | output_pad_behind| int | output_pad_right | | -| 21 | kernel_d | int | kernel_w | | -| 22 | dilation_d | int | dilation_w | | -| 23 | stride_d | int | stride_w | | -| 24 | pad_front | int | pad_left | | -| 25 | 
output_w | int | 0 | | -| 26 | output_h | int | output_w | | -| 27 | output_d | int | output_w | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, kernel_h, kernel_d, num_input, num_output] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 17 | pad_behind | int | pad_front | | +| 18 | output_pad_right | int | 0 | | +| 19 | output_pad_bottom | int | output_pad_right | | +| 20 | output_pad_behind | int | output_pad_right | | +| 21 | kernel_d | int | kernel_w | | +| 22 | dilation_d | int | dilation_w | | +| 23 | stride_d | int | stride_w | | +| 24 | pad_front | int | pad_left | | +| 25 | output_w | int | 0 | | +| 26 | output_h | int | output_w | | +| 27 | output_d | int | output_w | | + +| weight | type | shape | +| ----------- | ---------- | ----------------------------------------------------- | +| weight_data | float/fp16 | [kernel_w, kernel_h, kernel_d, num_input, num_output] | +| bias_data | float | [num_output] | # DeconvolutionDepthWise + ``` x2 = deconv(x, weight, kernel, stride, dilation, group) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 18 | output_pad_right| int | 0 | | -| 19 | output_pad_bottom| int | output_pad_right | | -| 20 | output_w | int | 0 | | -| 21 | output_h | int | output_w | | -| 28 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, kernel_h, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | 
stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 18 | output_pad_right | int | 0 | | +| 19 | output_pad_bottom | int | output_pad_right | | +| 20 | output_w | int | 0 | | +| 21 | output_h | int | output_w | | +| 28 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | ---------- | ------------------------------------------------------------------ | +| weight_data | float/fp16 | [kernel_w, kernel_h, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | # DeconvolutionDepthWise1D + ``` x2 = deconv1d(x, weight, kernel, stride, dilation, group) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 15 | pad_right | int | pad_left | | -| 18 | output_pad_right| int | 0 | | -| 20 | output_w | int | 0 | | -| 28 | dynamic_weight| int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | -------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 15 | pad_right | int | pad_left | | +| 18 | output_pad_right | int | 0 | | +| 20 | output_w | int | 0 | | +| 28 | dynamic_weight | int | 0 | | + +| weight | type | shape | +| ----------- | ---------- | -------------------------------------------------------- | +| weight_data | float/fp16 | [kernel_w, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | # DeconvolutionDepthWise3D + ``` x2 = deconv3d(x, weight, kernel, stride, dilation, group) + bias x3 = depad(x2, pads, pad_value) y = activation(x3, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 7 | group | int | 1 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 17 | pad_behind | int | pad_front | | -| 18 | output_pad_right| int | 0 | | -| 19 | output_pad_bottom| int | output_pad_right | | -| 20 | output_pad_behind| int | output_pad_right | | -| 21 | kernel_d | int | kernel_w | | -| 
22 | dilation_d | int | dilation_w | | -| 23 | stride_d | int | stride_w | | -| 24 | pad_front | int | pad_left | | -| 25 | output_w | int | 0 | | -| 26 | output_h | int | output_w | | -| 27 | output_d | int | output_w | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16 | [kernel_w, kernel_h, kernel_d, num_input / group, num_output / group, group] | -| bias_data | float | [num_output] | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 7 | group | int | 1 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 17 | pad_behind | int | pad_front | | +| 18 | output_pad_right | int | 0 | | +| 19 | output_pad_bottom | int | output_pad_right | | +| 20 | output_pad_behind | int | output_pad_right | | +| 21 | kernel_d | int | kernel_w | | +| 22 | dilation_d | int | dilation_w | | +| 23 | stride_d | int | stride_w | | +| 24 | pad_front | int | pad_left | | +| 25 | output_w | int | 0 | | +| 26 | output_h | int | output_w | | +| 27 | output_d | int | output_w | | + +| weight | type | shape | +| ----------- | ---------- | ---------------------------------------------------------------------------- | +| weight_data | float/fp16 | [kernel_w, kernel_h, kernel_d, num_input / group, num_output / group, group] | +| bias_data | float | [num_output] | # DeformableConv2D + ``` x2 = deformableconv2d(x, offset, mask, weight, kernel, stride, dilation) + bias y = activation(x2, act_type, act_params) ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 5 | bias_term | int | 0 | | -| 6 | weight_data_size| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input, num_output] | -| bias_data | float | [num_output] | +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 5 | bias_term | int | 0 | | +| 6 | weight_data_size | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | activation_params | array | [ ] | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | 
pad_top | | + +| weight | type | shape | +| ----------- | --------------- | ------------------------------------------- | +| weight_data | float/fp16/int8 | [kernel_w, kernel_h, num_input, num_output] | +| bias_data | float | [num_output] | # Dequantize + ``` y = x * scale + bias ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | scale_data_size| int | 1 | | -| 1 | bias_data_size| int | 0 | | +| param id | name | type | default | description | +| -------- | --------------- | ---- | ------- | ----------- | +| 0 | scale_data_size | int | 1 | | +| 1 | bias_data_size | int | 0 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| scale_data | float | [scale_data_size] | -| bias_data | float | [bias_data_size] | +| weight | type | shape | +| ---------- | ----- | ----------------- | +| scale_data | float | [scale_data_size] | +| bias_data | float | [bias_data_size] | # Diag + ``` y = diag(x, diagonal) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | diagonal | int | 0 | | +| param id | name | type | default | description | +| -------- | -------- | ---- | ------- | ----------- | +| 0 | diagonal | int | 0 | | # Dropout + ``` y = x * scale ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | scale | float | 1.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | scale | float | 1.f | | # Eltwise + ``` y = elementwise_op(x0, x1, ...) 
``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | op_type | int | 0 | | -| 1 | coeffs | array | [ ] | | +| param id | name | type | default | description | +| -------- | ------- | ----- | ------- | ----------- | +| 0 | op_type | int | 0 | | +| 1 | coeffs | array | [ ] | | Operation type: + - 0 = PROD - 1 = SUM - 2 = MAX # ELU + ``` if x < 0 y = (exp(x) - 1) * alpha else y = x ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 0.1f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | alpha | float | 0.1f | | # Embed + ``` y = embedding(x) ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | input_dim | int | 0 | | -| 2 | bias_term | int | 0 | | -| 3 | weight_data_size | int | 0 | | -| 18 | int8_scale_term| int | 0 | | +| param id | name | type | default | description | +| -------- | ---------------- | ---- | ------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | input_dim | int | 0 | | +| 2 | bias_term | int | 0 | | +| 3 | weight_data_size | int | 0 | | +| 18 | int8_scale_term | int | 0 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float | [weight_data_size] | -| bias_term | float | [num_output] | -| weight_data_int8_scales| float | [1] | +| weight | type | shape | +| ----------------------- | ----- | ------------------ | +| weight_data | float | [weight_data_size] | +| bias_term | float | [num_output] | +| weight_data_int8_scales | float | [1] | # Exp + ``` if base == -1 y = exp(shift + x * scale) else y = pow(base, (shift + x * scale)) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | base | float | -1.f | | -| 1 | scale | float | 1.f | | -| 2 | shift | float | 0.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | base | float | -1.f | | +| 1 | scale | float | 1.f | | +| 2 | shift | float | 0.f | | # Flatten + Reshape blob to 1 dimension -* one_blob_only +- one_blob_only + +# Flip + +- one_blob_only + +| param id | name | type | default | description | +| -------- | ---- | ----- | ------- | ----------- | +| 0 | axis | array | [] | | # Fold + ``` y = fold(x) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | -| 20 | output_w | int | 0 | | -| 21 | output_h | int | output_w | | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ---------- | ---- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | 
int | 1 | | +| 4 | pad_left | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 20 | output_w | int | 0 | | +| 21 | output_h | int | output_w | | # GELU + ``` if fast_gelu == 1 y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x))); else y = 0.5 * x * erfc(-0.70710678 * x) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | fast_gelu | int | 0 | use approximation | +| param id | name | type | default | description | +| -------- | --------- | ---- | ------- | ----------------- | +| 0 | fast_gelu | int | 0 | use approximation | # GLU @@ -913,13 +961,14 @@ where a is the first half of the input matrix and b is the second half. axis specifies the dimension to split the input -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | axis | int | 0 | | +| param id | name | type | default | description | +| -------- | ---- | ---- | ------- | ----------- | +| 0 | axis | int | 0 | | # Gemm + ``` a = transA ? transpose(x0) : x0 b = transb ? transpose(x1) : x1 @@ -927,88 +976,91 @@ c = x2 y = (gemm(a, b) + c * beta) * alpha ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 1.f | | -| 1 | beta | float | 1.f | | -| 2 | transA | int | 0 | | -| 3 | transb | int | 0 | | -| 4 | constantA | int | 0 | | -| 5 | constantB | int | 0 | | -| 6 | constantC | int | 0 | | -| 7 | constantM | int | 0 | | -| 8 | constantN | int | 0 | | -| 9 | constantK | int | 0 | | -| 10 | constant_broadcast_type_C | int | 0 | | -| 11 | output_N1M | int | 0 | | -| 12 | output_elempack | int | 0 | | -| 13 | output_elemtype | int | 0 | | -| 14 | output_transpose | int| 0 | | -| 18 | int8_scale_term | int | 0 | | -| 20 | constant_TILE_M | int | 0 | | -| 21 | constant_TILE_N | int | 0 | | -| 22 | constant_TILE_K | int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| A_data | float/fp16/int8 | [M, K] or [K, M] | -| B_data | float/fp16/int8 | [N, K] or [K, N] | -| C_data | float | [1], [M] or [N] or [1, M] or [N,1] or [N, M] | -| A_data_int8_scales| float | [M] | -| B_data_int8_scales| float | [1] | +| param id | name | type | default | description | +| -------- | ------------------------- | ----- | ------- | ----------- | +| 0 | alpha | float | 1.f | | +| 1 | beta | float | 1.f | | +| 2 | transA | int | 0 | | +| 3 | transb | int | 0 | | +| 4 | constantA | int | 0 | | +| 5 | constantB | int | 0 | | +| 6 | constantC | int | 0 | | +| 7 | constantM | int | 0 | | +| 8 | constantN | int | 0 | | +| 9 | constantK | int | 0 | | +| 10 | constant_broadcast_type_C | int | 0 | | +| 11 | output_N1M | int | 0 | | +| 12 | output_elempack | int | 0 | | +| 13 | output_elemtype | int | 0 | | +| 14 | output_transpose | int | 0 | | +| 18 | int8_scale_term | int | 0 | | +| 20 | constant_TILE_M | int | 0 | | +| 21 | constant_TILE_N | int | 0 | | +| 22 | constant_TILE_K | int | 0 | | + +| weight | type | shape | +| ------------------ | --------------- | -------------------------------------------- | +| A_data | float/fp16/int8 | [M, K] or [K, M] | +| B_data | 
float/fp16/int8 | [N, K] or [K, N] | +| C_data | float | [1], [M] or [N] or [1, M] or [N,1] or [N, M] | +| A_data_int8_scales | float | [M] | +| B_data_int8_scales | float | [1] | # GridSample + ``` Given an input and a flow-field grid, computes the output using input values and pixel locations from grid. -For each output location output[:, h2, w2], the size-2 vector grid[h2, w2, 2] specifies input pixel[:, h1, w1] locations x and y, +For each output location output[:, h2, w2], the size-2 vector grid[h2, w2, 2] specifies input pixel[:, h1, w1] locations x and y, which are used to interpolate the output value output[:, h2, w2] This function is often used in conjunction with affine_grid() to build Spatial Transformer Networks . ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | sample_type | int | 1 | | -| 1 | padding_mode | int | 1 | | -| 2 | align_corner | int | 0 | | -| 3 | permute_fusion| int | 0 | fuse with permute | - +| param id | name | type | default | description | +| -------- | -------------- | ---- | ------- | ----------------- | +| 0 | sample_type | int | 1 | | +| 1 | padding_mode | int | 1 | | +| 2 | align_corner | int | 0 | | +| 3 | permute_fusion | int | 0 | fuse with permute | Sample type: + - 1 = Nearest - 2 = Bilinear - 3 = Bicubic Padding mode: + - 1 = zeros - 2 = border - 3 = reflection - # GroupNorm + ``` split x along channel axis into group x0, x1 ... l2 normalize for each group x0, x1 ... y = x * gamma + beta ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | group | int | 1 | | -| 1 | channels | int | 0 | | -| 2 | eps | float | 0.001f | x = x / sqrt(var + eps) | -| 3 | affine | int | 1 | | +| param id | name | type | default | description | +| -------- | -------- | ----- | ------- | ----------------------- | +| 0 | group | int | 1 | | +| 1 | channels | int | 0 | | +| 2 | eps | float | 0.001f | x = x / sqrt(var + eps) | +| 3 | affine | int | 1 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| gamma_data | float | [channels] | -| beta_data | float | [channels] | +| weight | type | shape | +| ---------- | ----- | ---------- | +| gamma_data | float | [channels] | +| beta_data | float | [channels] | # GRU + Apply a single-layer GRU to a feature sequence of `T` timesteps. The input blob shape is `[w=input_size, h=T]` and the output blob shape is `[w=num_output, h=T]`. 
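Below is a minimal C++ sketch of feeding such a sequence blob through the public ncnn API. It is only an illustration: the file names `gru_model.param`/`gru_model.bin` and the blob names `in0`/`out0` are hypothetical placeholders and are not part of this operator definition. The pseudocode fence that follows states the operator semantics.

```
#include "net.h"

int main()
{
    // hypothetical model files whose graph contains a GRU layer
    ncnn::Net net;
    if (net.load_param("gru_model.param"))
        return -1;
    if (net.load_model("gru_model.bin"))
        return -1;

    const int T = 10;         // number of timesteps
    const int input_size = 8; // feature size per timestep

    // input blob shape is [w=input_size, h=T], as described above
    ncnn::Mat x(input_size, T);
    x.fill(0.5f);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("in0", x); // assumed input blob name

    ncnn::Mat y;
    ex.extract("out0", y); // output blob shape is [w=num_output, h=T]

    return 0;
}
```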
``` @@ -1016,134 +1068,143 @@ y = gru(x) y0, hidden y1 = gru(x0, hidden x1) ``` -* one_blob_only if bidirectional +- one_blob_only if bidirectional -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | hidden size of output | -| 1 | weight_data_size| int | 0 | total size of weight matrix | -| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | +| param id | name | type | default | description | +| -------- | ---------------- | ---- | ------- | ------------------------------------- | +| 0 | num_output | int | 0 | hidden size of output | +| 1 | weight_data_size | int | 0 | total size of weight matrix | +| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_xc_data| float/fp16/int8 | [input_size, num_output * 3, num_directions] | -| bias_c_data | float/fp16/int8 | [num_output, 4, num_directions] | -| weight_hc_data| float/fp16/int8 | [num_output, num_output * 3, num_directions] | +| weight | type | shape | +| -------------- | --------------- | -------------------------------------------- | +| weight_xc_data | float/fp16/int8 | [input_size, num_output * 3, num_directions] | +| bias_c_data | float/fp16/int8 | [num_output, 4, num_directions] | +| weight_hc_data | float/fp16/int8 | [num_output, num_output * 3, num_directions] | Direction flag: + - 0 = forward only - 1 = reverse only - 2 = bidirectional # HardSigmoid + ``` y = clamp(x * alpha + beta, 0, 1) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 0.2f | | -| 1 | beta | float | 0.5f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | alpha | float | 0.2f | | +| 1 | beta | float | 0.5f | | # HardSwish + ``` y = x * clamp(x * alpha + beta, 0, 1) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 0.2f | | -| 1 | beta | float | 0.5f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | alpha | float | 0.2f | | +| 1 | beta | float | 0.5f | | # InnerProduct + ``` x2 = innerproduct(x, weight) + bias y = activation(x2, act_type, act_params) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | bias_term | int | 0 | | -| 2 | weight_data_size| int | 0 | | -| 8 | int8_scale_term| int | 0 | | -| 9 | activation_type| int | 0 | | -| 10 | activation_params| array | [ ] | | +- one_blob_only -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_data | float/fp16/int8 | [num_input, num_output] | -| bias_data | float | [num_output] | -| weight_data_int8_scales| float | [num_output] | -| bottom_blob_int8_scales| float | [1] | +| param id | name | type | default | description | +| -------- | ----------------- | ----- | ------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | bias_term | int | 0 | | +| 2 | weight_data_size | int | 0 | | +| 8 | int8_scale_term | int | 0 | | +| 9 | activation_type | int | 0 | | +| 10 | 
activation_params | array | [ ] | | + +| weight | type | shape | +| ----------------------- | --------------- | ----------------------- | +| weight_data | float/fp16/int8 | [num_input, num_output] | +| bias_data | float | [num_output] | +| weight_data_int8_scales | float | [num_output] | +| bottom_blob_int8_scales | float | [1] | # Input + ``` y = input ``` -* support_inplace +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | w | int | 0 | | -| 1 | h | int | 0 | | -| 11 | d | int | 0 | | -| 2 | c | int | 0 | | +| param id | name | type | default | description | +| -------- | ---- | ---- | ------- | ----------- | +| 0 | w | int | 0 | | +| 1 | h | int | 0 | | +| 11 | d | int | 0 | | +| 2 | c | int | 0 | | # InstanceNorm + ``` split x along channel axis into instance x0, x1 ... l2 normalize for each channel instance x0, x1 ... y = x * gamma + beta ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | channels | int | 0 | | -| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | -| 2 | affine | int | 1 | | +| param id | name | type | default | description | +| -------- | -------- | ----- | ------- | ----------------------- | +| 0 | channels | int | 0 | | +| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | +| 2 | affine | int | 1 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| gamma_data | float | [channels] | -| beta_data | float | [channels] | +| weight | type | shape | +| ---------- | ----- | ---------- | +| gamma_data | float | [channels] | +| beta_data | float | [channels] | # Interp + ``` if dynamic_target_size == 0 y = resize(x) by fixed size or scale else y = resize(x0, size(x1)) ``` -* one_blob_only if dynamic_target_size == 0 +- one_blob_only if dynamic_target_size == 0 -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | resize_type | int | 0 | | -| 1 | height_scale | float | 1.f | | -| 2 | width_scale | float | 1.f | | -| 3 | output_height | int | 0 | | -| 4 | output_width | int | 0 | | -| 5 | dynamic_target_size| int | 0 | | -| 6 | align_corner | int | 0 | | +| param id | name | type | default | description | +| -------- | ------------------- | ----- | ------- | ----------- | +| 0 | resize_type | int | 0 | | +| 1 | height_scale | float | 1.f | | +| 2 | width_scale | float | 1.f | | +| 3 | output_height | int | 0 | | +| 4 | output_width | int | 0 | | +| 5 | dynamic_target_size | int | 0 | | +| 6 | align_corner | int | 0 | | Resize type: + - 1 = Nearest - 2 = Bilinear - 3 = Bicubic # InverseSpectrogram + ``` x1 = x as complex x1 = x1 * sqrt(norm) if normalized @@ -1155,77 +1216,82 @@ if returns == 1 return y1 real if returns == 2 return y1 imag ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | n_fft | int | 0 | | -| 1 | returns | int | 1 | | -| 2 | hoplen | int | n_fft / 4 | | -| 3 | winlen | int | n_fft | | -| 4 | window_type | int | 0 | 0=ones 1=hann 2=hamming | -| 5 | center | int | 1 | | -| 7 | normalized | int | 0 | 0=no 1=n_fft 2=window-l2-energy | +| param id | name | type | default | description | +| -------- | ----------- | ---- | --------- | ------------------------------- | +| 0 | n_fft | int | 0 
| | +| 1 | returns | int | 1 | | +| 2 | hoplen | int | n_fft / 4 | | +| 3 | winlen | int | n_fft | | +| 4 | window_type | int | 0 | 0=ones 1=hann 2=hamming | +| 5 | center | int | 1 | | +| 7 | normalized | int | 0 | 0=no 1=n_fft 2=window-l2-energy | # LayerNorm + ``` split x along outmost axis into part x0, x1 ... l2 normalize for each part x0, x1 ... y = x * gamma + beta by elementwise ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | affine_size | int | 0 | | -| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | -| 2 | affine | int | 1 | | +| param id | name | type | default | description | +| -------- | ----------- | ----- | ------- | ----------------------- | +| 0 | affine_size | int | 0 | | +| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | +| 2 | affine | int | 1 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| gamma_data | float | [affine_size] | -| beta_data | float | [affine_size] | +| weight | type | shape | +| ---------- | ----- | ------------- | +| gamma_data | float | [affine_size] | +| beta_data | float | [affine_size] | # Log + ``` if base == -1 y = log(shift + x * scale) else y = log(shift + x * scale) / log(base) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | base | float | -1.f | | -| 1 | scale | float | 1.f | | -| 2 | shift | float | 0.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | base | float | -1.f | | +| 1 | scale | float | 1.f | | +| 2 | shift | float | 0.f | | # LRN + ``` if region_type == ACROSS_CHANNELS square_sum = sum of channel window of local_size if region_type == WITHIN_CHANNEL square_sum = sum of spatial window of local_size y = x * pow(bias + alpha * square_sum / (local_size * local_size), -beta) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | region_type | int | 0 | | -| 1 | local_size | int | 5 | | -| 2 | alpha | float | 1.f | | -| 3 | beta | float | 0.75f | | -| 4 | bias | float | 1.f | | +| param id | name | type | default | description | +| -------- | ----------- | ----- | ------- | ----------- | +| 0 | region_type | int | 0 | | +| 1 | local_size | int | 5 | | +| 2 | alpha | float | 1.f | | +| 3 | beta | float | 0.75f | | +| 4 | bias | float | 1.f | | Region type: + - 0 = ACROSS_CHANNELS - 1 = WITHIN_CHANNEL # LSTM + Apply a single-layer LSTM to a feature sequence of `T` timesteps. The input blob shape is `[w=input_size, h=T]` and the output blob shape is `[w=num_output, h=T]`. 
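As a rough illustration of the stateful form shown in the formula block below, this sketch carries hidden/cell state across chunks of a long sequence. The blob names (`in0`/`in1`/`in2`, `out0`/`out1`/`out2`) and the zero-initialised state shapes are assumptions for this example only; check the converted .param file for the real names.

```cpp
#include <vector>
#include "net.h"

// Stream a long sequence through an LSTM model chunk by chunk, feeding the
// hidden/cell outputs of one call back in as the initial state of the next.
int run_lstm_streaming(ncnn::Net& net, const std::vector<ncnn::Mat>& chunks,
                       int num_output, int hidden_size, int num_directions)
{
    ncnn::Mat hidden(num_output, num_directions); // assumed initial hidden state shape
    ncnn::Mat cell(hidden_size, num_directions);  // assumed initial cell state shape
    hidden.fill(0.f);
    cell.fill(0.f);

    for (size_t i = 0; i < chunks.size(); i++)
    {
        ncnn::Extractor ex = net.create_extractor();
        ex.input("in0", chunks[i]); // w=input_size, h=T_chunk
        ex.input("in1", hidden);
        ex.input("in2", cell);

        ncnn::Mat y;
        ex.extract("out0", y);      // w=num_output, h=T_chunk
        ex.extract("out1", hidden); // carry state into the next chunk
        ex.extract("out2", cell);
    }
    return 0;
}
```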
``` @@ -1233,53 +1299,57 @@ y = lstm(x) y0, hidden y1, cell y2 = lstm(x0, hidden x1, cell x2) ``` -* one_blob_only if bidirectional +- one_blob_only if bidirectional -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | output size of output | -| 1 | weight_data_size| int | 0 | total size of IFOG weight matrix | -| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | -| 3 | hidden_size | int | num_output| hidden size | +| param id | name | type | default | description | +| -------- | ---------------- | ---- | ---------- | ------------------------------------- | +| 0 | num_output | int | 0 | output size of output | +| 1 | weight_data_size | int | 0 | total size of IFOG weight matrix | +| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | +| 3 | hidden_size | int | num_output | hidden size | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_xc_data| float/fp16/int8 | [input_size, hidden_size * 4, num_directions] | -| bias_c_data | float/fp16/int8 | [hidden_size, 4, num_directions] | -| weight_hc_data| float/fp16/int8 | [num_output, hidden_size * 4, num_directions] | -| weight_hr_data| float/fp16/int8 | [hidden_size, num_output, num_directions] | +| weight | type | shape | +| -------------- | --------------- | --------------------------------------------- | +| weight_xc_data | float/fp16/int8 | [input_size, hidden_size * 4, num_directions] | +| bias_c_data | float/fp16/int8 | [hidden_size, 4, num_directions] | +| weight_hc_data | float/fp16/int8 | [num_output, hidden_size * 4, num_directions] | +| weight_hr_data | float/fp16/int8 | [hidden_size, num_output, num_directions] | Direction flag: + - 0 = forward only - 1 = reverse only - 2 = bidirectional # MemoryData + ``` y = data ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | w | int | 0 | | -| 1 | h | int | 0 | | -| 11 | d | int | 0 | | -| 2 | c | int | 0 | | -| 21 | load_type | int | 1 | 1=fp32 | +| param id | name | type | default | description | +| -------- | --------- | ---- | ------- | ----------- | +| 0 | w | int | 0 | | +| 1 | h | int | 0 | | +| 11 | d | int | 0 | | +| 2 | c | int | 0 | | +| 21 | load_type | int | 1 | 1=fp32 | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| data | float | [w, h, d, c] | +| weight | type | shape | +| ------ | ----- | ------------ | +| data | float | [w, h, d, c] | # Mish + ``` y = x * tanh(log(exp(x) + 1)) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # MultiHeadAttention + ``` split q k v into num_head part q0, k0, v0, q1, k1, v1 ... 
for each num_head part @@ -1294,33 +1364,34 @@ for each num_head part y = affine(out) ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | embed_dim | int | 0 | | -| 1 | num_heads | int | 1 | | -| 2 | weight_data_size| int | 0 | qdim = weight_data_size / embed_dim | -| 3 | kdim | int | embed_dim | | -| 4 | vdim | int | embed_dim | | -| 5 | attn_mask | int | 0 | | -| 6 | scale | float | 1.f / sqrt(embed_dim / num_heads) | | -| 18 | int8_scale_term | int | 0 | | - -| weight | type | shape | -| ------------- | ----- | --------------------- | -| q_weight_data | float/fp16/int8 | [embed_dim * qdim] | -| q_bias_data | float | [embed_dim] | -| k_weight_data | float/fp16/int8 | [embed_dim * kdim] | -| k_bias_data | float | [embed_dim] | -| v_weight_data | float/fp16/int8 | [embed_dim * vdim] | -| v_bias_data | float | [embed_dim] | -| out_weight_data| float/fp16/int8 | [qdim * embed_dim] | -| out_bias_data | float | [qdim] | -| q_weight_data_int8_scales| float | [embed_dim] | -| k_weight_data_int8_scales| float | [embed_dim] | -| v_weight_data_int8_scales| float | [embed_dim] | -| out_weight_data_int8_scales| float | [1] | +| param id | name | type | default | description | +| -------- | ---------------- | ----- | --------------------------------- | ----------------------------------- | +| 0 | embed_dim | int | 0 | | +| 1 | num_heads | int | 1 | | +| 2 | weight_data_size | int | 0 | qdim = weight_data_size / embed_dim | +| 3 | kdim | int | embed_dim | | +| 4 | vdim | int | embed_dim | | +| 5 | attn_mask | int | 0 | | +| 6 | scale | float | 1.f / sqrt(embed_dim / num_heads) | | +| 18 | int8_scale_term | int | 0 | | + +| weight | type | shape | +| --------------------------- | --------------- | ------------------ | +| q_weight_data | float/fp16/int8 | [embed_dim * qdim] | +| q_bias_data | float | [embed_dim] | +| k_weight_data | float/fp16/int8 | [embed_dim * kdim] | +| k_bias_data | float | [embed_dim] | +| v_weight_data | float/fp16/int8 | [embed_dim * vdim] | +| v_bias_data | float | [embed_dim] | +| out_weight_data | float/fp16/int8 | [qdim * embed_dim] | +| out_bias_data | float | [qdim] | +| q_weight_data_int8_scales | float | [embed_dim] | +| k_weight_data_int8_scales | float | [embed_dim] | +| v_weight_data_int8_scales | float | [embed_dim] | +| out_weight_data_int8_scales | float | [1] | # MVN + ``` if normalize_variance == 1 && across_channels == 1 y = (x - mean) / (sqrt(var) + eps) of whole blob if normalize_variance == 1 && across_channels == 0 y = (x - mean) / (sqrt(var) + eps) of each channel @@ -1328,20 +1399,22 @@ if normalize_variance == 0 && across_channels == 1 y = x - mean of whole bl if normalize_variance == 0 && across_channels == 0 y = x - mean of each channel ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | normalize_variance| int | 0 | | -| 1 | across_channels| int | 0 | | -| 2 | eps | float | 0.0001f | x = x / (sqrt(var) + eps) | +| param id | name | type | default | description | +| -------- | ------------------ | ----- | ------- | ------------------------- | +| 0 | normalize_variance | int | 0 | | +| 1 | across_channels | int | 0 | | +| 2 | eps | float | 0.0001f | x = x / (sqrt(var) + eps) | # Noop + ``` y = x ``` # Normalize + ``` if across_spatial == 1 && across_channel == 1 x2 = normalize(x) of whole blob if across_spatial == 1 && across_channel == 0 x2 = normalize(x) of 
each channel @@ -1349,79 +1422,85 @@ if across_spatial == 0 && across_channel == 1 x2 = normalize(x) of each pos y = x2 * scale ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | across_spatial| int | 0 | | -| 1 | channel_shared| int | 0 | | -| 2 | eps | float | 0.0001f | see eps mode | -| 3 | scale_data_size| int | 0 | | -| 4 | across_channel| int | 0 | | -| 9 | eps_mode | int | 0 | | +| param id | name | type | default | description | +| -------- | --------------- | ----- | ------- | ------------ | +| 0 | across_spatial | int | 0 | | +| 1 | channel_shared | int | 0 | | +| 2 | eps | float | 0.0001f | see eps mode | +| 3 | scale_data_size | int | 0 | | +| 4 | across_channel | int | 0 | | +| 9 | eps_mode | int | 0 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| scale_data | float | [scale_data_size] | +| weight | type | shape | +| ---------- | ----- | ----------------- | +| scale_data | float | [scale_data_size] | Eps Mode: -- 0 = caffe/mxnet x = x / sqrt(var + eps) -- 1 = pytorch x = x / max(sqrt(var), eps) -- 2 = tensorflow x = x / sqrt(max(var, eps)) + +- 0 = caffe/mxnet x = x / sqrt(var + eps) +- 1 = pytorch x = x / max(sqrt(var), eps) +- 2 = tensorflow x = x / sqrt(max(var, eps)) # Packing + ``` y = wrap_packing(x) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | out_elempack | int | 1 | | -| 1 | use_padding | int | 0 | | -| 2 | cast_type_from| int | 0 | | -| 3 | cast_type_to | int | 0 | | -| 4 | storage_type_from| int | 0 | | -| 5 | storage_type_to| int | 0 | | +| param id | name | type | default | description | +| -------- | ----------------- | ---- | ------- | ----------- | +| 0 | out_elempack | int | 1 | | +| 1 | use_padding | int | 0 | | +| 2 | cast_type_from | int | 0 | | +| 3 | cast_type_to | int | 0 | | +| 4 | storage_type_from | int | 0 | | +| 5 | storage_type_to | int | 0 | | # Padding + ``` y = pad(x, pads) ``` -| param id | name | type | default | description | -| --------- | ------------- | ---- | --------- | ----------------- | -| 0 | top | int | 0 | | -| 1 | bottom | int | 0 | | -| 2 | left | int | 0 | | -| 3 | right | int | 0 | | -| 4 | type | int | 0 | | -| 5 | value | float | 0 | | -| 6 | per_channel_pad_data_size| int | 0 | | -| 7 | front | int | stride_w | | -| 8 | behind | int | pad_left | | +| param id | name | type | default | description | +| -------- | ------------------------- | ----- | -------- | ----------- | +| 0 | top | int | 0 | | +| 1 | bottom | int | 0 | | +| 2 | left | int | 0 | | +| 3 | right | int | 0 | | +| 4 | type | int | 0 | | +| 5 | value | float | 0 | | +| 6 | per_channel_pad_data_size | int | 0 | | +| 7 | front | int | stride_w | | +| 8 | behind | int | pad_left | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| per_channel_pad_data| float | [per_channel_pad_data_size] | +| weight | type | shape | +| -------------------- | ----- | --------------------------- | +| per_channel_pad_data | float | [per_channel_pad_data_size] | Padding type: + - 0 = CONSTANT - 1 = REPLICATE - 2 = REFLECT # Permute + ``` y = reorder(x) ``` -| param id | name | type | default | description | -| --------- | ------------- | ---- | --------- | ----------------- | -| 0 | order_type | int | 0 | | +| param id | name | type | default | 
description | +| -------- | ---------- | ---- | ------- | ----------- | +| 0 | order_type | int | 0 | | Order Type: + - 0 = WH WHC WHDC - 1 = HW HWC HWDC - 2 = WCH WDHC @@ -1448,183 +1527,198 @@ Order Type: - 23 = CDHW # PixelShuffle + ``` if mode == 0 y = depth_to_space(x) where x channel order is sw-sh-outc if mode == 1 y = depth_to_space(x) where x channel order is outc-sw-sh ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ---- | --------- | ----------------- | -| 0 | upscale_factor| int | 1 | | -| 1 | mode | int | 0 | | +| param id | name | type | default | description | +| -------- | -------------- | ---- | ------- | ----------- | +| 0 | upscale_factor | int | 1 | | +| 1 | mode | int | 0 | | # Pooling + ``` x2 = pad(x, pads) x3 = pooling(x2, kernel, stride) ``` -| param id | name | type | default | description | -| --------- | --------------| ---- | --------- | ----------------- | -| 0 | pooling_type | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | stride_w | int | 1 | | -| 3 | pad_left | int | 0 | | -| 4 | global_pooling| int | 0 | | -| 5 | pad_mode | int | 0 | | -| 6 | avgpool_count_include_pad| int | 0 | | -| 7 | adaptive_pooling| int | 0 | | -| 8 | out_w | int | 0 | | -| 11 | kernel_h | int | kernel_w | | -| 12 | stride_h | int | stride_w | | -| 13 | pad_top | int | pad_left | | -| 14 | pad_right | int | pad_left | | -| 15 | pad_bottom | int | pad_top | | -| 18 | out_h | int | out_w | | +| param id | name | type | default | description | +| -------- | ------------------------- | ---- | -------- | ----------- | +| 0 | pooling_type | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | stride_w | int | 1 | | +| 3 | pad_left | int | 0 | | +| 4 | global_pooling | int | 0 | | +| 5 | pad_mode | int | 0 | | +| 6 | avgpool_count_include_pad | int | 0 | | +| 7 | adaptive_pooling | int | 0 | | +| 8 | out_w | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | stride_h | int | stride_w | | +| 13 | pad_top | int | pad_left | | +| 14 | pad_right | int | pad_left | | +| 15 | pad_bottom | int | pad_top | | +| 18 | out_h | int | out_w | | Pooling type: + - 0 = MAX - 1 = AVG Pad mode: + - 0 = full padding - 1 = valid padding - 2 = tensorflow padding=SAME or onnx padding=SAME_UPPER - 3 = onnx padding=SAME_LOWER # Pooling1D + ``` x2 = pad(x, pads) x3 = pooling1d(x2, kernel, stride) ``` -| param id | name | type | default | description | -| --------- | --------------| ---- | --------- | ----------------- | -| 0 | pooling_type | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | stride_w | int | 1 | | -| 3 | pad_left | int | 0 | | -| 4 | global_pooling| int | 0 | | -| 5 | pad_mode | int | 0 | | -| 6 | avgpool_count_include_pad| int | 0 | | -| 7 | adaptive_pooling| int | 0 | | -| 8 | out_w | int | 0 | | -| 14 | pad_right | int | pad_left | | +| param id | name | type | default | description | +| -------- | ------------------------- | ---- | -------- | ----------- | +| 0 | pooling_type | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | stride_w | int | 1 | | +| 3 | pad_left | int | 0 | | +| 4 | global_pooling | int | 0 | | +| 5 | pad_mode | int | 0 | | +| 6 | avgpool_count_include_pad | int | 0 | | +| 7 | adaptive_pooling | int | 0 | | +| 8 | out_w | int | 0 | | +| 14 | pad_right | int | pad_left | | Pooling type: + - 0 = MAX - 1 = AVG Pad mode: + - 0 = full padding - 1 = valid padding - 2 = tensorflow padding=SAME or onnx padding=SAME_UPPER - 3 = onnx padding=SAME_LOWER # Pooling3D + ``` x2 = pad(x, pads) x3 = pooling3d(x2, 
kernel, stride) ``` -| param id | name | type | default | description | -| --------- | --------------| ---- | --------- | ----------------- | -| 0 | pooling_type | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | stride_w | int | 1 | | -| 3 | pad_left | int | 0 | | -| 4 | global_pooling| int | 0 | | -| 5 | pad_mode | int | 0 | | -| 6 | avgpool_count_include_pad| int | 0 | | -| 7 | adaptive_pooling| int | 0 | | -| 8 | out_w | int | 0 | | -| 11 | kernel_h | int | kernel_w | | -| 12 | stride_h | int | stride_w | | -| 13 | pad_top | int | pad_left | | -| 14 | pad_right | int | pad_left | | -| 15 | pad_bottom | int | pad_top | | -| 16 | pad_behind | int | pad_front | | -| 18 | out_h | int | out_w | | -| 21 | kernel_d | int | kernel_w | | -| 22 | stride_d | int | stride_w | | -| 23 | pad_front | int | pad_left | | -| 28 | out_d | int | out_w | | +| param id | name | type | default | description | +| -------- | ------------------------- | ---- | --------- | ----------- | +| 0 | pooling_type | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | stride_w | int | 1 | | +| 3 | pad_left | int | 0 | | +| 4 | global_pooling | int | 0 | | +| 5 | pad_mode | int | 0 | | +| 6 | avgpool_count_include_pad | int | 0 | | +| 7 | adaptive_pooling | int | 0 | | +| 8 | out_w | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | stride_h | int | stride_w | | +| 13 | pad_top | int | pad_left | | +| 14 | pad_right | int | pad_left | | +| 15 | pad_bottom | int | pad_top | | +| 16 | pad_behind | int | pad_front | | +| 18 | out_h | int | out_w | | +| 21 | kernel_d | int | kernel_w | | +| 22 | stride_d | int | stride_w | | +| 23 | pad_front | int | pad_left | | +| 28 | out_d | int | out_w | | Pooling type: + - 0 = MAX - 1 = AVG Pad mode: + - 0 = full padding - 1 = valid padding - 2 = tensorflow padding=SAME or onnx padding=SAME_UPPER - 3 = onnx padding=SAME_LOWER # Power + ``` y = pow((shift + x * scale), power) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | power | float | 1.f | | -| 1 | scale | float | 1.f | | -| 2 | shift | float | 0.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | power | float | 1.f | | +| 1 | scale | float | 1.f | | +| 2 | shift | float | 0.f | | # PReLU + ``` if x < 0 y = x * slope else y = x ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_slope | int | 0 | | +| param id | name | type | default | description | +| -------- | --------- | ---- | ------- | ----------- | +| 0 | num_slope | int | 0 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| slope_data | float | [num_slope] | +| weight | type | shape | +| ---------- | ----- | ----------- | +| slope_data | float | [num_slope] | # Quantize + ``` y = float2int8(x * scale) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | scale_data_size| int | 1 | | +| param id | name | type | default | description | +| -------- | --------------- | ---- | ------- | ----------- | +| 0 | scale_data_size | int | 1 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| scale_data | float | 
[scale_data_size] | +| weight | type | shape | +| ---------- | ----- | ----------------- | +| scale_data | float | [scale_data_size] | # Reduction + ``` y = reduce_op(x * coeff) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | operation | int | 0 | | -| 1 | reduce_all | int | 1 | | -| 2 | coeff | float | 1.f | | -| 3 | axes | array | [ ] | | -| 4 | keepdims | int | 0 | | -| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 | +| param id | name | type | default | description | +| -------- | ---------- | ----- | ------- | ----------------------------- | +| 0 | operation | int | 0 | | +| 1 | reduce_all | int | 1 | | +| 2 | coeff | float | 1.f | | +| 3 | axes | array | [ ] | | +| 4 | keepdims | int | 0 | | +| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 | Operation type: + - 0 = SUM - 1 = ASUM - 2 = SUMSQ @@ -1638,96 +1732,103 @@ Operation type: - 10 = LogSumExp # ReLU + ``` if x < 0 y = x * slope else y = x ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | slope | float | 0.f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | slope | float | 0.f | | # Reorg + ``` if mode == 0 y = space_to_depth(x) where x channel order is sw-sh-outc if mode == 1 y = space_to_depth(x) where x channel order is outc-sw-sh ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ---- | --------- | ----------------- | -| 0 | stride | int | 1 | | -| 1 | mode | int | 0 | | +| param id | name | type | default | description | +| -------- | ------ | ---- | ------- | ----------- | +| 0 | stride | int | 1 | | +| 1 | mode | int | 0 | | # Requantize + ``` x2 = x * scale_in + bias x3 = activation(x2) y = float2int8(x3 * scale_out) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | scale_in_data_size| int | 1 | | -| 1 | scale_out_data_size| int | 1 | | -| 2 | bias_data_size| int | 0 | | -| 3 | activation_type| int | 0 | | -| 4 | activation_params| int | [ ] | | +| param id | name | type | default | description | +| -------- | ------------------- | ---- | ------- | ----------- | +| 0 | scale_in_data_size | int | 1 | | +| 1 | scale_out_data_size | int | 1 | | +| 2 | bias_data_size | int | 0 | | +| 3 | activation_type | int | 0 | | +| 4 | activation_params | int | [ ] | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| scale_in_data | float | [scale_in_data_size] | -| scale_out_data| float | [scale_out_data_size] | -| bias_data | float | [bias_data_size] | +| weight | type | shape | +| -------------- | ----- | --------------------- | +| scale_in_data | float | [scale_in_data_size] | +| scale_out_data | float | [scale_out_data_size] | +| bias_data | float | [bias_data_size] | # Reshape + ``` if permute == 1 y = hwc2chw(reshape(chw2hwc(x))) else y = reshape(x) ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | w | int | -233 | | -| 1 | h | int | -233 | | -| 11 | d | int | -233 | | -| 2 | c | int | -233 | | -| 3 | permute | int | 0 | | +| param id | name | 
type | default | description | +| -------- | ------- | ---- | ------- | ----------- | +| 0 | w | int | -233 | | +| 1 | h | int | -233 | | +| 11 | d | int | -233 | | +| 2 | c | int | -233 | | +| 3 | permute | int | 0 | | Reshape flag: + - 0 = copy from bottom - -1 = remaining - -233 = drop this dim(default) # RMSNorm + ``` split x along outmost axis into part x0, x1 ... root mean square normalize for each part x0, x1 ... y = x * gamma by elementwise ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | affine_size | int | 0 | | -| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | -| 2 | affine | int | 1 | | +| param id | name | type | default | description | +| -------- | ----------- | ----- | ------- | ----------------------- | +| 0 | affine_size | int | 0 | | +| 1 | eps | float | 0.001f | x = x / sqrt(var + eps) | +| 2 | affine | int | 1 | | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| gamma_data | float | [affine_size] | +| weight | type | shape | +| ---------- | ----- | ------------- | +| gamma_data | float | [affine_size] | # RNN + Apply a single-layer RNN to a feature sequence of `T` timesteps. The input blob shape is `[w=input_size, h=T]` and the output blob shape is `[w=num_output, h=T]`. ``` @@ -1735,127 +1836,137 @@ y = rnn(x) y0, hidden y1 = rnn(x0, hidden x1) ``` -* one_blob_only if bidirectional +- one_blob_only if bidirectional -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | hidden size of output | -| 1 | weight_data_size| int | 0 | total size of weight matrix | -| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | +| param id | name | type | default | description | +| -------- | ---------------- | ---- | ------- | ------------------------------------- | +| 0 | num_output | int | 0 | hidden size of output | +| 1 | weight_data_size | int | 0 | total size of weight matrix | +| 2 | direction | int | 0 | 0=forward, 1=reverse, 2=bidirectional | -| weight | type | shape | -| ------------- | ----- | --------------------- | -| weight_xc_data| float/fp16/int8 | [input_size, num_output, num_directions] | -| bias_c_data | float/fp16/int8 | [num_output, 1, num_directions] | -| weight_hc_data| float/fp16/int8 | [num_output, num_output, num_directions] | +| weight | type | shape | +| -------------- | --------------- | ---------------------------------------- | +| weight_xc_data | float/fp16/int8 | [input_size, num_output, num_directions] | +| bias_c_data | float/fp16/int8 | [num_output, 1, num_directions] | +| weight_hc_data | float/fp16/int8 | [num_output, num_output, num_directions] | Direction flag: + - 0 = forward only - 1 = reverse only - 2 = bidirectional # Scale + ``` if scale_data_size == -233 y = x0 * x1 else y = x * scale + bias ``` -* one_blob_only if scale_data_size != -233 -* support_inplace +- one_blob_only if scale_data_size != -233 +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | scale_data_size| int | 0 | | -| 1 | bias_term | int | 0 | | +| param id | name | type | default | description | +| -------- | --------------- | ---- | ------- | ----------- | +| 0 | scale_data_size | int | 0 | | +| 1 | bias_term | int | 0 | | -| weight | type | shape | -| ------------- | ----- | 
--------------------- | -| scale_data | float | [scale_data_size] | -| bias_data | float | [scale_data_size] | +| weight | type | shape | +| ---------- | ----- | ----------------- | +| scale_data | float | [scale_data_size] | +| bias_data | float | [scale_data_size] | # SELU + ``` if x < 0 y = (exp(x) - 1.f) * alpha * lambda else y = x * lambda ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | alpha | float | 1.67326324f| | -| 1 | lambda | float | 1.050700987f| | +| param id | name | type | default | description | +| -------- | ------ | ----- | ------------ | ----------- | +| 0 | alpha | float | 1.67326324f | | +| 1 | lambda | float | 1.050700987f | | # Shrink + ``` if x < -lambd y = x + bias if x > lambd y = x - bias else y = x ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | bias | float | 0.0f | | -| 1 | lambd | float | 0.5f | | +| param id | name | type | default | description | +| -------- | ----- | ----- | ------- | ----------- | +| 0 | bias | float | 0.0f | | +| 1 | lambd | float | 0.5f | | # ShuffleChannel + ``` if reverse == 0 y = shufflechannel(x) by group if reverse == 1 y = shufflechannel(x) by channel / group ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ---- | --------- | ----------------- | -| 0 | group | int | 1 | | -| 1 | reverse | int | 0 | | +| param id | name | type | default | description | +| -------- | ------- | ---- | ------- | ----------- | +| 0 | group | int | 1 | | +| 1 | reverse | int | 0 | | # Sigmoid + ``` y = 1 / (1 + exp(-x)) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # Slice + ``` split x along axis into slices, each part slice size is based on slices array ``` -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | slices | array | [ ] | | -| 1 | axis | int | 0 | | -| 2 | indices | array | [ ] | | +| param id | name | type | default | description | +| -------- | ------- | ----- | ------- | ----------- | +| 0 | slices | array | [ ] | | +| 1 | axis | int | 0 | | +| 2 | indices | array | [ ] | | # Softmax + ``` softmax(x, axis) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | axis | int | 0 | | -| 1 | fixbug0 | int | 0 | hack for bug fix, should be 1 | +| param id | name | type | default | description | +| -------- | ------- | ---- | ------- | ----------------------------- | +| 0 | axis | int | 0 | | +| 1 | fixbug0 | int | 0 | hack for bug fix, should be 1 | # Softplus + ``` y = log(exp(x) + 1) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # Spectrogram + ``` x1 = pad(x) if center y = stft(x1) @@ -1866,68 +1977,74 @@ if power == 1 return magnitude if power == 2 return square of magnitude ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | n_fft | int | 0 | | -| 1 | power | int | 0 | | -| 2 | hoplen | int | n_fft / 4 | | -| 3 | winlen | int | n_fft | | -| 4 | window_type | int 
| 0 | 0=ones 1=hann 2=hamming | -| 5 | center | int | 1 | | -| 6 | pad_type | int | 2 | 0=CONSTANT 1=REPLICATE 2=REFLECT | -| 7 | normalized | int | 0 | 0=no 1=n_fft 2=window-l2-energy | -| 8 | onesided | int | 1 | | +| param id | name | type | default | description | +| -------- | ----------- | ---- | --------- | -------------------------------- | +| 0 | n_fft | int | 0 | | +| 1 | power | int | 0 | | +| 2 | hoplen | int | n_fft / 4 | | +| 3 | winlen | int | n_fft | | +| 4 | window_type | int | 0 | 0=ones 1=hann 2=hamming | +| 5 | center | int | 1 | | +| 6 | pad_type | int | 2 | 0=CONSTANT 1=REPLICATE 2=REFLECT | +| 7 | normalized | int | 0 | 0=no 1=n_fft 2=window-l2-energy | +| 8 | onesided | int | 1 | | # Split + ``` y0, y1 ... = x ``` # Swish + ``` y = x / (1 + exp(-x)) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # TanH + ``` y = tanh(x) ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace # Threshold + ``` if x > threshold y = 1 else y = 0 ``` -* one_blob_only -* support_inplace +- one_blob_only +- support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | threshold | float | 0.f | | +| param id | name | type | default | description | +| -------- | --------- | ----- | ------- | ----------- | +| 0 | threshold | float | 0.f | | # Tile + ``` y = repeat tiles along axis for x ``` -* one_blob_only +- one_blob_only -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | axis | int | 0 | | -| 1 | tiles | int | 1 | | -| 2 | repeats | array | [ ] | | +| param id | name | type | default | description | +| -------- | ------- | ----- | ------- | ----------- | +| 0 | axis | int | 0 | | +| 1 | tiles | int | 1 | | +| 2 | repeats | array | [ ] | | # UnaryOp + ``` y = unaryop(x) ``` @@ -1935,11 +2052,12 @@ y = unaryop(x) - one_blob_only - support_inplace -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | op_type | int | 0 | Operation type as follows | +| param id | name | type | default | description | +| -------- | ------- | ---- | ------- | ------------------------- | +| 0 | op_type | int | 0 | Operation type as follows | Operation type: + - 0 = ABS - 1 = NEG - 2 = FLOOR @@ -1962,22 +2080,23 @@ Operation type: - 19 = TRUNC # Unfold + ``` y = unfold(x) ``` -* one_blob_only - -| param id | name | type | default | description | -| --------- | ------------- | ----- | --------- | ----------------- | -| 0 | num_output | int | 0 | | -| 1 | kernel_w | int | 0 | | -| 2 | dilation_w | int | 1 | | -| 3 | stride_w | int | 1 | | -| 4 | pad_left | int | 0 | | -| 11 | kernel_h | int | kernel_w | | -| 12 | dilation_h | int | dilation_w | | -| 13 | stride_h | int | stride_w | | -| 14 | pad_top | int | pad_left | | -| 15 | pad_right | int | pad_left | | -| 16 | pad_bottom | int | pad_top | | +- one_blob_only + +| param id | name | type | default | description | +| -------- | ---------- | ---- | ---------- | ----------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | diff --git 
a/src/CMakeLists.txt b/src/CMakeLists.txt index c97235d97a0..60f24361d8b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -169,6 +169,7 @@ ncnn_add_layer(Shrink) ncnn_add_layer(RMSNorm) ncnn_add_layer(Spectrogram) ncnn_add_layer(InverseSpectrogram) +ncnn_add_layer(Flip) if(NCNN_VULKAN) ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp new file mode 100644 index 00000000000..f8726e0fb9f --- /dev/null +++ b/src/layer/flip.cpp @@ -0,0 +1,41 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "flip.h" + +namespace ncnn { + +Flip::Flip() +{ + one_blob_only = true; +} + +int Flip::load_param(const ParamDict& pd) +{ + axis = pd.get(0, Mat()); + // 打印 + const int* axis_ptr = axis; + printf("axis_len = %d", axis.w); + printf("axis[0] = %d", axis_ptr[0]); + printf("axis[1] = %d", axis_ptr[1]); + return 0; +} + +int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const +{ + // wip + return 0; +} + +} // namespace ncnn diff --git a/src/layer/flip.h b/src/layer/flip.h new file mode 100644 index 00000000000..b75bf5e68ef --- /dev/null +++ b/src/layer/flip.h @@ -0,0 +1,37 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef LAYER_FLIP_H +#define LAYER_FLIP_H + +#include "layer.h" + +namespace ncnn { + +class Flip : public Layer +{ +public: + Flip(); + + virtual int load_param(const ParamDict& pd); + + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + +public: + Mat axis; // 维度翻转 +}; + +} // namespace ncnn + +#endif // LAYER_FLIP_H diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index b1ac6f5c024..5d681ab9c4b 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -575,6 +575,7 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/torch_cumsum.cpp pass_ncnn/torch_diag.cpp pass_ncnn/torch_flatten.cpp + pass_ncnn/torch_flip.cpp pass_ncnn/torch_istft.cpp pass_ncnn/torch_logsumexp.cpp pass_ncnn/torch_matmul.cpp diff --git a/tools/pnnx/src/pass_ncnn/torch_flip.cpp b/tools/pnnx/src/pass_ncnn/torch_flip.cpp new file mode 100644 index 00000000000..bc0e3348548 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/torch_flip.cpp @@ -0,0 +1,56 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class torch_flip : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +torch.flip op_0 1 1 input out dims=%dims +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Flip"; + } + + const char* name_str() const + { + return "flip"; + } + + void write(Operator* op, const std::map& captured_params) const + { + const std::vector& dims = captured_params.at("dims").ai; + + // 设置参数 + op->params["0"] = dims; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(torch_flip, 20) + +} // namespace ncnn + +} // namespace pnnx \ No newline at end of file From 700e18a1d2541ebcc8b475a16020d2454b4d4bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Sun, 12 Jan 2025 21:56:41 +0800 Subject: [PATCH 02/14] done --- src/layer/flip.cpp | 538 ++++++++++++++++++++++- src/layer/flip.h | 2 +- tests/CMakeLists.txt | 1 + tests/test_flip.cpp | 79 ++++ tools/pnnx/tests/ncnn/CMakeLists.txt | 1 + tools/pnnx/tests/ncnn/test_torch_flip.py | 151 +++++++ 6 files changed, 765 insertions(+), 7 deletions(-) create mode 100644 tests/test_flip.cpp create mode 100644 tools/pnnx/tests/ncnn/test_torch_flip.py diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index f8726e0fb9f..6757b853f80 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -24,17 +24,543 @@ Flip::Flip() int Flip::load_param(const ParamDict& pd) { axis = pd.get(0, Mat()); - // 打印 - const int* axis_ptr = axis; - printf("axis_len = %d", axis.w); - printf("axis[0] = %d", axis_ptr[0]); - printf("axis[1] = %d", axis_ptr[1]); + // 调试 + // const int *axis_ptr = axis; + // printf("axis_len = %d\n", axis.w); + 
// printf("axis[0] = %d\n", axis_ptr[0]); return 0; } int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { - // wip + // 已知参数 + int dims = bottom_blob.dims; + int w = bottom_blob.w; + int h = bottom_blob.h; + int d = bottom_blob.d; + int channels = bottom_blob.c; + size_t elemsize = bottom_blob.elemsize; + + // 校准输入参数 + if (axis.w > 4) + { + return -1; + } + const int* axis_ptr = axis; + + if (dims == 1) + { + // 1D 只有一种情况 + top_blob.create(w, elemsize, opt.blob_allocator); + const float* ptr = bottom_blob; + float* outptr = top_blob; + for (int i = 0; i < w; i++) + { + outptr[i] = ptr[w - 1 - i]; + } + } + else if (dims == 2) + { + // 2D 有三种,安装上下、左右和上下左右同时翻转;[-2/0上下翻转, -1/1左右翻转,交叉为上下左右翻转] + top_blob.create(w, h, elemsize, opt.blob_allocator); + if (axis.w == 1) + { + if (axis_ptr[0] == -2 || axis_ptr[0] == 0) + { + // 按照行翻转 + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.row(h - 1 - i); // 从最后一行开始 + float* outptr = top_blob.row(i); // 输出到当前行 + + // 直接复制整行数据 + memcpy(outptr, ptr, w * sizeof(float)); + } + } + else + { + // 按照列翻转 + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.row(i); + float* outptr = top_blob.row(i); + + // 使用临时buffer存储反转的行数据 + std::vector line_buffer(w); + for (int j = 0; j < w; j++) + { + line_buffer[j] = ptr[w - 1 - j]; + } + + // 一次性复制整行 + memcpy(outptr, line_buffer.data(), w * sizeof(float)); + } + } + } + else + { + // 当axis.w=2时,上下左右都翻转 + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.row(h - 1 - i); // 从最后一行开始读取 + float* outptr = top_blob.row(i); // 输出到当前行 + + // 每行内左右翻转 + for (int j = 0; j < w; j++) + { + outptr[j] = ptr[w - 1 - j]; // 反向读取每行像素 + } + } + } + } + else if (dims == 3) + { + top_blob.create(w, h, channels, elemsize, opt.blob_allocator); + if (axis.w == 1) + { + // w、h、c + // 约定到正数,简化后续判断 + int axis0 = axis_ptr[0] < 0 ? 3 + axis_ptr[0] : axis_ptr[0]; + if (axis0 == 0) + { + // -3/0 整体上下翻转 + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(channels - 1 - i).row(j); // 从最后一个channel开始 + float* outptr = top_blob.channel(i).row(j); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + else if (axis0 == 1) + { + // -2/1 整体内部上下翻转 + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(i).row(h - 1 - j); + float* outptr = top_blob.channel(i).row(j); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + else + { + // -1/2 整体左右翻转 + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(i).row(j); + float* outptr = top_blob.channel(i).row(j); + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else if (axis.w == 2) + { + // wh、wc、hc + int axis0 = axis_ptr[0] < 0 ? 3 + axis_ptr[0] : axis_ptr[0]; + int axis1 = axis_ptr[1] < 0 ? 
3 + axis_ptr[1] : axis_ptr[1]; + int axis_sum = axis0 + axis1; + if (axis_sum == 1) + { + // 对应wh + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + // 组合两种翻转:channel维度和行维度同时翻转 + const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); + float* outptr = top_blob.channel(i).row(j); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + else if (axis_sum == 2) + { + // 对应wc + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(channels - 1 - i).row(j); + float* outptr = top_blob.channel(i).row(j); + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + else if (axis_sum == 3) + { + // 对应hc + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(i).row(h - 1 - j); + float* outptr = top_blob.channel(i).row(j); + + // 增加左右翻转 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else + { + // whc + for (int i = 0; i < channels; i++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); + float* outptr = top_blob.channel(i).row(j); + + // 左右翻转实现完全倒序 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else if (dims == 4) + { + top_blob.create(w, h, d, channels, elemsize, opt.blob_allocator); + if (axis.w == 1) + { + // w、h、d、c + int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; + if (axis0 == 0) + { + // -4/0 整体上下翻转 torch中按c维度翻转 + for (int c = 0; c < channels; c++) // 遍历channels=3 + { + int flipped_c = channels - 1 - c; // 计算channels翻转位置 + + for (int z = 0; z < d; z++) // 遍历d=2维度 + { + for (int j = 0; j < h; j++) // 遍历行 + { + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + j)); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + } + else if (axis0 == 1) + { + // -3/1 torh中按d维度内部上下翻转 + for (int i = 0; i < channels; i++) // 遍历channels + { + for (int z = 0; z < d; z++) // 遍历d维度 + { + for (int j = 0; j < h; j++) // 遍历h维度 + { + // 翻转d维度的数据读取位置 + const float* ptr = bottom_blob.channel(i).row((d - 1 - z) * h + j); + float* outptr = const_cast(top_blob.channel(i).row(z * h + j)); + // 逐行复制w元素 + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + } + else if (axis0 == 2) + { + // -2/2 按torch中H维度翻转 上下 + for (int i = 0; i < channels; i++) + { + for (int z = 0; z < d; z++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(i).row(z * h + (h - 1 - j)); + float* outptr = top_blob.channel(i).row(z * h + j); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + } + else + { + // -1/3 按torch中W维度翻转 左右 + for (int i = 0; i < channels; i++) + { + for (int z = 0; z < d; z++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(i).row(z * h + j); + float* outptr = top_blob.channel(i).row(z * h + j); + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + } + else if (axis.w == 2) + { + // dc1、dh2、dw3、ch3、cw4、hw5 + int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; + int axis1 = axis_ptr[1] < 0 ? 
4 + axis_ptr[1] : axis_ptr[1]; + int axis_sum = axis0 + axis1; + if (axis_sum == 1) + { + // 对应dc + for (int c = 0; c < channels; c++) // 遍历channels + { + int flipped_c = channels - 1 - c; // 翻转后的channel位置 + + for (int z = 0; z < d; z++) // 遍历d维度 + { + int flipped_d = d - 1 - z; // 翻转后的d位置 + + for (int j = 0; j < h; j++) // 遍历行 + { + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + j)); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + } + else if (axis_sum == 2) + { + // 对应dh + for (int c = 0; c < channels; c++) // 遍历 channels=2 维度 + { + int flipped_c = channels - 1 - c; // 计算 c 维度翻转位置 (0→1, 1→0) + + for (int z = 0; z < d; z++) // 遍历 d=3 维度 + { + // 按翻转顺序逐行复制 h 维度数据 + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + i); + float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + (h - 1 - i))); // 保持z维度顺序,翻转h维度 + memcpy(outptr, ptr, w * sizeof(float)); // 按行复制,保持 w 维度顺序 + } + } + } + } + else if (axis_sum == 3) + { + // 对应dw;有一个为0或3 + if (axis0 == 0 || axis0 == 3) + { + // 对应dw + for (int c = 0; c < channels; c++) + { + int flipped_c = channels - 1 - c; // 翻转c维度 + + for (int z = 0; z < d; z++) // d维度保持不变 + { + for (int j = 0; j < h; j++) // h维度保持不变 + { + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + j)); + + // 翻转w维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else + { + // 对应ch + for (int c = 0; c < channels; c++) + { + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; + + for (int j = 0; j < h; j++) + { + int flipped_h = h - 1 - j; + // 读取源数据 + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + flipped_h)); + memcpy(outptr, ptr, w * sizeof(float)); + } + } + } + } + } + else if (axis_sum == 4) + { + // 对应cw + for (int c = 0; c < channels; c++) + { + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; // 翻转 d 维度 + + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + j)); // c维度保持不变 + + // 翻转 w 维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else + { + // 对应hw + for (int c = 0; c < channels; c++) + { + for (int z = 0; z < d; z++) + { + for (int j = 0; j < h; j++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + j); + float* outptr = const_cast(top_blob.channel(c).row(z * h + (h - 1 - j))); // 翻转 h 维度 + + // 翻转 w 维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + } + else if (axis.w == 3) + { + // dch3、dcw4、chw6 + int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; + int axis1 = axis_ptr[1] < 0 ? 4 + axis_ptr[1] : axis_ptr[1]; + int axis2 = axis_ptr[2] < 0 ? 
4 + axis_ptr[2] : axis_ptr[2]; + int axis_sum = axis0 + axis1 + axis2; + if (axis_sum == 3) + { + // 对应dch,除w外,其余全翻转 + for (int c = 0; c < channels; c++) + { + int flipped_c = channels - 1 - c; // 翻转c维度 + + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; // 翻转d维度 + + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + i); + float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + memcpy(outptr, ptr, w * sizeof(float)); // w维度保持不变 + } + } + } + } + else if (axis_sum == 4) + { + // 对应dcw,除h外,其余全翻转 + for (int c = 0; c < channels; c++) + { + int flipped_c = channels - 1 - c; // 翻转c维度 + + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; // 翻转d维度 + + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + i); + float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + i)); // h维度保持不变 + + // 翻转w维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + else if (axis_sum == 6) + { + // 对应chw,除了c外全翻转 + for (int c = 0; c < channels; c++) // c维度保持不变 + { + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; // 翻转d维度 + + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + i); + float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + + // 翻转w维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + } + else + { + // dchw全部翻转 + for (int c = 0; c < channels; c++) + { + int flipped_c = channels - 1 - c; // 翻转c维度 + + for (int z = 0; z < d; z++) + { + int flipped_d = d - 1 - z; // 翻转d维度 + + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).row(z * h + i); + float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + + // 翻转w维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } + } + else + { + return -1; + } + return 0; } diff --git a/src/layer/flip.h b/src/layer/flip.h index b75bf5e68ef..61a05d4538a 100644 --- a/src/layer/flip.h +++ b/src/layer/flip.h @@ -29,7 +29,7 @@ class Flip : public Layer virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; public: - Mat axis; // 维度翻转 + Mat axis; // 翻转维度 }; } // namespace ncnn diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f55859e736e..48853470d3f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -105,6 +105,7 @@ ncnn_add_layer_test(Embed) ncnn_add_layer_test(Erf) ncnn_add_layer_test(ExpandDims) ncnn_add_layer_test(Flatten) +ncnn_add_layer_test(Flip) ncnn_add_layer_test(Fold) ncnn_add_layer_test(GELU) ncnn_add_layer_test(GLU) diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp new file mode 100644 index 00000000000..55795caadc1 --- /dev/null +++ b/tests/test_flip.cpp @@ -0,0 +1,79 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer.h"
+#include "testutil.h"
+
+static int test_flip(const ncnn::Mat& a, std::vector<int> axis)
+{
+    ncnn::Mat axis_mat(axis.size());
+    for (size_t i = 0; i < axis.size(); i++)
+    {
+        axis_mat[i] = axis[i];
+    }
+    ncnn::ParamDict pd;
+    pd.set(0, axis_mat); // axis
+
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer("Flip", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_flip failed a.dims=%d a=(%d %d %d %d)\n", a.dims, a.w, a.h, a.d, a.c);
+    }
+
+    return ret;
+}
+
+static int test_flip_0()
+{
+    return 0
+           || test_flip(RandomMat(3, 2, 6, 7), {0})
+           || test_flip(RandomMat(3, 2, 6, 7), {0, 1})
+           || test_flip(RandomMat(3, 2, 6, 7), {0, 2})
+           || test_flip(RandomMat(3, 2, 6, 7), {0, 3});
+}
+
+static int test_flip_1()
+{
+    return 0
+           || test_flip(RandomMat(2, 3, 5), {0})
+           || test_flip(RandomMat(4, 2, 5), {0, 1})
+           || test_flip(RandomMat(3, 4, 2), {0, 1, 2});
+}
+
+static int test_flip_2()
+{
+    return 0
+           || test_flip(RandomMat(8, 2), {-2})
+           || test_flip(RandomMat(16, 3), {-2, -1});
+}
+
+static int test_flip_3()
+{
+    return 0
+           || test_flip(RandomMat(16), {-1})
+           || test_flip(RandomMat(32), {0});
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_flip_0()
+           || test_flip_1()
+           || test_flip_2()
+           || test_flip_3();
+}
\ No newline at end of file
diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt
index 42c3bed32e0..54c8896ef77 100644
--- a/tools/pnnx/tests/ncnn/CMakeLists.txt
+++ b/tools/pnnx/tests/ncnn/CMakeLists.txt
@@ -188,6 +188,7 @@ pnnx_ncnn_add_test(torch_clamp)
 pnnx_ncnn_add_test(torch_cos)
 pnnx_ncnn_add_test(torch_exp)
 pnnx_ncnn_add_test(torch_floor)
+pnnx_ncnn_add_test(torch_flip)
 pnnx_ncnn_add_test(torch_log)
 pnnx_ncnn_add_test(torch_log10)
 pnnx_ncnn_add_test(torch_maximum)
diff --git a/tools/pnnx/tests/ncnn/test_torch_flip.py b/tools/pnnx/tests/ncnn/test_torch_flip.py
new file mode 100644
index 00000000000..4c9702cc505
--- /dev/null
+++ b/tools/pnnx/tests/ncnn/test_torch_flip.py
@@ -0,0 +1,151 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations under the License. +# Tencent is pleased to support the open source community by making ncnn available. +# +# Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, y, z, d): + # 1D + x0 = torch.flip(x, [0]) + # 2D + y0 = torch.flip(y, [0]) + y1 = torch.flip(y, [1]) + y2 = torch.flip(y, [-2, -1]) + # 3D + z0 = torch.flip(z, [0]) + z1 = torch.flip(z, [1]) + z2 = torch.flip(z, [2]) + z3 = torch.flip(z, [0, 1]) + z4 = torch.flip(z, [0, 2]) + z5 = torch.flip(z, [1, 2]) + # 4D + d0 = torch.flip(d, [-1]) + d1 = torch.flip(d, [-2]) + d2 = torch.flip(d, [-3]) + d3 = torch.flip(d, [-4]) + d4 = torch.flip(d, [0, 1]) + d5 = torch.flip(d, [0, 2]) + d6 = torch.flip(d, [0, 3]) + d7 = torch.flip(d, [1, 2]) + d8 = torch.flip(d, [1, 3]) + d9 = torch.flip(d, [2, 3]) + d10 = torch.flip(d, [0, 1, 2]) + d11 = torch.flip(d, [0, 1, 3]) + d12 = torch.flip(d, [1, 2, 3]) + d13 = torch.flip(d, [0, 1, 2, 3]) + + return ( + x0, + y0, + y1, + y2, + z0, + z1, + z2, + z3, + z4, + z5, + d0, + d1, + d2, + d3, + d4, + d5, + d6, + d7, + d8, + d9, + d10, + d11, + d12, + d13, + ) + + +def test(): + net = Model() + net.eval() + + torch.manual_seed(0) + x = torch.rand(36) # 1D + y = torch.rand(4, 7) # 2D + z = torch.rand(3, 4, 5) # 3D + d = torch.rand(4, 2, 6, 7) # 4D + + a = net(x, y, z, d) + + # export torchscript + mod = torch.jit.trace(net, (x, y, z, d)) + mod.save("test_torch_flip.pt") + + # torchscript to pnnx + import os + + os.system( + "../../src/pnnx test_torch_flip.pt inputshape=[36],[4,7],[3,4,5],[4,2,6,7]" + ) + + # pnnx inference + import test_torch_flip_ncnn + + b = test_torch_flip_ncnn.test_inference() + + for a0, b0 in zip(a, b): + if not torch.allclose(a0, b0, 1e-3, 1e-3): + return False + return True + + +if __name__ == "__main__": + if test(): + exit(0) + else: + exit(1) From 3cce5b4850d13cce49ae7ce4962d3ad629e25d5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:20:13 +0800 Subject: [PATCH 03/14] test_ctest --- .github/workflows/linux-x64-cpu-gcc.yml | 220 ++++++++++++------------ src/layer/flip.cpp | 3 +- 2 files changed, 112 insertions(+), 111 deletions(-) diff --git a/.github/workflows/linux-x64-cpu-gcc.yml b/.github/workflows/linux-x64-cpu-gcc.yml index ab2185be3e7..580000b498e 100644 --- a/.github/workflows/linux-x64-cpu-gcc.yml +++ b/.github/workflows/linux-x64-cpu-gcc.yml @@ -1,33 +1,33 @@ name: linux-x64-cpu-gcc on: push: - branches: [master] + # branches: [master] paths: - - '.github/workflows/linux-x64-cpu-gcc.yml' - - 'toolchains/host-c.gcc.toolchain.cmake' - - 'CMakeLists.txt' - - 'cmake/**' - - 'src/*' - - 'src/layer/*' - - 'src/layer/x86/**' - - 'tests/**' - - 'tools/**' - - '!tools/pnnx/**' - - 
'examples/**' + - ".github/workflows/linux-x64-cpu-gcc.yml" + - "toolchains/host-c.gcc.toolchain.cmake" + - "CMakeLists.txt" + - "cmake/**" + - "src/*" + - "src/layer/*" + - "src/layer/x86/**" + - "tests/**" + - "tools/**" + - "!tools/pnnx/**" + - "examples/**" pull_request: - branches: [master] + # branches: [master] paths: - - '.github/workflows/linux-x64-cpu-gcc.yml' - - 'toolchains/host-c.gcc.toolchain.cmake' - - 'CMakeLists.txt' - - 'cmake/**' - - 'src/*' - - 'src/layer/*' - - 'src/layer/x86/**' - - 'tests/**' - - 'tools/**' - - '!tools/pnnx/**' - - 'examples/**' + - ".github/workflows/linux-x64-cpu-gcc.yml" + - "toolchains/host-c.gcc.toolchain.cmake" + - "CMakeLists.txt" + - "cmake/**" + - "src/*" + - "src/layer/*" + - "src/layer/x86/**" + - "tests/**" + - "tools/**" + - "!tools/pnnx/**" + - "examples/**" concurrency: group: linux-x64-cpu-gcc-${{ github.ref }} cancel-in-progress: true @@ -38,97 +38,97 @@ jobs: linux-gcc: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v4 - - name: update - run: sudo apt-get update - - name: protobuf - run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev - - name: build-sse2 - run: | - mkdir build-sse2 && cd build-sse2 - cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-sse2 - run: cd build-sse2 && ctest --output-on-failure -j $(nproc) - - name: build-shared - run: | - mkdir build-shared && cd build-shared - cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON .. - cmake --build . -j $(nproc) - - name: build-avx2 - run: | - mkdir build-avx2 && cd build-avx2 - cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx2 - run: cd build-avx2 && ctest --output-on-failure -j $(nproc) - - name: build-avx - run: | - mkdir build-avx && cd build-avx - cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx - run: cd build-avx && ctest --output-on-failure -j $(nproc) - - name: build-avx1-2 - run: | - mkdir build-avx1-2 && cd build-avx1-2 - cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx1-2 - run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc) - - name: build-noint8 - run: | - mkdir build-noint8 && cd build-noint8 - cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-noint8 - run: cd build-noint8 && ctest --output-on-failure -j $(nproc) + - uses: actions/checkout@v4 + - name: update + run: sudo apt-get update + - name: protobuf + run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev + - name: build-sse2 + run: | + mkdir build-sse2 && cd build-sse2 + cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-sse2 + run: cd build-sse2 && ctest --output-on-failure -j $(nproc) + - name: build-shared + run: | + mkdir build-shared && cd build-shared + cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON .. + cmake --build . -j $(nproc) + - name: build-avx2 + run: | + mkdir build-avx2 && cd build-avx2 + cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-avx2 + run: cd build-avx2 && ctest --output-on-failure -j $(nproc) + - name: build-avx + run: | + mkdir build-avx && cd build-avx + cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . 
-j $(nproc) + - name: test-avx + run: cd build-avx && ctest --output-on-failure -j $(nproc) + - name: build-avx1-2 + run: | + mkdir build-avx1-2 && cd build-avx1-2 + cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-avx1-2 + run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc) + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j $(nproc) linux-gcc-cpp03-nostdio-nostring-simplestl: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v4 - - name: build-nostdio - run: | - mkdir build-nostdio && cd build-nostdio - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-c++03.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: test-nostdio - run: cd build-nostdio && ctest --output-on-failure -j $(nproc) - - name: build-nostdio-nostring - run: | - mkdir build-nostdio-nostring && cd build-nostdio-nostring - cmake -DNCNN_STDIO=OFF -DNCNN_STRING=OFF -DNCNN_BUILD_TESTS=OFF -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: build-simplestl - run: | - mkdir build-simplestl && cd build-simplestl - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: test-simplestl - run: cd build-simplestl && ctest --output-on-failure -j $(nproc) - - name: build-simplestl-simpleomp - run: | - mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: test-simplestl-simpleomp - run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc) + - uses: actions/checkout@v4 + - name: build-nostdio + run: | + mkdir build-nostdio && cd build-nostdio + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-c++03.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-nostdio + run: cd build-nostdio && ctest --output-on-failure -j $(nproc) + - name: build-nostdio-nostring + run: | + mkdir build-nostdio-nostring && cd build-nostdio-nostring + cmake -DNCNN_STDIO=OFF -DNCNN_STRING=OFF -DNCNN_BUILD_TESTS=OFF -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: build-simplestl + run: | + mkdir build-simplestl && cd build-simplestl + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . 
-j $(nproc) + - name: test-simplestl + run: cd build-simplestl && ctest --output-on-failure -j $(nproc) + - name: build-simplestl-simpleomp + run: | + mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-simplestl-simpleomp + run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc) linux-gcc-avx512: runs-on: [self-hosted, linux, t4] steps: - - uses: actions/checkout@v4 - - name: build - env: - CC: gcc - CXX: g++ - LD_LIBRARY_PATH: /data/action/install/lib64 - run: | - mkdir build && cd build - cmake -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j 4 - - name: test - env: - LD_LIBRARY_PATH: /data/action/install/lib64 - run: cd build && ctest --output-on-failure -j 4 + - uses: actions/checkout@v4 + - name: build + env: + CC: gcc + CXX: g++ + LD_LIBRARY_PATH: /data/action/install/lib64 + run: | + mkdir build && cd build + cmake -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j 4 + - name: test + env: + LD_LIBRARY_PATH: /data/action/install/lib64 + run: cd build && ctest --output-on-failure -j 4 diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 6757b853f80..15ddc7e05a8 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -180,7 +180,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons // 组合两种翻转:channel维度和行维度同时翻转 const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); float* outptr = top_blob.channel(i).row(j); - memcpy(outptr, ptr, w * sizeof(float)); + // memcpy(outptr, ptr, w * sizeof(float)); ctest修复测试 + memcpy(outptr, ptr, w * elemsize); } } } From eae435d27e60866972162c4645af94a24a2b0a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 13:47:56 +0800 Subject: [PATCH 04/14] ctest char --- src/layer/flip.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 15ddc7e05a8..86475d8b9b7 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -166,28 +166,28 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } else if (axis.w == 2) { - // wh、wc、hc + // ch、cw、hw int axis0 = axis_ptr[0] < 0 ? 3 + axis_ptr[0] : axis_ptr[0]; int axis1 = axis_ptr[1] < 0 ? 
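// Note: for 3D blobs a negative axis is wrapped by adding the number of
// dimensions, so -3 maps to 0 (channels), -2 to 1 (rows) and -1 to 2 (columns).
// After normalization the pair of flipped axes is identified by its sum, which
// is unique among two-element subsets of {0, 1, 2}:
//   {0, 1} -> axis_sum 1 : flip channels and rows
//   {0, 2} -> axis_sum 2 : flip channels and columns
//   {1, 2} -> axis_sum 3 : flip rows and columns
// A minimal sketch of the same wrap-around, using a hypothetical helper name:
//   static int normalize_axis(int a, int dims) { return a < 0 ? a + dims : a; }
//   // normalize_axis(-2, 3) == 1, normalize_axis(-1, 3) == 2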
3 + axis_ptr[1] : axis_ptr[1]; int axis_sum = axis0 + axis1; if (axis_sum == 1) { - // 对应wh + // 对应ch for (int i = 0; i < channels; i++) { for (int j = 0; j < h; j++) { // 组合两种翻转:channel维度和行维度同时翻转 - const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); - float* outptr = top_blob.channel(i).row(j); - // memcpy(outptr, ptr, w * sizeof(float)); ctest修复测试 + const unsigned char* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); + unsigned char* outptr = top_blob.channel(i).row(j); + // memcpy(outptr, ptr, w * sizeof(float)); memcpy(outptr, ptr, w * elemsize); } } } else if (axis_sum == 2) { - // 对应wc + // 对应cw for (int i = 0; i < channels; i++) { for (int j = 0; j < h; j++) @@ -203,7 +203,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } else if (axis_sum == 3) { - // 对应hc + // 对应hw for (int i = 0; i < channels; i++) { for (int j = 0; j < h; j++) From d8424f8ab8a7d31f7e649aff4a2bbcb2c0910a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:14:51 +0800 Subject: [PATCH 05/14] ctest 2 --- src/layer/flip.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 86475d8b9b7..4cf4dcb9f46 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -178,10 +178,12 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int j = 0; j < h; j++) { // 组合两种翻转:channel维度和行维度同时翻转 - const unsigned char* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); - unsigned char* outptr = top_blob.channel(i).row(j); + const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); + float* outptr = top_blob.channel(i).row(j); // memcpy(outptr, ptr, w * sizeof(float)); - memcpy(outptr, ptr, w * elemsize); + // memcpy(outptr, ptr, w * elemsize); + for (int x = 0; x < w; x++) + outptr[x] = ptr[x]; } } } From 5be32e11b7427b7d0c6073962b59464ff363bdd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 21:00:39 +0800 Subject: [PATCH 06/14] ctest 3 --- src/layer/flip.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 4cf4dcb9f46..ad8deac089a 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -179,11 +179,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons { // 组合两种翻转:channel维度和行维度同时翻转 const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); - float* outptr = top_blob.channel(i).row(j); - // memcpy(outptr, ptr, w * sizeof(float)); - // memcpy(outptr, ptr, w * elemsize); - for (int x = 0; x < w; x++) - outptr[x] = ptr[x]; + float* outptr = const_cast(top_blob.channel(i).row(j)); + memcpy(outptr, ptr, w * sizeof(float)); } } } From 0a3aa28493177e26b9fc468441abd6293c0ae5ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 21:20:09 +0800 Subject: [PATCH 07/14] ctest 4 --- src/layer/flip.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index ad8deac089a..5ec94816247 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -116,6 +116,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons else if (dims == 3) { top_blob.create(w, h, channels, elemsize, opt.blob_allocator); + if (top_blob.empty()) + 
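// Note: create() leaves the Mat empty when allocation fails, so this guard
// returns -100, the error code used throughout ncnn layers for a failed blob
// allocation, instead of writing through null row pointers further down.
// A caller sees it as a nonzero forward() result, e.g. (illustrative only,
// flip_layer is a placeholder name):
//   int ret = flip_layer->forward(bottom, top, opt);
//   if (ret != 0) { /* -100 here means top could not be allocated */ }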
return -100; if (axis.w == 1) { // w、h、c From 3ac68cd6e3bbe7f98de966c7a9010b59dc03249d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 21:58:29 +0800 Subject: [PATCH 08/14] fix less c++11 --- .github/workflows/linux-x64-cpu-gcc.yml | 2 +- tests/test_flip.cpp | 93 +++++++++++++++++++------ 2 files changed, 74 insertions(+), 21 deletions(-) diff --git a/.github/workflows/linux-x64-cpu-gcc.yml b/.github/workflows/linux-x64-cpu-gcc.yml index 580000b498e..31abbe47c25 100644 --- a/.github/workflows/linux-x64-cpu-gcc.yml +++ b/.github/workflows/linux-x64-cpu-gcc.yml @@ -15,7 +15,7 @@ on: - "!tools/pnnx/**" - "examples/**" pull_request: - # branches: [master] + branches: [master] paths: - ".github/workflows/linux-x64-cpu-gcc.yml" - "toolchains/host-c.gcc.toolchain.cmake" diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp index 55795caadc1..080de2c16b0 100644 --- a/tests/test_flip.cpp +++ b/tests/test_flip.cpp @@ -15,22 +15,61 @@ #include "layer.h" #include "testutil.h" -static int test_flip(const ncnn::Mat& a, std::vector axis) +// 为兼容低于c++11弃用如下实现 +// ncnn::Mat axis_mat(axis.size()); +// for (size_t i = 0; i < axis.size(); i++) +// { +// axis_mat[i] = axis[i]; +// } +static ncnn::Mat IntArrayMat(int a0) +{ + ncnn::Mat m(1); + int* p = m; + p[0] = a0; + return m; +} + +static ncnn::Mat IntArrayMat(int a0, int a1) +{ + ncnn::Mat m(2); + int* p = m; + p[0] = a0; + p[1] = a1; + return m; +} + +static ncnn::Mat IntArrayMat(int a0, int a1, int a2) +{ + ncnn::Mat m(3); + int* p = m; + p[0] = a0; + p[1] = a1; + p[2] = a2; + return m; +} + +static ncnn::Mat IntArrayMat(int a0, int a1, int a2, int a3) +{ + ncnn::Mat m(4); + int* p = m; + p[0] = a0; + p[1] = a1; + p[2] = a2; + p[3] = a3; + return m; +} + +static int test_flip(const ncnn::Mat& a, const ncnn::Mat& axis) { - ncnn::Mat axis_mat(axis.size()); - for (size_t i = 0; i < axis.size(); i++) - { - axis_mat[i] = axis[i]; - } ncnn::ParamDict pd; - pd.set(0, axis_mat); // axis + pd.set(0, axis); std::vector weights(0); int ret = test_layer("Flip", pd, weights, a); if (ret != 0) { - fprintf(stderr, "test_flip failed a.dims=%d a=(%d %d %d) axis=", a.dims, a.w, a.h, a.c); + fprintf(stderr, "test_flip failed a.dims=%d a=(%d %d %d) axis_w=%d", a.dims, a.w, a.h, a.c, axis.w); } return ret; @@ -39,38 +78,52 @@ static int test_flip(const ncnn::Mat& a, std::vector axis) static int test_flip_0() { return 0 - || test_flip(RandomMat(3, 2, 6, 7), {0}) - || test_flip(RandomMat(3, 2, 6, 7), {0, 1}) - || test_flip(RandomMat(3, 2, 6, 7), {0, 2}) - || test_flip(RandomMat(3, 2, 6, 7), {0, 3}); + || test_flip(RandomMat(2, 3, 4, 5), IntArrayMat(0)) + || test_flip(RandomMat(3, 2, 4, 5), IntArrayMat(1)) + || test_flip(RandomMat(4, 3, 2, 5), IntArrayMat(2)) + || test_flip(RandomMat(2, 3, 1, 5), IntArrayMat(3)) + || test_flip(RandomMat(6, 3, 4, 5), IntArrayMat(0, 1)) + || test_flip(RandomMat(2, 3, 1, 6), IntArrayMat(0, 2)) + || test_flip(RandomMat(5, 1, 2, 5), IntArrayMat(0, 3)) + || test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(1, 2)) + || test_flip(RandomMat(4, 5, 2, 3), IntArrayMat(1, 3)) + || test_flip(RandomMat(2, 6, 4, 5), IntArrayMat(2, 3)) + || test_flip(RandomMat(6, 1, 4, 5), IntArrayMat(0, 1, 2)) + || test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(0, 1, 3)) + || test_flip(RandomMat(4, 3, 3, 5), IntArrayMat(0, 2, 3)) + || test_flip(RandomMat(4, 3, 4, 5), IntArrayMat(1, 2, 3)) + || test_flip(RandomMat(6, 3, 3, 2), IntArrayMat(0, 1, 2, 3)); } static int test_flip_1() { return 0 - || 
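// Note: the IntArrayMat overloads above replace the brace-initialized
// std::vector arguments used by the first version of this test, because
// initializer lists need C++11 and the CI also builds the tests with the
// host.gcc-c++03 toolchain. Each overload packs 1 to 4 ints into a 1 x n
// ncnn::Mat, which ParamDict stores as param id 0 (axis). Usage sketch,
// mirroring the calls in test_flip_0:
//   ncnn::ParamDict pd;
//   pd.set(0, IntArrayMat(0, 2)); // flip along axes 0 and 2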
test_flip(RandomMat(2, 3, 5), {0}) - || test_flip(RandomMat(4, 2, 5), {0, 1}) - || test_flip(RandomMat(3, 4, 2), {0, 1, 2}); + || test_flip(RandomMat(2, 3, 5), IntArrayMat(0)) + || test_flip(RandomMat(3, 3, 5), IntArrayMat(1)) + || test_flip(RandomMat(4, 3, 5), IntArrayMat(2)) + || test_flip(RandomMat(3, 1, 5), IntArrayMat(0, 1)) + || test_flip(RandomMat(3, 2, 5), IntArrayMat(0, 2)) + || test_flip(RandomMat(3, 3, 4), IntArrayMat(1, 2)) + || test_flip(RandomMat(4, 3, 2), IntArrayMat(0, 1, 2)); } static int test_flip_2() { return 0 - || test_flip(RandomMat(8, 2), {-2}) - || test_flip(RandomMat(16, 3), {-2, -1}); + || test_flip(RandomMat(8, 2), IntArrayMat(-2)) + || test_flip(RandomMat(16, 3), IntArrayMat(-1)) + || test_flip(RandomMat(7, 2), IntArrayMat(-2, -1)); } static int test_flip_3() { return 0 - || test_flip(RandomMat(16), {-1}) - || test_flip(RandomMat(32), {0}); + || test_flip(RandomMat(18), IntArrayMat(-1)); } int main() { SRAND(7767517); - return 0 || test_flip_0() || test_flip_1() From daf95a0646dae24c6c46ab86d61fb70c49796aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 22:16:30 +0800 Subject: [PATCH 09/14] ctest 5 --- src/layer/flip.cpp | 1 + tests/test_flip.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 5ec94816247..1013a72a580 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -458,6 +458,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } else if (axis.w == 3) { + return 0; // 在线debug // dch3、dcw4、chw6 int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; int axis1 = axis_ptr[1] < 0 ? 4 + axis_ptr[1] : axis_ptr[1]; diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp index 080de2c16b0..7ebf787a462 100644 --- a/tests/test_flip.cpp +++ b/tests/test_flip.cpp @@ -15,7 +15,7 @@ #include "layer.h" #include "testutil.h" -// 为兼容低于c++11弃用如下实现 +// 为兼容低于c++11 // ncnn::Mat axis_mat(axis.size()); // for (size_t i = 0; i < axis.size(); i++) // { @@ -69,7 +69,7 @@ static int test_flip(const ncnn::Mat& a, const ncnn::Mat& axis) int ret = test_layer("Flip", pd, weights, a); if (ret != 0) { - fprintf(stderr, "test_flip failed a.dims=%d a=(%d %d %d) axis_w=%d", a.dims, a.w, a.h, a.c, axis.w); + fprintf(stderr, "test_flip failed a.dims=%d a=(%d %d %d %d) axis_w=%d\n", a.dims, a.w, a.h, a.d, a.c, axis.w); } return ret; From 8376eb7d3d890342a163e44290a1d4e7941fbfe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 22:39:59 +0800 Subject: [PATCH 10/14] ctest 6 --- src/layer/flip.cpp | 20 ++++++++++++-------- tests/test_flip.cpp | 42 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 1013a72a580..7c571ea7e2e 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -458,7 +458,6 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } else if (axis.w == 3) { - return 0; // 在线debug // dch3、dcw4、chw6 int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; int axis1 = axis_ptr[1] < 0 ? 
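// Note: for 4D blobs negative axes are wrapped by adding 4, and the sum of the
// three normalized axes is unique among three-element subsets of {0, 1, 2, 3},
// so a single integer selects the branch; the sum effectively names the one
// axis that is left untouched:
//   {0, 1, 2} -> axis_sum 3 : axis 3 (w) unchanged
//   {0, 1, 3} -> axis_sum 4 : axis 2 (h) unchanged
//   {0, 2, 3} -> axis_sum 5 : axis 1 unchanged (handled by a later commit in this series)
//   {1, 2, 3} -> axis_sum 6 : axis 0 unchanged
// Reading the loops, axis 0 walks channels, axis 1 the depth extent, axis 2
// rows and axis 3 columns.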
4 + axis_ptr[1] : axis_ptr[1]; @@ -469,17 +468,19 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons // 对应dch,除w外,其余全翻转 for (int c = 0; c < channels; c++) { - int flipped_c = channels - 1 - c; // 翻转c维度 + int flipped_c = channels - 1 - c; for (int z = 0; z < d; z++) { - int flipped_d = d - 1 - z; // 翻转d维度 + int flipped_d = d - 1 - z; for (int i = 0; i < h; i++) { - const float* ptr = bottom_blob.channel(c).row(z * h + i); - float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 - memcpy(outptr, ptr, w * sizeof(float)); // w维度保持不变 + // 修改前:const float* ptr = bottom_blob.channel(c).row(z * h + i); + // 修改为:使用depth()访问方式 + const float* ptr = bottom_blob.channel(c).depth(z).row(i); + float* outptr = const_cast(top_blob.channel(flipped_c).depth(flipped_d).row(h - 1 - i)); + memcpy(outptr, ptr, w * sizeof(float)); } } } @@ -520,9 +521,12 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { - const float* ptr = bottom_blob.channel(c).row(z * h + i); - float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + // const float* ptr = bottom_blob.channel(c).row(z * h + i); + // float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + // 修改为使用depth()访问方式 + const float* ptr = bottom_blob.channel(c).depth(z).row(i); + float* outptr = const_cast(top_blob.channel(c).depth(flipped_d).row(h - 1 - i)); // 翻转h维度 // 翻转w维度 for (int k = 0; k < w; k++) { diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp index 7ebf787a462..e3097321b9a 100644 --- a/tests/test_flip.cpp +++ b/tests/test_flip.cpp @@ -124,9 +124,41 @@ static int test_flip_3() int main() { SRAND(7767517); - return 0 - || test_flip_0() - || test_flip_1() - || test_flip_2() - || test_flip_3(); + // return 0 + // || test_flip_0() + // || test_flip_1() + // || test_flip_2() + // || test_flip_3(); + + // debug 测出所有异常 + test_flip(RandomMat(2, 3, 4, 5), IntArrayMat(0)); + test_flip(RandomMat(3, 2, 4, 5), IntArrayMat(1)); + test_flip(RandomMat(4, 3, 2, 5), IntArrayMat(2)); + test_flip(RandomMat(2, 3, 1, 5), IntArrayMat(3)); + test_flip(RandomMat(6, 3, 4, 5), IntArrayMat(0, 1)); + test_flip(RandomMat(2, 3, 1, 6), IntArrayMat(0, 2)); + test_flip(RandomMat(5, 1, 2, 5), IntArrayMat(0, 3)); + test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(1, 2)); + test_flip(RandomMat(4, 5, 2, 3), IntArrayMat(1, 3)); + test_flip(RandomMat(2, 6, 4, 5), IntArrayMat(2, 3)); + test_flip(RandomMat(6, 1, 4, 5), IntArrayMat(0, 1, 2)); + test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(0, 1, 3)); + test_flip(RandomMat(4, 3, 3, 5), IntArrayMat(0, 2, 3)); + test_flip(RandomMat(4, 3, 4, 5), IntArrayMat(1, 2, 3)); + test_flip(RandomMat(6, 3, 3, 2), IntArrayMat(0, 1, 2, 3)); + + test_flip(RandomMat(2, 3, 5), IntArrayMat(0)); + test_flip(RandomMat(3, 3, 5), IntArrayMat(1)); + test_flip(RandomMat(4, 3, 5), IntArrayMat(2)); + test_flip(RandomMat(3, 1, 5), IntArrayMat(0, 1)); + test_flip(RandomMat(3, 2, 5), IntArrayMat(0, 2)); + test_flip(RandomMat(3, 3, 4), IntArrayMat(1, 2)); + test_flip(RandomMat(4, 3, 2), IntArrayMat(0, 1, 2)); + + test_flip(RandomMat(8, 2), IntArrayMat(-2)); + test_flip(RandomMat(16, 3), IntArrayMat(-1)); + test_flip(RandomMat(7, 2), IntArrayMat(-2, -1)); + + test_flip(RandomMat(18), IntArrayMat(-1)); + return 0; } \ No newline at end of file From 5bd16791c84763af79f59d0427af0c96d9c400f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= 
<43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:06:02 +0800 Subject: [PATCH 11/14] ctest 7 --- tests/test_flip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp index e3097321b9a..1dc0233c8db 100644 --- a/tests/test_flip.cpp +++ b/tests/test_flip.cpp @@ -160,5 +160,5 @@ int main() test_flip(RandomMat(7, 2), IntArrayMat(-2, -1)); test_flip(RandomMat(18), IntArrayMat(-1)); - return 0; + return -1; } \ No newline at end of file From 5ba56f30677f06df08b9d96ff10adaf33279d499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:39:12 +0800 Subject: [PATCH 12/14] add 4d dch --- src/layer/flip.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index 7c571ea7e2e..dbb278a8955 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -458,7 +458,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } else if (axis.w == 3) { - // dch3、dcw4、chw6 + // dch3、dcw4、dhw5,chw6 int axis0 = axis_ptr[0] < 0 ? 4 + axis_ptr[0] : axis_ptr[0]; int axis1 = axis_ptr[1] < 0 ? 4 + axis_ptr[1] : axis_ptr[1]; int axis2 = axis_ptr[2] < 0 ? 4 + axis_ptr[2] : axis_ptr[2]; @@ -510,6 +510,29 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons } } } + else if (axis_sum == 5) + { + // 对应dhw,除了d外全翻转 + for (int c = 0; c < channels; c++) + { + int flipped_c = channels - 1 - c; // 翻转c维度 + + for (int z = 0; z < d; z++) // d维度保持不变 + { + for (int i = 0; i < h; i++) + { + const float* ptr = bottom_blob.channel(c).depth(z).row(i); + float* outptr = const_cast(top_blob.channel(flipped_c).depth(z).row(h - 1 - i)); // 翻转h维度 + + // 翻转w维度 + for (int k = 0; k < w; k++) + { + outptr[k] = ptr[w - 1 - k]; + } + } + } + } + } else if (axis_sum == 6) { // 对应chw,除了c外全翻转 From ad65148c298fb4196d7a7088de166ef2b8a01211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:12:16 +0800 Subject: [PATCH 13/14] ctest 8 --- tests/test_flip.cpp | 42 +++--------------------- tools/pnnx/tests/ncnn/test_torch_flip.py | 8 +++-- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/tests/test_flip.cpp b/tests/test_flip.cpp index 1dc0233c8db..7ebf787a462 100644 --- a/tests/test_flip.cpp +++ b/tests/test_flip.cpp @@ -124,41 +124,9 @@ static int test_flip_3() int main() { SRAND(7767517); - // return 0 - // || test_flip_0() - // || test_flip_1() - // || test_flip_2() - // || test_flip_3(); - - // debug 测出所有异常 - test_flip(RandomMat(2, 3, 4, 5), IntArrayMat(0)); - test_flip(RandomMat(3, 2, 4, 5), IntArrayMat(1)); - test_flip(RandomMat(4, 3, 2, 5), IntArrayMat(2)); - test_flip(RandomMat(2, 3, 1, 5), IntArrayMat(3)); - test_flip(RandomMat(6, 3, 4, 5), IntArrayMat(0, 1)); - test_flip(RandomMat(2, 3, 1, 6), IntArrayMat(0, 2)); - test_flip(RandomMat(5, 1, 2, 5), IntArrayMat(0, 3)); - test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(1, 2)); - test_flip(RandomMat(4, 5, 2, 3), IntArrayMat(1, 3)); - test_flip(RandomMat(2, 6, 4, 5), IntArrayMat(2, 3)); - test_flip(RandomMat(6, 1, 4, 5), IntArrayMat(0, 1, 2)); - test_flip(RandomMat(5, 2, 1, 5), IntArrayMat(0, 1, 3)); - test_flip(RandomMat(4, 3, 3, 5), IntArrayMat(0, 2, 3)); - test_flip(RandomMat(4, 3, 4, 5), IntArrayMat(1, 2, 3)); - test_flip(RandomMat(6, 3, 3, 2), IntArrayMat(0, 1, 2, 3)); - - test_flip(RandomMat(2, 3, 5), 
IntArrayMat(0)); - test_flip(RandomMat(3, 3, 5), IntArrayMat(1)); - test_flip(RandomMat(4, 3, 5), IntArrayMat(2)); - test_flip(RandomMat(3, 1, 5), IntArrayMat(0, 1)); - test_flip(RandomMat(3, 2, 5), IntArrayMat(0, 2)); - test_flip(RandomMat(3, 3, 4), IntArrayMat(1, 2)); - test_flip(RandomMat(4, 3, 2), IntArrayMat(0, 1, 2)); - - test_flip(RandomMat(8, 2), IntArrayMat(-2)); - test_flip(RandomMat(16, 3), IntArrayMat(-1)); - test_flip(RandomMat(7, 2), IntArrayMat(-2, -1)); - - test_flip(RandomMat(18), IntArrayMat(-1)); - return -1; + return 0 + || test_flip_0() + || test_flip_1() + || test_flip_2() + || test_flip_3(); } \ No newline at end of file diff --git a/tools/pnnx/tests/ncnn/test_torch_flip.py b/tools/pnnx/tests/ncnn/test_torch_flip.py index 4c9702cc505..b07a8d297a7 100644 --- a/tools/pnnx/tests/ncnn/test_torch_flip.py +++ b/tools/pnnx/tests/ncnn/test_torch_flip.py @@ -66,6 +66,7 @@ def forward(self, x, y, z, d): z3 = torch.flip(z, [0, 1]) z4 = torch.flip(z, [0, 2]) z5 = torch.flip(z, [1, 2]) + z6 = torch.flip(z, [0, 1, 2]) # 4D d0 = torch.flip(d, [-1]) d1 = torch.flip(d, [-2]) @@ -79,8 +80,9 @@ def forward(self, x, y, z, d): d9 = torch.flip(d, [2, 3]) d10 = torch.flip(d, [0, 1, 2]) d11 = torch.flip(d, [0, 1, 3]) - d12 = torch.flip(d, [1, 2, 3]) - d13 = torch.flip(d, [0, 1, 2, 3]) + d12 = torch.flip(d, [0, 2, 3]) + d13 = torch.flip(d, [1, 2, 3]) + d14 = torch.flip(d, [0, 1, 2, 3]) return ( x0, @@ -93,6 +95,7 @@ def forward(self, x, y, z, d): z3, z4, z5, + z6, d0, d1, d2, @@ -107,6 +110,7 @@ def forward(self, x, y, z, d): d11, d12, d13, + d14, ) From b54643eeced40ecb7ebb48f9c86ccb022120a837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=B0=E9=98=85?= <43716063+Baiyuetribe@users.noreply.github.com> Date: Tue, 14 Jan 2025 00:33:38 +0800 Subject: [PATCH 14/14] clean code --- .github/workflows/linux-x64-cpu-gcc.yml | 218 ++++++++++++------------ src/layer/flip.cpp | 41 ++--- 2 files changed, 125 insertions(+), 134 deletions(-) diff --git a/.github/workflows/linux-x64-cpu-gcc.yml b/.github/workflows/linux-x64-cpu-gcc.yml index 31abbe47c25..ab2185be3e7 100644 --- a/.github/workflows/linux-x64-cpu-gcc.yml +++ b/.github/workflows/linux-x64-cpu-gcc.yml @@ -1,33 +1,33 @@ name: linux-x64-cpu-gcc on: push: - # branches: [master] + branches: [master] paths: - - ".github/workflows/linux-x64-cpu-gcc.yml" - - "toolchains/host-c.gcc.toolchain.cmake" - - "CMakeLists.txt" - - "cmake/**" - - "src/*" - - "src/layer/*" - - "src/layer/x86/**" - - "tests/**" - - "tools/**" - - "!tools/pnnx/**" - - "examples/**" + - '.github/workflows/linux-x64-cpu-gcc.yml' + - 'toolchains/host-c.gcc.toolchain.cmake' + - 'CMakeLists.txt' + - 'cmake/**' + - 'src/*' + - 'src/layer/*' + - 'src/layer/x86/**' + - 'tests/**' + - 'tools/**' + - '!tools/pnnx/**' + - 'examples/**' pull_request: branches: [master] paths: - - ".github/workflows/linux-x64-cpu-gcc.yml" - - "toolchains/host-c.gcc.toolchain.cmake" - - "CMakeLists.txt" - - "cmake/**" - - "src/*" - - "src/layer/*" - - "src/layer/x86/**" - - "tests/**" - - "tools/**" - - "!tools/pnnx/**" - - "examples/**" + - '.github/workflows/linux-x64-cpu-gcc.yml' + - 'toolchains/host-c.gcc.toolchain.cmake' + - 'CMakeLists.txt' + - 'cmake/**' + - 'src/*' + - 'src/layer/*' + - 'src/layer/x86/**' + - 'tests/**' + - 'tools/**' + - '!tools/pnnx/**' + - 'examples/**' concurrency: group: linux-x64-cpu-gcc-${{ github.ref }} cancel-in-progress: true @@ -38,97 +38,97 @@ jobs: linux-gcc: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v4 - - name: update - run: sudo apt-get update - - name: 
protobuf - run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev - - name: build-sse2 - run: | - mkdir build-sse2 && cd build-sse2 - cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-sse2 - run: cd build-sse2 && ctest --output-on-failure -j $(nproc) - - name: build-shared - run: | - mkdir build-shared && cd build-shared - cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON .. - cmake --build . -j $(nproc) - - name: build-avx2 - run: | - mkdir build-avx2 && cd build-avx2 - cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx2 - run: cd build-avx2 && ctest --output-on-failure -j $(nproc) - - name: build-avx - run: | - mkdir build-avx && cd build-avx - cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx - run: cd build-avx && ctest --output-on-failure -j $(nproc) - - name: build-avx1-2 - run: | - mkdir build-avx1-2 && cd build-avx1-2 - cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-avx1-2 - run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc) - - name: build-noint8 - run: | - mkdir build-noint8 && cd build-noint8 - cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. - cmake --build . -j $(nproc) - - name: test-noint8 - run: cd build-noint8 && ctest --output-on-failure -j $(nproc) + - uses: actions/checkout@v4 + - name: update + run: sudo apt-get update + - name: protobuf + run: sudo apt-get install libprotobuf-dev protobuf-compiler libopencv-dev + - name: build-sse2 + run: | + mkdir build-sse2 && cd build-sse2 + cmake -DNCNN_AVX=OFF -DNCNN_AVX2=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-sse2 + run: cd build-sse2 && ctest --output-on-failure -j $(nproc) + - name: build-shared + run: | + mkdir build-shared && cd build-shared + cmake -DNCNN_AVX2=ON -DNCNN_SHARED_LIB=ON .. + cmake --build . -j $(nproc) + - name: build-avx2 + run: | + mkdir build-avx2 && cd build-avx2 + cmake -DNCNN_AVX2=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-avx2 + run: cd build-avx2 && ctest --output-on-failure -j $(nproc) + - name: build-avx + run: | + mkdir build-avx && cd build-avx + cmake -DNCNN_AVX2=OFF -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-avx + run: cd build-avx && ctest --output-on-failure -j $(nproc) + - name: build-avx1-2 + run: | + mkdir build-avx1-2 && cd build-avx1-2 + cmake -DNCNN_AVX2=ON -DNCNN_AVX=ON -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-avx1-2 + run: cd build-avx1-2 && ctest --output-on-failure -j $(nproc) + - name: build-noint8 + run: | + mkdir build-noint8 && cd build-noint8 + cmake -DNCNN_INT8=OFF -DNCNN_BUILD_TESTS=ON .. + cmake --build . -j $(nproc) + - name: test-noint8 + run: cd build-noint8 && ctest --output-on-failure -j $(nproc) linux-gcc-cpp03-nostdio-nostring-simplestl: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v4 - - name: build-nostdio - run: | - mkdir build-nostdio && cd build-nostdio - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-c++03.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . 
-j $(nproc) - - name: test-nostdio - run: cd build-nostdio && ctest --output-on-failure -j $(nproc) - - name: build-nostdio-nostring - run: | - mkdir build-nostdio-nostring && cd build-nostdio-nostring - cmake -DNCNN_STDIO=OFF -DNCNN_STRING=OFF -DNCNN_BUILD_TESTS=OFF -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: build-simplestl - run: | - mkdir build-simplestl && cd build-simplestl - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: test-simplestl - run: cd build-simplestl && ctest --output-on-failure -j $(nproc) - - name: build-simplestl-simpleomp - run: | - mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp - cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . -j $(nproc) - - name: test-simplestl-simpleomp - run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc) + - uses: actions/checkout@v4 + - name: build-nostdio + run: | + mkdir build-nostdio && cd build-nostdio + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc-c++03.toolchain.cmake -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-nostdio + run: cd build-nostdio && ctest --output-on-failure -j $(nproc) + - name: build-nostdio-nostring + run: | + mkdir build-nostdio-nostring && cd build-nostdio-nostring + cmake -DNCNN_STDIO=OFF -DNCNN_STRING=OFF -DNCNN_BUILD_TESTS=OFF -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: build-simplestl + run: | + mkdir build-simplestl && cd build-simplestl + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-simplestl + run: cd build-simplestl && ctest --output-on-failure -j $(nproc) + - name: build-simplestl-simpleomp + run: | + mkdir build-simplestl-simpleomp && cd build-simplestl-simpleomp + cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host-c.gcc.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEOMP=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j $(nproc) + - name: test-simplestl-simpleomp + run: cd build-simplestl-simpleomp && ctest --output-on-failure -j $(nproc) linux-gcc-avx512: runs-on: [self-hosted, linux, t4] steps: - - uses: actions/checkout@v4 - - name: build - env: - CC: gcc - CXX: g++ - LD_LIBRARY_PATH: /data/action/install/lib64 - run: | - mkdir build && cd build - cmake -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. - cmake --build . 
-j 4 - - name: test - env: - LD_LIBRARY_PATH: /data/action/install/lib64 - run: cd build && ctest --output-on-failure -j 4 + - uses: actions/checkout@v4 + - name: build + env: + CC: gcc + CXX: g++ + LD_LIBRARY_PATH: /data/action/install/lib64 + run: | + mkdir build && cd build + cmake -DNCNN_AVX2=ON -DNCNN_AVX512=ON -DNCNN_AVX512VNNI=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF .. + cmake --build . -j 4 + - name: test + env: + LD_LIBRARY_PATH: /data/action/install/lib64 + run: cd build && ctest --output-on-failure -j 4 diff --git a/src/layer/flip.cpp b/src/layer/flip.cpp index dbb278a8955..ae191c4ed58 100644 --- a/src/layer/flip.cpp +++ b/src/layer/flip.cpp @@ -116,8 +116,6 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons else if (dims == 3) { top_blob.create(w, h, channels, elemsize, opt.blob_allocator); - if (top_blob.empty()) - return -100; if (axis.w == 1) { // w、h、c @@ -181,7 +179,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons { // 组合两种翻转:channel维度和行维度同时翻转 const float* ptr = bottom_blob.channel(channels - 1 - i).row(h - 1 - j); - float* outptr = const_cast(top_blob.channel(i).row(j)); + float* outptr = top_blob.channel(i).row(j); memcpy(outptr, ptr, w * sizeof(float)); } } @@ -253,13 +251,12 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int c = 0; c < channels; c++) // 遍历channels=3 { int flipped_c = channels - 1 - c; // 计算channels翻转位置 - - for (int z = 0; z < d; z++) // 遍历d=2维度 + for (int z = 0; z < d; z++) // 遍历d=2维度 { for (int j = 0; j < h; j++) // 遍历行 { const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + j)); + float* outptr = top_blob.channel(flipped_c).row(z * h + j); memcpy(outptr, ptr, w * sizeof(float)); } } @@ -276,7 +273,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons { // 翻转d维度的数据读取位置 const float* ptr = bottom_blob.channel(i).row((d - 1 - z) * h + j); - float* outptr = const_cast(top_blob.channel(i).row(z * h + j)); + float* outptr = top_blob.channel(i).row(z * h + j); // 逐行复制w元素 memcpy(outptr, ptr, w * sizeof(float)); } @@ -339,7 +336,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int j = 0; j < h; j++) // 遍历行 { const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + j)); + float* outptr = top_blob.channel(flipped_c).row(flipped_d * h + j); memcpy(outptr, ptr, w * sizeof(float)); } } @@ -358,8 +355,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { const float* ptr = bottom_blob.channel(c).row(z * h + i); - float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + (h - 1 - i))); // 保持z维度顺序,翻转h维度 - memcpy(outptr, ptr, w * sizeof(float)); // 按行复制,保持 w 维度顺序 + float* outptr = top_blob.channel(flipped_c).row(z * h + (h - 1 - i)); // 保持z维度顺序,翻转h维度 + memcpy(outptr, ptr, w * sizeof(float)); // 按行复制,保持 w 维度顺序 } } } @@ -379,7 +376,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int j = 0; j < h; j++) // h维度保持不变 { const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(flipped_c).row(z * h + j)); + float* outptr = top_blob.channel(flipped_c).row(z * h + j); // 翻转w维度 for (int k = 0; k < w; k++) @@ -404,7 +401,7 @@ int Flip::forward(const Mat& 
bottom_blob, Mat& top_blob, const Option& opt) cons int flipped_h = h - 1 - j; // 读取源数据 const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + flipped_h)); + float* outptr = top_blob.channel(c).row(flipped_d * h + flipped_h); memcpy(outptr, ptr, w * sizeof(float)); } } @@ -423,7 +420,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int j = 0; j < h; j++) { const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + j)); // c维度保持不变 + float* outptr = top_blob.channel(c).row(flipped_d * h + j); // c维度保持不变 // 翻转 w 维度 for (int k = 0; k < w; k++) @@ -444,7 +441,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int j = 0; j < h; j++) { const float* ptr = bottom_blob.channel(c).row(z * h + j); - float* outptr = const_cast(top_blob.channel(c).row(z * h + (h - 1 - j))); // 翻转 h 维度 + float* outptr = top_blob.channel(c).row(z * h + (h - 1 - j)); // 翻转 h 维度 // 翻转 w 维度 for (int k = 0; k < w; k++) @@ -476,10 +473,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { - // 修改前:const float* ptr = bottom_blob.channel(c).row(z * h + i); - // 修改为:使用depth()访问方式 const float* ptr = bottom_blob.channel(c).depth(z).row(i); - float* outptr = const_cast(top_blob.channel(flipped_c).depth(flipped_d).row(h - 1 - i)); + float* outptr = top_blob.channel(flipped_c).depth(flipped_d).row(h - 1 - i); memcpy(outptr, ptr, w * sizeof(float)); } } @@ -499,7 +494,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { const float* ptr = bottom_blob.channel(c).row(z * h + i); - float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + i)); // h维度保持不变 + float* outptr = top_blob.channel(flipped_c).row(flipped_d * h + i); // h维度保持不变 // 翻转w维度 for (int k = 0; k < w; k++) @@ -522,7 +517,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { const float* ptr = bottom_blob.channel(c).depth(z).row(i); - float* outptr = const_cast(top_blob.channel(flipped_c).depth(z).row(h - 1 - i)); // 翻转h维度 + float* outptr = top_blob.channel(flipped_c).depth(z).row(h - 1 - i); // 翻转h维度 // 翻转w维度 for (int k = 0; k < w; k++) @@ -544,12 +539,8 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { - // const float* ptr = bottom_blob.channel(c).row(z * h + i); - // float* outptr = const_cast(top_blob.channel(c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 - - // 修改为使用depth()访问方式 const float* ptr = bottom_blob.channel(c).depth(z).row(i); - float* outptr = const_cast(top_blob.channel(c).depth(flipped_d).row(h - 1 - i)); // 翻转h维度 + float* outptr = top_blob.channel(c).depth(flipped_d).row(h - 1 - i); // 翻转h维度 // 翻转w维度 for (int k = 0; k < w; k++) { @@ -574,7 +565,7 @@ int Flip::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons for (int i = 0; i < h; i++) { const float* ptr = bottom_blob.channel(c).row(z * h + i); - float* outptr = const_cast(top_blob.channel(flipped_c).row(flipped_d * h + (h - 1 - i))); // 翻转h维度 + float* outptr = top_blob.channel(flipped_c).row(flipped_d * h + (h - 1 - i)); // 翻转h维度 // 翻转w维度 for (int k = 0; k < w; k++)
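// Note: the const_cast wrappers stripped out in this final clean-up were never
// required: top_blob is received as a non-const Mat&, and ncnn::Mat::channel(),
// depth() and row() all have non-const overloads, so an expression such as
//   float* outptr = top_blob.channel(flipped_c).depth(z).row(h - 1 - i);
// already yields a writable pointer, as in the surrounding code. Only
// bottom_blob, which is const, goes through the const float* path.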