From 6895cbf8105ef880161c3812e1e11e1b87afda3c Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 2 Jul 2018 22:43:12 +0800 Subject: [PATCH] single vldm is faster than two vld1 on armv7, and some pipeline optimizations --- src/layer/arm/convolution_1x1.h | 188 +++++++++++++++++++++----------- src/layer/arm/convolution_3x3.h | 73 +++++++------ 2 files changed, 168 insertions(+), 93 deletions(-) diff --git a/src/layer/arm/convolution_1x1.h b/src/layer/arm/convolution_1x1.h index f0c974893a4..8cb8d387f57 100644 --- a/src/layer/arm/convolution_1x1.h +++ b/src/layer/arm/convolution_1x1.h @@ -146,8 +146,26 @@ static void conv1x1s1_sgemm_neon(const Mat& bottom_blob, Mat& top_blob, const Ma for (int q=0; q