Skip to content

Commit

Permalink
[ cpu_backend ] Refactor blas_interface considering arch-dep
Browse files Browse the repository at this point in the history
1. Replace `blas_interface.h` with `cpu_backend.h`, which has the virtual functions of `blas_interface.h`.
2. The actual implementations live in `arm_compute_backend`, `x86_compute_backend`, and `fallback`, and are included according to the target CPU architecture. `cblas.h` is used by both of them for fp32 computation.
3. There are some differences (unsupported intrinsics or data types) across versions, and they are managed under the respective `arm` or `x86` directory.

**Self evaluation:**
1. Build test:     [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: skykongkong8 <[email protected]>
  • Loading branch information
skykongkong8 authored and myungjoo committed Mar 6, 2025
1 parent 7ceb636 commit 41f8368
Show file tree
Hide file tree
Showing 114 changed files with 6,891 additions and 3,266 deletions.
3 changes: 3 additions & 0 deletions Applications/AlexNet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
4 changes: 1 addition & 3 deletions Applications/Android/NNDetector/app/src/main/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,11 @@ include $(CLEAR_VARS)
NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/include/nntrainer
SIMPLESHOT_DIR = .


LOCAL_ARM_NEON := true
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions -fopenmp -static-openmp
LOCAL_CFLAGS += -pthread -fexceptions -fopenmp -static-openmp
LOCAL_LDFLAGS += -fexceptions -fopenmp -static-openmp
LOCAL_MODULE_TAGS := optional
LOCAL_ARM_MODE := arm
LOCAL_MODULE := simpleshot_jni
Expand Down
3 changes: 3 additions & 0 deletions Applications/Custom/LayerClient/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
${ML_API_COMMON_INCLUDES}
Expand Down
3 changes: 3 additions & 0 deletions Applications/LLaMA/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Layers/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Multi_input/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/PicoGPT/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/ProductRatings/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer/include \
$(NNTRAINER_ROOT)/nntrainer/layers \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor
Expand Down
3 changes: 3 additions & 0 deletions Applications/ReinforcementLearning/DeepQ/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Resnet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/models \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
Expand Down
3 changes: 3 additions & 0 deletions Applications/VGG/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv2/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv3/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
7 changes: 5 additions & 2 deletions debian/nntrainer-dev.install
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@
/usr/include/nntrainer/uint_tensor.h
/usr/include/nntrainer/float_tensor.h
/usr/include/nntrainer/tensor_wrap_specs.h
/usr/include/nntrainer/blas_interface.h
/usr/include/nntrainer/fallback_internal.h
/usr/include/nntrainer/cblas_interface.h
/usr/include/nntrainer/x86_compute_backend.h
/usr/include/nntrainer/cpu_backend.h
/usr/include/nntrainer/var_grad.h
/usr/include/nntrainer/weight.h
/usr/include/nntrainer/quantizer.h
/usr/include/nntrainer/blas_avx.h
/usr/include/nntrainer/avx2_impl.h
# todo: update dataset headers
/usr/include/nntrainer/databuffer.h
/usr/include/nntrainer/databuffer_factory.h
Expand Down
32 changes: 13 additions & 19 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,20 @@ arch = host_machine.cpu_family()

target = target_machine.cpu_family()

avx_enabled = false
if get_option('enable-avx')
if get_option('platform') != 'android'
if target == 'x86_64' or target == 'x86'
extra_defines += '-DUSE_AVX=1'
if host_machine.system() == 'windows'
add_project_arguments(['/arch:AVX2'], language: ['c','cpp'])
else
add_project_arguments(['-march=native'], language: ['c','cpp'])
add_project_arguments(['-mavx2'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
avx_enabled = true
else
warning('The target arch, ' + target + ', does not support AVX. enable-avx=true is ignored.')
endif

if arch == 'arm' or arch == 'aarch64' or get_option('platform') == 'android'
message('Build for ARM architecture')
if arch == 'arm'
extra_defines += '-DARMV7=1'
endif
elif arch == 'x86' or arch == 'x86_64'
message('Build for X86 architecture')
if host_machine.system() == 'windows'
add_project_arguments(['/arch:AVX2'], language: ['c','cpp'])
else
warning('Android build does not support AVX. enable-avx=true is ignored.')
add_project_arguments(['-march=native'], language: ['c','cpp'])
add_project_arguments(['-mavx2'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
endif

Expand All @@ -111,7 +107,6 @@ if get_option('enable-fp16')
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
extra_defines += '-DUSE__FP16=1'
extra_defines += '-DUSE_NEON=1'
elif arch == 'aarch64'
## About FP16 in GCC (from GCC-9.1 manual)
# https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Half-Precision.html
Expand All @@ -130,7 +125,6 @@ if get_option('enable-fp16')
endif
extra_defines += '-DENABLE_FP16=1'
extra_defines += '-DUSE__FP16=1'
extra_defines += '-DUSE_NEON=1'
elif arch == 'arm'
## About FP16-SIMD in arm
# FP16-SIMD is supported since armv8.2.
Expand Down
2 changes: 0 additions & 2 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ option('enable-blas', type: 'boolean', value: true)
option('enable-fp16', type: 'boolean', value: false)
option('enable-cublas', type: 'boolean', value: false)
option('enable-openmp', type: 'boolean', value: true)
option('enable-neon', type: 'boolean', value: false)
option('enable-avx', type: 'boolean', value: true)
option('enable-opencl', type: 'boolean', value: false)
option('enable-biqgemm', type: 'boolean', value: false)
option('biqgemm-path', type: 'string', value: '../BiQGEMM')
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/acti_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#define __ACTI_FUNC_H__
#ifdef __cplusplus

#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>

#if defined(_WIN32)
#define _USE_MATH_DEFINES
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/activation_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include <vector>

#include <activation_layer.h>
#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/conv2d_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#include <limits>
#include <string>

#include <blas_interface.h>
#include <conv2d_layer.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <lazy_tensor.h>
#include <nntr_threads.h>
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/conv2d_transpose_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#include <limits>
#include <string>

#include <blas_interface.h>
#include <conv2d_transpose_layer.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <lazy_tensor.h>
#include <nntr_threads.h>
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/tensor/bcq_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <iostream>

#include <bcq_tensor.h>
#include <blas_interface.h>
#include <cpu_backend.h>
#include <tensor.h>
#include <util_func.h>

Expand Down
Loading

0 comments on commit 41f8368

Please sign in to comment.