Update on "[executorch] Migrate extended_header to new namespace"

Migrate this header to the new `::executorch::runtime` namespace and update its only users. Differential Revision: [D60703364](https://our.internmc.facebook.com/intern/diff/D60703364/) [ghstack-poisoned]
pytorch · Aug 9, 2024 · 2e580c7 · 2e580c7
2 parents f583800 + 5ad777e
commit 2e580c7
Show file tree

Hide file tree

Showing 220 changed files with 494 additions and 3,999 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -61,6 +61,9 @@
 [submodule "third-party/ios-cmake"]
 	path = third-party/ios-cmake
 	url = https://github.com/leetal/ios-cmake
+[submodule "backends/cadence/hifi/third-party/nnlib/nnlib-hifi4"]
+	path = backends/cadence/hifi/third-party/nnlib/nnlib-hifi4
+	url = https://github.com/foss-xtensa/nnlib-hifi4.git
 [submodule "third-party/prelude"]
 	path = third-party/prelude
 	url = https://github.com/facebook/buck2-prelude.git

diff --git a/backends/apple/coreml/runtime/sdk/model_event_logger_impl.h b/backends/apple/coreml/runtime/sdk/model_event_logger_impl.h
@@ -10,7 +10,7 @@
 #import <CoreML/CoreML.h>
 #import <model_event_logger.h>
 
-namespace torch::executor {
+namespace executorch::runtime {
 class EventTracer;
 }
 
@@ -21,7 +21,7 @@ namespace executorchcoreml {
 class ModelEventLoggerImpl final : public ModelEventLogger {
 public:
     /// Construct a `ModelEventLoggerImpl` from the `EventTracer`.
-    explicit ModelEventLoggerImpl(torch::executor::EventTracer* tracer) : tracer_(tracer) { }
+    explicit ModelEventLoggerImpl(::executorch::runtime::EventTracer* tracer) : tracer_(tracer) { }
 
     /// Logs profiling infos.
     ///
@@ -44,6 +44,6 @@ class ModelEventLoggerImpl final : public ModelEventLogger {
         NSDictionary<ETCoreMLModelStructurePath*, NSString*>* op_path_to_debug_symbol_name_map) const noexcept override;
 
 private:
-    torch::executor::EventTracer* tracer_;
+    ::executorch::runtime::EventTracer* tracer_;
 };
 } // namespace executorchcoreml
diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt
@@ -23,7 +23,12 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(TARGET_DIR reference)
 
+if(EXECUTORCH_NNLIB_OPT) # Use the hifi sources instead of reference and add NNLib.
+  set(TARGET_DIR hifi)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+endif()
 
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/operators)
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/kernels)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml
@@ -0,0 +1,127 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This yaml file contains operators that are also defined by the ATen library, followed by Cadence custom ops.
+# For lean mode:
+#   - The codegen'd target `executorch_generated_lib` reads all of its information
+#     from this file, including operator schema and kernel metadata.
+#   - The selective build target `codegen:executorch_defined_ops` selects all the
+#     operators in this file by dumping their names into `selected_operators.yaml`.
+#
+# See the README.md file in executorch/kernels/portable for a description of the syntax used
+# by this file.
+
+
+# aten ops
+- op: _to_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::to_copy_out
+
+- op: _softmax.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::softmax_out
+
+- op: add.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::add_out
+
+- op: bmm.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::bmm_out
+
+- op: cat.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::cat_out
+
+- op: clone.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::clone_out
+
+- op: div.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::div_out
+
+- op: div.out_mode
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::div_out_mode
+
+- op: embedding.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::embedding_out
+
+- op: full.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::full_out
+
+- op: mul.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::mul_out
+
+- op: permute_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::permute_copy_out
+
+- op: sigmoid.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sigmoid_out
+
+- op: slice_copy.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::slice_copy_Tensor_out
+
+- op: split_with_sizes_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::split_with_sizes_copy_out
+
+- op: sub.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sub_out
+
+- op: view_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::view_copy_out
+
+- op: where.self_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::where_out
+
+# custom ops
+- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantize_per_tensor_out
+
+- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::dequantize_per_tensor_out
+
+
+- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_layer_norm_out
+
+- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_linear_out
diff --git a/backends/cadence/hifi/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt
@@ -14,9 +14,10 @@ add_library(
 target_include_directories(
   cadence_kernels
   PUBLIC .
-         ${NN_LIB_BASE_DIR}/xa_nnlib/algo/common/include/
-         ${NN_LIB_BASE_DIR}/xa_nnlib/include/nnlib
-         ${NN_LIB_BASE_DIR}/xa_nnlib/include
-         ${NN_LIB_BASE_DIR}/xa_nnlib/algo/ndsp/hifi4/include/
-         ${NXP_SDK_ROOT_DIR}/middleware/dsp/naturedsp/hifi4/include/
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/common/include/
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
 )
+
+target_link_libraries(cadence_kernels PRIVATE xa_nnlib)
diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
@@ -7,8 +7,6 @@
  */
 
 #include "kernels.h"
-#include "NatureDSP_Signal.h"
-#include "NatureDSP_Signal_vector.h"
 #include "xa_nnlib_common.h"
 #include "xa_nnlib_common_macros.h"
 

diff --git a/backends/cadence/hifi/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h
@@ -12,6 +12,9 @@
 #include "stddef.h"
 #include "xa_type_def.h"
 
+/* For NNLIB APIs */
+#include "xa_nnlib_kernels_api.h"
+
 namespace impl {
 namespace HiFi {
 namespace kernels {

diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt
@@ -20,10 +20,6 @@ endif()
 
 # ATen compliant ops that are needed to run this model.
 set(_aten_ops__srcs
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_add.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_embedding.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_full.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_view_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
@@ -32,10 +28,13 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mul.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sigmoid.cpp"
@@ -44,6 +43,7 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sub.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp")
 add_library(aten_ops_cadence ${_aten_ops__srcs})
 target_link_libraries(aten_ops_cadence PUBLIC executorch)
@@ -58,8 +58,8 @@ target_include_directories(aten_ops_cadence PUBLIC ${ROOT_DIR}/..
 
 # Custom ops that are needed to run the test model.
 add_library(
-  custom_ops "quantized_linear_out.cpp" "quantized_conv_out.cpp"
-  "quantized_relu_out.cpp" "quantized_layer_norm.cpp"
+  custom_ops "quantized_linear_out.cpp"
+  "quantized_layer_norm.cpp"
   "quantize_per_tensor.cpp" "dequantize_per_tensor.cpp")
 target_include_directories(custom_ops PUBLIC ${ROOT_DIR}/..
                                              ${CMAKE_BINARY_DIR}
@@ -72,11 +72,11 @@ target_link_libraries(custom_ops PRIVATE cadence_kernels)
 # Executorch (for runtime). Here select all ops in functions.yaml
 gen_selected_ops(
   LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
-  "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions.yaml" "" ""
+  "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions_hifi.yaml" "" ""
 )
 generate_bindings_for_kernels(
   LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
-  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions.yaml
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions_hifi.yaml
 )
 message("Generated files ${gen_command_sources}")
 

diff --git a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp
@@ -35,8 +35,8 @@ void dequantize_per_tensor_out(
         out_data, input_data, scale, zero_point, numel);
   } else if (input.scalar_type() == ScalarType::Char) {
     const int8_t* input_data = input.const_data_ptr<int8_t>();
-    impl::HiFi::kernels::dequantize<int8_t>(
-        out_data, input_data, scale, zero_point, numel);
+    xa_nn_elm_dequantize_asym8s_f32(
+        out_data, input_data, zero_point, scale, numel);
   } else if (input.scalar_type() == ScalarType::Int) {
     const int32_t* input_data = input.const_data_ptr<int32_t>();
     impl::HiFi::kernels::dequantize<int32_t>(

diff --git a/backends/cadence/hifi/operators/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp
@@ -37,8 +37,8 @@ void quantize_per_tensor_out(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Char) {
     int8_t* out_data = out.mutable_data_ptr<int8_t>();
-    impl::HiFi::kernels::quantize<int8_t>(
-        out_data, input_data, 1. / scale, zero_point, numel);
+    xa_nn_elm_quantize_f32_asym8s(
+        out_data, input_data, scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Int) {
     int32_t* out_data = out.mutable_data_ptr<int32_t>();
     impl::HiFi::kernels::quantize<int32_t>(

diff --git a/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt b/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt
@@ -0,0 +1,30 @@
+
+# Builds the vendored Cadence NNLib (nnlib-hifi4) with its own makefile and exposes
+# the resulting static archive to CMake as the imported target `xa_nnlib`.
+cmake_minimum_required(VERSION 3.10.0)
+project(cadence_nnlib)
+
+set(_nnlib_build_dir
+    ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/build
+)
+set(_nnlib_archive "${CMAKE_CURRENT_BINARY_DIR}/lib/xa_nnlib.a")
+
+# Run the makefile's `install_nnlib` rule, pointing OBJDIR/LIBDIR into this
+# directory's binary tree. BYPRODUCTS declares the archive as an output of this
+# target so that generators which need a producing rule for every file they
+# link (e.g. Ninja) can resolve the imported library's location.
+add_custom_target(
+  nnlib_target ALL
+  COMMAND
+    make install_nnlib -f makefile -C ${_nnlib_build_dir}
+    OBJDIR=${CMAKE_CURRENT_BINARY_DIR}/obj
+    LIBDIR=${CMAKE_CURRENT_BINARY_DIR}/lib -j8
+  BYPRODUCTS "${_nnlib_archive}"
+)
+
+# Imported wrapper for the archive; it depends on `nnlib_target` so the makefile
+# build is ordered before anything that links `xa_nnlib`.
+add_library(xa_nnlib STATIC IMPORTED GLOBAL)
+add_dependencies(xa_nnlib nnlib_target)
+
+set_property(TARGET xa_nnlib PROPERTY IMPORTED_LOCATION "${_nnlib_archive}")
diff --git a/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 b/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4
diff --git a/backends/qualcomm/aot/ir/qcir_utils.h b/backends/qualcomm/aot/ir/qcir_utils.h
@@ -9,7 +9,7 @@
 #pragma once
 
 #include "QnnTypes.h"
-#include "qcir_generated.h"
+#include <executorch/backends/qualcomm/aot/ir/qcir_generated.h>
 
 namespace torch {
 namespace executor {

diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h
@@ -39,7 +39,7 @@ class HtpContextCustomConfig {
     return htp_context_config_.back().get();
   }
 
-  const QnnContext* context_;
+  [[maybe_unused]] const QnnContext* context_;
   std::vector<std::unique_ptr<QnnHtpContext_CustomConfig_t>>
       htp_context_config_;
   [[maybe_unused]] const QnnExecuTorchHtpBackendOptions* htp_options_;

diff --git a/build/constraints/TARGETS b/build/constraints/TARGETS
@@ -0,0 +1 @@
+oncall("executorch")
diff --git a/exir/serde/serialize.py b/exir/serde/serialize.py
@@ -343,18 +343,9 @@ def serialize(
         else:
             example_inputs = b""
 
-        # TODO (shangdi): change to use `torch._export.utils.remove_proxy_from_state_dict`
-        # after pytorch repo is updated.
-        # Proxy cannot be dumped, so we remove them.
-        new_state_dict = {}
-        for k, v in exported_program.state_dict.items():
-            if "proxy" in v.__dict__:
-                new_state_dict[k] = v.clone().detach()
-            else:
-                new_state_dict[k] = v
         return export_serialize._SerializedProgram(
             serialized_ep,
-            export_serialize.serialize_torch_artifact(new_state_dict),
+            export_serialize.serialize_torch_artifact(exported_program.state_dict),
             export_serialize.serialize_torch_artifact(constants),
             example_inputs,
         )

diff --git a/extension/llm/custom_ops/TARGETS b/extension/llm/custom_ops/TARGETS
@@ -21,3 +21,16 @@ runtime.python_test(
         "//caffe2:torch",
     ],
 )
+
+runtime.python_test(  # NOTE(review): presumably covers the preprocess custom ops — confirm.
+    name = "test_preprocess_custom_ops",
+    srcs = [
+        "test_preprocess_custom_ops.py",
+    ],
+    preload_deps = [
+        ":preprocess_custom_ops_py",  # package-relative label; its definition is not shown in this hunk
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)