Skip to content

Commit

Permalink
BC Deprecate XN00 Support (#4450)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #4450

Final step, we deprecate XN00 support:
We delete runtime_schema.fbs and move completely to the schema used ahead-of-time (AoT); this schema only supports XN01.
We remove all wiring within the runtime that supports the XN00 format, including any logic surrounding constant_buffer and dq_datatype.

Reviewed By: digantdesai

Differential Revision: D60403908

fbshipit-source-id: b7e692a06d9308112626b8e3afc441e1fdb39342
  • Loading branch information
mcr229 authored and facebook-github-bot committed Aug 1, 2024
1 parent aa56e8c commit c329d6a
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 494 deletions.
16 changes: 2 additions & 14 deletions backends/xnnpack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,10 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)

set(_xnnpack_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
# Paths to headers generated from the .fbs files.
set(_xnnpack_flatbuffer__outputs)
foreach(fbs_file ${_xnnpack_schema__srcs})
string(REGEX REPLACE "([^/]+)[.]fbs$" "\\1_generated.h" generated
"${fbs_file}"
)
list(APPEND _xnnpack_flatbuffer__outputs
"${_xnnpack_schema__include_dir}/executorch/${generated}"
)
endforeach()

set(_xnnpack_schema__outputs)
foreach(fbs_file ${_xnnpack_schema__srcs})
string(REGEX REPLACE "runtime_([^/]+)[.]fbs$" "\\1_generated.h" generated
"${fbs_file}"
)
string(REGEX REPLACE "([^/]+)[.]fbs$" "\\1_generated.h"
generated "${fbs_file}")
list(APPEND _xnnpack_schema__outputs
"${_xnnpack_schema__include_dir}/executorch/${generated}"
)
Expand All @@ -64,7 +53,6 @@ add_custom_command(
${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o
"${_xnnpack_schema__include_dir}/executorch/backends/xnnpack/serialization"
${_xnnpack_schema__srcs}
COMMAND mv ${_xnnpack_flatbuffer__outputs} ${_xnnpack_schema__outputs}
WORKING_DIRECTORY ${EXECUTORCH_ROOT}
COMMENT "Generating xnnpack_schema headers"
VERBATIM
Expand Down
163 changes: 39 additions & 124 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,9 @@ const uint8_t* getConstantDataPtr(
const uint8_t* constant_data_ptr) {
auto buffer_idx = tensor_value->constant_buffer_idx();
if (buffer_idx) {
if (!constant_data_ptr) {
// TODO(T172265611): Remove constant_buffer in flatbuffer path after BC
// window
const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
return constant_buffer[buffer_idx]->storage()->data();
} else {
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
uint64_t constant_data_offset =
constant_data_offsets[buffer_idx]->offset();
return constant_data_ptr + constant_data_offset;
}
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
uint64_t constant_data_offset = constant_data_offsets[buffer_idx]->offset();
return constant_data_ptr + constant_data_offset;
}

return nullptr;
Expand Down Expand Up @@ -194,105 +186,29 @@ Error defineTensor(

xnn_status status;
// The type we might have to convert to
auto dq_datatype = getDataType(tensor_value->dq_datatype());

if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
if (dq_datatype != xnn_datatype::xnn_datatype_qint8) {
ET_CHECK_OR_RETURN_ERROR(
false,
Internal,
"Only int8_t is supported for dq_datatype for now, got: %d",
dq_datatype);
} else {
ET_CHECK_OR_RETURN_ERROR(
(tensor_value->flags() & XNN_VALUE_FLAG_EXTERNAL_INPUT),
Internal,
"Dynamic quantization of tensor is only allowed for the external input tensor value for now! got flags: %u",
tensor_value->flags());
}
}
auto datatype = getDataType(tensor_value->datatype());

if (qtensor_value == nullptr) {
// FP32 tensor
if (!isQuantizedDataType(dq_datatype)) {
// Define non-quantied tensor
status = xnn_define_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/getDataType(tensor_value->datatype()),
/*num_dims=*/tensor_value->num_dims(),
/*dims=*/dims_data.data(),
/*data=*/buffer_ptr,
/*external_id=*/tensor_value->external_id(),
/*flags=*/tensor_value->flags(),
/*id_out=*/&id);
} else if (dq_datatype != xnn_datatype::xnn_datatype_invalid) {
ET_CHECK_OR_RETURN_ERROR(
isQuantizedDataType(dq_datatype),
Internal,
"Dynamic quantization can only produce supported quantized dtypes");
ET_CHECK_OR_RETURN_ERROR(
tensor_value->external_id() != XNN_INVALID_VALUE_ID,
Internal,
"Dynamic quantization can only work with external inputs for now, got an internal ID");
ET_CHECK_OR_RETURN_ERROR(
buffer_ptr == nullptr,
Internal,
"Dynamic quantization can only work with external inputs for now, got const data");

switch (dq_datatype) {
case xnn_datatype::xnn_datatype_qint8: {
// HACK TO Maintain FC/BC for ASR this will be removed after 01/2024

// When encountering a dynamically quantized tensor via dq_datatype,
// which is the old flow for serializing dynamically quantized linear.
// We replace the definition of a single tensor with a new dynamic
// Quantization pattern. We change the pattern from:
// serialized_qd_input
// to
// (fp32_input --> convert --> qdint8_input)

status = xnn_define_dynamically_quantized_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/xnn_datatype_qdint8,
/*num_dims=*/tensor_value->num_dims(),
/*num_nonbatch_dims=*/1, // always do per token quantization
/*dims=*/dims_data.data(),
/*external_id=*/XNN_INVALID_VALUE_ID, // always internal value id
/*flags=*/0, // this is netiher external input or output
/*id_out=*/&id);

// this is the FP16 or FP32 external value that is being dynamically
// quantized
uint32_t float_id;
enum xnn_datatype fp_datatype = getDataType(tensor_value->datatype());
status = xnn_define_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/fp_datatype,
/*num_dims=*/tensor_value->num_dims(),
/*dims=*/dims_data.data(),
/*data=*/buffer_ptr,
/*external_id=*/tensor_value->external_id(),
/*flags=*/tensor_value->flags(),
/*id_out=*/&float_id);

// Define dynamic conversion from float to qdint8
status = xnn_define_convert(
/*subgraph=*/subgraph_ptr,
/*input_id=*/float_id,
/*output_id=*/id,
/*flags=*/0);
break;
}
default:
ET_CHECK_OR_RETURN_ERROR(
false,
NotImplemented,
"Unhandled Dyanmic Quantization dtype: %d",
dq_datatype);
}
} else {
ET_CHECK_OR_RETURN_ERROR(false, NotImplemented, "Unhandled fp32 tensor");
}
ET_CHECK_OR_RETURN_ERROR(
!isQuantizedDataType(datatype),
Internal,
"xnn_datatype is quantized, but is not quantized tensor value");

status = xnn_define_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/datatype,
/*num_dims=*/tensor_value->num_dims(),
/*dims=*/dims_data.data(),
/*data=*/buffer_ptr,
/*external_id=*/tensor_value->external_id(),
/*flags=*/tensor_value->flags(),
/*id_out=*/&id);
ET_CHECK_OR_RETURN_ERROR(
xnn_status_success == status,
Internal,
"Failed to define tensor with id %i",
id);
} else {
// define tensor for quantized
switch (qtensor_value->quant_params_type()) {
Expand All @@ -306,7 +222,7 @@ Error defineTensor(
qparams->zero_point());
status = xnn_define_quantized_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/getDataType(tensor_value->datatype()),
/*datatype=*/datatype,
/*zero_point=*/qparams->zero_point(),
/*scale=*/qparams->scale(),
/*num_dims=*/tensor_value->num_dims(),
Expand All @@ -319,21 +235,20 @@ Error defineTensor(
}
case fb_xnnpack::XNNQuantParams::PerChannelQuant: {
auto qparams = qtensor_value->quant_params_as_PerChannelQuant();
enum xnn_datatype dtype = getDataType(tensor_value->datatype());
int32_t zero_point =
(dtype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);
(datatype == xnn_datatype::xnn_datatype_qcint4 ? 8 : 0);

ET_LOG(
Debug,
"define quant tensor (per channel): buffer_ptr: %p, scale.numel(): %u, channel_dim: %u, dtype: %u, zero_point: %d\n",
buffer_ptr,
qparams->scale()->size(),
qparams->channel_dim(),
dtype,
datatype,
zero_point);
status = xnn_define_channelwise_quantized_tensor_value_v2(
/*subgraph=*/subgraph_ptr,
/*datatype=*/dtype,
/*datatype=*/datatype,
/*zero_point=*/zero_point,
/*scale=*/qparams->scale()->data(),
/*num_dims=*/tensor_value->num_dims(),
Expand All @@ -346,7 +261,6 @@ Error defineTensor(
break;
}
case fb_xnnpack::XNNQuantParams::PerChannelGroupQuant: {
xnn_datatype datatype = getDataType(tensor_value->datatype());
ET_CHECK_OR_RETURN_ERROR(
datatype == xnn_datatype::xnn_datatype_qbint4,
Internal,
Expand Down Expand Up @@ -410,7 +324,7 @@ Error defineTensor(
"Dynamically Quantized Tensors currently only support per token quantization");
status = xnn_define_dynamically_quantized_tensor_value(
/*subgraph=*/subgraph_ptr,
/*datatype=*/getDataType(tensor_value->datatype()),
/*datatype=*/datatype,
/*num_dims=*/tensor_value->num_dims(),
/*num_nonbatch_dims*/ qparams->num_nonbatch_dims(),
/*dims=*/dims_data.data(),
Expand Down Expand Up @@ -1594,23 +1508,24 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
constant_data = reinterpret_cast<const uint8_t*>(buffer_pointer) +
header->constant_data_offset;
} else if (header.error() == Error::NotFound) {
flatbuffer_data = reinterpret_cast<const uint8_t*>(buffer_pointer);
ET_LOG(
Error,
"XNNHeader version mismatch: '%.4s' != expected '%.4s'",
// Header Magic and FlatbufferIdentifier are same offset and size
flatbuffers::GetBufferIdentifier(buffer_pointer),
XNNHeader::kMagic);
return header.error();
} else {
ET_LOG(Error, "XNNHeader may be corrupt");
return header.error();
}

// Temporarily support identifier XN00 and XN01
bool is_supported_version =
strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN00", 4) ==
0 ||
strncmp(flatbuffers::GetBufferIdentifier(flatbuffer_data), "XN01", 4) ==
0;
ET_CHECK_OR_RETURN_ERROR(
is_supported_version,
fb_xnnpack::XNNGraphBufferHasIdentifier(flatbuffer_data),
DelegateInvalidCompatibility,
"XNNPACK Delegate Serialization Format version identifier '%.4s' != expected XN00 or XN01'",
flatbuffers::GetBufferIdentifier(flatbuffer_data));
"XNNPACK Delegate flatbuffer version mismatch: '%.4s' != expected '%.4s'",
flatbuffers::GetBufferIdentifier(flatbuffer_data),
fb_xnnpack::XNNGraphIdentifier());

auto flatbuffer_graph = fb_xnnpack::GetXNNGraph(flatbuffer_data);
// initialize xnnpack
Expand Down
Loading

0 comments on commit c329d6a

Please sign in to comment.