diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py new file mode 100644 index 0000000000..e07bb5eb9f --- /dev/null +++ b/qa/L0_backend_python/parameters/response_parameters_test.py @@ -0,0 +1,171 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import sys

sys.path.append("../../common")

import json
import unittest

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class ResponseParametersTest(unittest.TestCase):
    """Checks that response parameters set by the Python-backend model are
    delivered to the gRPC client exactly as provided, and that unsupported
    parameter shapes are rejected with INVALID_ARGUMENT."""

    _server_address_grpc = "localhost:8001"
    _model_name = "response_parameters"
    _shape = [1, 1]

    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def _assert_response_parameters_match(self, infer_result, expected_params):
        """Decode the typed parameter values from the raw gRPC response and
        compare them against the expected dict."""
        actual_params = {}
        for key, value in infer_result.get_response().parameters.items():
            # A parameter value is a protobuf oneof over these three fields.
            for field in ("bool_param", "int64_param", "string_param"):
                if value.HasField(field):
                    actual_params[key] = getattr(value, field)
                    break
            else:
                raise ValueError(f"Unsupported parameter choice: {value}")
        self.assertEqual(expected_params, actual_params)

    def _build_inputs(self, serialized_params):
        """Wrap the JSON-serialized parameters into the model's single
        BYTES input tensor."""
        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
        inputs[0].set_data_from_numpy(
            np.array([[serialized_params]], dtype=np.object_)
        )
        return inputs

    def _assert_response_parameters_infer_success(self, params):
        serialized = json.dumps(params)
        inputs = self._build_inputs(serialized)

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                result = client.infer(self._model_name, inputs)

                # verify the response parameters
                self._assert_response_parameters_match(result, params)

                # model returns the input as output
                output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
                self.assertEqual(serialized, output)

    def _assert_response_parameters_infer_fail(self, params, expected_err_msg):
        serialized = json.dumps(params)
        inputs = self._build_inputs(serialized)

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                with self.assertRaises(InferenceServerException) as e:
                    client.infer(self._model_name, inputs)

                self.assertIn("[StatusCode.INVALID_ARGUMENT] ", str(e.exception))
                self.assertIn(expected_err_msg, str(e.exception))

    def test_setting_empty_response_parameters(self):
        self._assert_response_parameters_infer_success({})

    def test_setting_one_element_response_parameters(self):
        self._assert_response_parameters_infer_success({"many_elements": False})

    def test_setting_three_element_response_parameters(self):
        self._assert_response_parameters_infer_success(
            {"bool": True, "str": "Hello World!", "int": 1024}
        )

    def test_setting_multi_element_response_parameters(self):
        self._assert_response_parameters_infer_success(
            {"a": "1", "b": "2", "c": 3, "d": False, "e": 5, "f": ""}
        )

    def test_setting_wrong_type_response_parameters(self):
        # Parameters must be a dict; a list must be rejected.
        self._assert_response_parameters_infer_fail([], ", got ")

    def test_setting_int_key_type_response_parameters(self):
        # The model converts digit string keys to int before setting them.
        self._assert_response_parameters_infer_fail(
            {"1": "int key"},
            "Expect parameters keys to have type str, found type ",
        )

    def test_setting_float_response_parameters(self):
        self._assert_response_parameters_infer_fail(
            {"int": 2, "float": 0.5},
            "Expect parameters values to have type bool/int/str, found type ",
        )

    def test_setting_null_response_parameters(self):
        self._assert_response_parameters_infer_fail(
            {"bool": True, "null": None},
            "Expect parameters values to have type bool/int/str, found type ",
        )

    def test_setting_nested_response_parameters(self):
        self._assert_response_parameters_infer_fail(
            {"str": "", "list": ["variable"]},
            "Expect parameters values to have type bool/int/str, found type ",
        )

    def test_setting_response_parameters_decoupled(self):
        model_name = "response_parameters_decoupled"
        params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}]
        serialized = json.dumps(params)
        inputs = self._build_inputs(serialized)

        responses = []
        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                client.start_stream(
                    callback=(lambda result, error: responses.append((result, error)))
                )
                client.async_stream_infer(model_name=model_name, inputs=inputs)
                client.stop_stream()

        self.assertEqual(len(params), len(responses))
        for i, (result, error) in enumerate(responses):
            self.assertIsNone(error)

            # Since this is a decoupled model, the 'triton_final_response' parameter
            # will be a part of the response parameters, so include it into the expected
            # parameters. The model sends the complete final flag separately from the
            # response, so the parameter is always False.
            expected_params = dict(params[i], triton_final_response=False)
            self._assert_response_parameters_match(result, expected_params)

            output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
            self.assertEqual(json.dumps(params[i]), output)


if __name__ == "__main__":
    unittest.main()
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
source ../../common/util.sh

RET=0

#
# Test response parameters
#
# Stage the non-decoupled and decoupled response-parameters models into a
# fresh model repository.
rm -rf models && mkdir models
mkdir -p models/response_parameters/1 && \
    cp ../../python_models/response_parameters/model.py models/response_parameters/1 && \
    cp ../../python_models/response_parameters/config.pbtxt models/response_parameters
mkdir -p models/response_parameters_decoupled/1 && \
    cp ../../python_models/response_parameters_decoupled/model.py models/response_parameters_decoupled/1 && \
    cp ../../python_models/response_parameters_decoupled/config.pbtxt models/response_parameters_decoupled

TEST_LOG="response_parameters_test.log"
SERVER_LOG="response_parameters_test.server.log"
# NOTE(review): MODELDIR, BACKEND_DIR, SERVER and run_server are presumably
# provided by util.sh / the calling harness — confirm against the parent test.sh.
SERVER_ARGS="--model-repository=${MODELDIR}/parameters/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

# Run the pytest suite; temporarily disable exit-on-error so a failing test
# run can be logged and reflected in RET instead of aborting the script.
set +e
python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Response parameters test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

# Shut the server down before reporting the result.
kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
    echo -e "\n***\n*** Parameters test FAILED\n***"
else
    echo -e "\n***\n*** Parameters test Passed\n***"
fi
exit $RET
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -457,7 +457,7 @@ if [ "$TEST_JETSON" == "0" ]; then fi fi -SUBTESTS="lifecycle argument_validation logging custom_metrics" +SUBTESTS="lifecycle argument_validation logging custom_metrics parameters" # [DLIS-6124] Disable restart test for Windows since it requires more investigation # [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload # [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients diff --git a/qa/python_models/response_parameters/config.pbtxt b/qa/python_models/response_parameters/config.pbtxt new file mode 100644 index 0000000000..c40bba8066 --- /dev/null +++ b/qa/python_models/response_parameters/config.pbtxt @@ -0,0 +1,52 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
name: "response_parameters"
backend: "python"
max_batch_size: 8

input [
  {
    # JSON-serialized dict of parameters the model attaches to its response.
    name: "RESPONSE_PARAMETERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    # The model echoes the serialized input parameters back to the client.
    name: "OUTPUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]
import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Echo model for response-parameter testing.

    Reads a JSON dict from the RESPONSE_PARAMETERS input, attaches it as the
    response parameters, and returns the raw input string as OUTPUT. Invalid
    parameters produce an INVALID_ARG error response instead.
    """

    def execute(self, requests):
        responses = []

        for request in requests:
            raw = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            ).as_numpy()
            params_json = str(raw[0][0], encoding="utf-8")
            echo_tensor = pb_utils.Tensor(
                "OUTPUT", np.array([[params_json]], dtype=np.object_)
            )
            try:
                params = json.loads(params_json)
                # convert all digit keys to int, for testing non-str key types
                if isinstance(params, dict):
                    params = {
                        (int(k) if isinstance(k, str) and k.isdigit() else k): v
                        for k, v in params.items()
                    }

                response = pb_utils.InferenceResponse(
                    output_tensors=[echo_tensor], parameters=params
                )

                # Round-trip check: what the backend stored must equal what
                # was provided.
                stored = {}
                if response.parameters() != "":
                    stored = json.loads(response.parameters())
                if stored != params:
                    raise Exception("Response parameters set differ from provided")
            except Exception as e:
                error = pb_utils.TritonError(
                    message=str(e), code=pb_utils.TritonError.INVALID_ARG
                )
                response = pb_utils.InferenceResponse(error=error)

            responses.append(response)

        return responses
name: "response_parameters_decoupled"
backend: "python"
max_batch_size: 8

input [
  {
    # JSON-serialized list of dicts; one response is produced per dict.
    name: "RESPONSE_PARAMETERS"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    # Each response carries the corresponding parameter dict, re-serialized.
    name: "OUTPUT"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]

# Decoupled: the model sends responses via a response sender and signals
# completion with a separate final flag.
model_transaction_policy {
  decoupled: True
}
import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Decoupled echo model for response-parameter testing.

    The RESPONSE_PARAMETERS input holds a JSON list of dicts; one response is
    sent per dict, carrying that dict both as response parameters and
    (re-serialized) as the OUTPUT tensor. Failures are reported as a single
    INVALID_ARG error response. The final flag is always sent separately.
    """

    def execute(self, requests):
        for request in requests:
            raw = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            ).as_numpy()
            params_json = str(raw[0][0], encoding="utf-8")
            sender = request.get_response_sender()
            try:
                for params in json.loads(params_json):
                    out_tensor = pb_utils.Tensor(
                        "OUTPUT", np.array([[json.dumps(params)]], dtype=np.object_)
                    )
                    response = pb_utils.InferenceResponse(
                        output_tensors=[out_tensor], parameters=params
                    )

                    # Round-trip check: stored parameters must equal what
                    # was provided.
                    stored = {}
                    if response.parameters() != "":
                        stored = json.loads(response.parameters())
                    if stored != params:
                        raise Exception("Response parameters set differ from provided")

                    sender.send(response)
            except Exception as e:
                error = pb_utils.TritonError(
                    message=str(e), code=pb_utils.TritonError.INVALID_ARG
                )
                sender.send(pb_utils.InferenceResponse(error=error))

            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        return None