test: Add Python backend response parameters test #7964

Merged · 1 commit · Jan 25, 2025
171 changes: 171 additions & 0 deletions qa/L0_backend_python/parameters/response_parameters_test.py
@@ -0,0 +1,171 @@
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../../common")

import json
import unittest

import numpy as np
import shm_util
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class ResponseParametersTest(unittest.TestCase):
    _server_address_grpc = "localhost:8001"
    _model_name = "response_parameters"
    _shape = [1, 1]

    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def _assert_response_parameters_match(self, infer_result, expected_params):
        res_params = {}
        for param_key, param_value in infer_result.get_response().parameters.items():
            if param_value.HasField("bool_param"):
                value = param_value.bool_param
            elif param_value.HasField("int64_param"):
                value = param_value.int64_param
            elif param_value.HasField("string_param"):
                value = param_value.string_param
            else:
                raise ValueError(f"Unsupported parameter choice: {param_value}")
            res_params[param_key] = value
        self.assertEqual(expected_params, res_params)

    def _assert_response_parameters_infer_success(self, params):
        params_str = json.dumps(params)

        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                result = client.infer(self._model_name, inputs)

                # verify the response parameters
                self._assert_response_parameters_match(result, params)

                # model returns the input as output
                output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
                self.assertEqual(params_str, output)

    def _assert_response_parameters_infer_fail(self, params, expected_err_msg):
        params_str = json.dumps(params)

        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))

        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                with self.assertRaises(InferenceServerException) as e:
                    client.infer(self._model_name, inputs)

        self.assertIn("[StatusCode.INVALID_ARGUMENT] ", str(e.exception))
        self.assertIn(expected_err_msg, str(e.exception))

    def test_setting_empty_response_parameters(self):
        params = {}
        self._assert_response_parameters_infer_success(params)

    def test_setting_one_element_response_parameters(self):
        params = {"many_elements": False}
        self._assert_response_parameters_infer_success(params)

    def test_setting_three_element_response_parameters(self):
        params = {"bool": True, "str": "Hello World!", "int": 1024}
        self._assert_response_parameters_infer_success(params)

    def test_setting_multi_element_response_parameters(self):
        params = {"a": "1", "b": "2", "c": 3, "d": False, "e": 5, "f": ""}
        self._assert_response_parameters_infer_success(params)

    def test_setting_wrong_type_response_parameters(self):
        params = []
        expected_err_msg = ", got <class 'list'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_int_key_type_response_parameters(self):
        params = {"1": "int key"}
        expected_err_msg = (
            "Expect parameters keys to have type str, found type <class 'int'>"
        )
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_float_response_parameters(self):
        params = {"int": 2, "float": 0.5}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'float'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_null_response_parameters(self):
        params = {"bool": True, "null": None}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'NoneType'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_nested_response_parameters(self):
        params = {"str": "", "list": ["variable"]}
        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'list'>"
        self._assert_response_parameters_infer_fail(params, expected_err_msg)

    def test_setting_response_parameters_decoupled(self):
        model_name = "response_parameters_decoupled"
        params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}]
        params_str = json.dumps(params)

        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))

        responses = []
        with self._shm_leak_detector.Probe() as shm_probe:
            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
                client.start_stream(
                    callback=(lambda result, error: responses.append((result, error)))
                )
                client.async_stream_infer(model_name=model_name, inputs=inputs)
                client.stop_stream()

        self.assertEqual(len(params), len(responses))
        for i in range(len(params)):
            result, error = responses[i]
            self.assertIsNone(error)

            # Since this is a decoupled model, the 'triton_final_response' parameter
            # will be part of the response parameters, so include it in the expected
            # parameters. The model sends the complete-final flag separately from the
            # responses, so the parameter is always False.
            expected_params = params[i].copy()
            expected_params["triton_final_response"] = False
            self._assert_response_parameters_match(result, expected_params)

            output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
            self.assertEqual(json.dumps(params[i]), output)


if __name__ == "__main__":
    unittest.main()
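As a quick orientation for readers skimming the diff, here is a minimal standalone sketch of the round trip these tests exercise, assuming a server with the `response_parameters` model is already running on `localhost:8001`. It simply mirrors the test helpers above and is not one of the PR's files:

import json

import numpy as np
import tritonclient.grpc as grpcclient

# Ask the model to echo these values back as response parameters.
params = {"bool": True, "str": "Hello World!", "int": 1024}

with grpcclient.InferenceServerClient("localhost:8001") as client:
    inp = grpcclient.InferInput("RESPONSE_PARAMETERS", [1, 1], "BYTES")
    inp.set_data_from_numpy(np.array([[json.dumps(params)]], dtype=np.object_))
    result = client.infer("response_parameters", [inp])

# Each response parameter holds exactly one of bool_param, int64_param,
# or string_param, so probe the fields the same way the test helper does.
for key, value in result.get_response().parameters.items():
    if value.HasField("bool_param"):
        print(key, value.bool_param)
    elif value.HasField("int64_param"):
        print(key, value.int64_param)
    else:
        print(key, value.string_param)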
71 changes: 71 additions & 0 deletions qa/L0_backend_python/parameters/test.sh
@@ -0,0 +1,71 @@
#!/bin/bash
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

source ../../common/util.sh

RET=0

#
# Test response parameters
#
rm -rf models && mkdir models
mkdir -p models/response_parameters/1 && \
    cp ../../python_models/response_parameters/model.py models/response_parameters/1 && \
    cp ../../python_models/response_parameters/config.pbtxt models/response_parameters
mkdir -p models/response_parameters_decoupled/1 && \
    cp ../../python_models/response_parameters_decoupled/model.py models/response_parameters_decoupled/1 && \
    cp ../../python_models/response_parameters_decoupled/config.pbtxt models/response_parameters_decoupled

TEST_LOG="response_parameters_test.log"
SERVER_LOG="response_parameters_test.server.log"
SERVER_ARGS="--model-repository=${MODELDIR}/parameters/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1
Review comment (Contributor): Thanks for using pytest to generate the report 🚀
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** Response parameters test FAILED\n***"
    cat $TEST_LOG
    RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 1 ]; then
    echo -e "\n***\n*** Parameters test FAILED\n***"
else
    echo -e "\n***\n*** Parameters test Passed\n***"
fi
exit $RET
4 changes: 2 additions & 2 deletions qa/L0_backend_python/test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -457,7 +457,7 @@ if [ "$TEST_JETSON" == "0" ]; then
fi
fi

SUBTESTS="lifecycle argument_validation logging custom_metrics"
SUBTESTS="lifecycle argument_validation logging custom_metrics parameters"
# [DLIS-6124] Disable restart test for Windows since it requires more investigation
# [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload
# [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients
52 changes: 52 additions & 0 deletions qa/python_models/response_parameters/config.pbtxt
@@ -0,0 +1,52 @@
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "response_parameters"
backend: "python"
max_batch_size: 8

input [
{
name: "RESPONSE_PARAMETERS"
data_type: TYPE_STRING
dims: [ 1 ]
}
]

output [
{
name: "OUTPUT"
data_type: TYPE_STRING
dims: [ 1 ]
}
]

instance_group [
{
count: 1
kind: KIND_CPU
}
]
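Since `max_batch_size` is 8, Triton prepends a batch dimension to `dims: [ 1 ]`, so the client-visible input shape is `[batch, 1]`; this is why the test class uses `_shape = [1, 1]`. A tiny sketch of a matching input array (illustrative, mirroring the test code):

import numpy as np

# config dims [1] plus an implicit batch dimension of 1 -> client shape [1, 1]
data = np.array([['{"key": "value"}']], dtype=np.object_)
assert data.shape == (1, 1)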
73 changes: 73 additions & 0 deletions qa/python_models/response_parameters/model.py
@@ -0,0 +1,73 @@
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []

        for request in requests:
            res_params_tensor = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            ).as_numpy()
            res_params_str = str(res_params_tensor[0][0], encoding="utf-8")
            output_tensor = pb_utils.Tensor(
                "OUTPUT", np.array([[res_params_str]], dtype=np.object_)
            )
            try:
                res_params = json.loads(res_params_str)
                # convert all digit keys to int, for testing non-str key types
                if isinstance(res_params, dict):
                    res_params_new = {}
                    for key, value in res_params.items():
                        if isinstance(key, str) and key.isdigit():
                            key = int(key)
                        res_params_new[key] = value
                    res_params = res_params_new

                response = pb_utils.InferenceResponse(
                    output_tensors=[output_tensor], parameters=res_params
                )

                res_params_set = {}
                if response.parameters() != "":
                    res_params_set = json.loads(response.parameters())
                if res_params_set != res_params:
                    raise Exception("Response parameters set differ from provided")
            except Exception as e:
                error = pb_utils.TritonError(
                    message=str(e), code=pb_utils.TritonError.INVALID_ARG
                )
                response = pb_utils.InferenceResponse(error=error)

            responses.append(response)

        return responses
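The decoupled counterpart, `qa/python_models/response_parameters_decoupled/model.py`, is copied by `test.sh` but not shown in this diff. Based on what the decoupled test above expects (one response per element of the JSON list, with the final flag sent separately so `triton_final_response` is False on data responses), a plausible sketch might look like the following — an illustration under those assumptions, not the actual file from the PR:

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            sender = request.get_response_sender()
            params_str = str(
                pb_utils.get_input_tensor_by_name(
                    request, "RESPONSE_PARAMETERS"
                ).as_numpy()[0][0],
                encoding="utf-8",
            )
            # One response per element of the JSON list, each echoing its
            # element as OUTPUT and attaching it as response parameters.
            for params in json.loads(params_str):
                output_tensor = pb_utils.Tensor(
                    "OUTPUT", np.array([[json.dumps(params)]], dtype=np.object_)
                )
                sender.send(
                    pb_utils.InferenceResponse(
                        output_tensors=[output_tensor], parameters=params
                    )
                )
            # Send the complete-final flag on its own, so the client sees
            # 'triton_final_response' as False on every data-bearing response.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        return None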