dumbPy · Aug 9, 2021
diff --git a/‎README.md
Lines changed: 84 additions & 7 deletions b/‎README.md
Lines changed: 84 additions & 7 deletions
diff --git a/‎examples/add_sub/client.py
Lines changed: 0 additions & 1 deletion b/‎examples/add_sub/client.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎examples/add_sub/config.pbtxt
Lines changed: 0 additions & 2 deletions b/‎examples/add_sub/config.pbtxt
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/add_sub/model.py
Lines changed: 0 additions & 2 deletions b/‎examples/add_sub/model.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/bls/README.md
Lines changed: 104 additions & 0 deletions b/‎examples/bls/README.md
Lines changed: 104 additions & 0 deletions
diff --git a/‎examples/bls/client.py
Lines changed: 94 additions & 0 deletions b/‎examples/bls/client.py
Lines changed: 94 additions & 0 deletions
diff --git a/‎examples/bls/config.pbtxt
Lines changed: 66 additions & 0 deletions b/‎examples/bls/config.pbtxt
Lines changed: 66 additions & 0 deletions
diff --git a/‎examples/bls/model.py
Lines changed: 136 additions & 0 deletions b/‎examples/bls/model.py
Lines changed: 136 additions & 0 deletions
diff --git a/‎examples/pytorch/config.pbtxt
Lines changed: 0 additions & 2 deletions b/‎examples/pytorch/config.pbtxt
Lines changed: 0 additions & 2 deletions
diff --git a/‎examples/pytorch/model.py
Lines changed: 0 additions & 6 deletions b/‎examples/pytorch/model.py
Lines changed: 0 additions & 6 deletions
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -44,6 +44,7 @@ any C++ code.
 * [Error Handling](#error-handling)
 * [Managing Shared Memory](#managing-shared-memory)
 * [Building From Source](#building-from-source)
+* [Business Logic Scripting (beta)](#business-logic-scripting-beta)
 
 ## Quick Start
 
@@ -471,6 +472,79 @@ properly set the `--shm-size` flag depending on the size of your inputs and
 outputs. The default value for docker run command is `64MB` which is very
 small.
 
+# Business Logic Scripting (beta)
+
+Triton's
+[ensemble](https://github.com/triton-inference-server/server/blob/main/docs/architecture.md#ensemble-models)
+feature supports many use cases where multiple models are composed into a
+pipeline (or more generally a DAG, directed acyclic graph). However, there are
+many other use cases that are not supported because as part of the model
+pipeline they require loops, conditionals (if-then-else), data-dependent
+control-flow and other custom logic to be intermixed with model execution. We
+call this combination of custom logic and model executions *Business Logic
+Scripting (BLS)*. 
+
+Starting from 21.08, you can implement BLS in your Python model. A new set of
+utility functions allows you to execute inference requests on other models being
+served by Triton as a part of executing your Python model. The example below
+shows how to use this feature:
+
+```python
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+  ...
+    def execute(self, requests):
+      ...
+      # Create an InferenceRequest object. `model_name`,
+      # `requested_output_names`, and `inputs` are the required arguments and
+      # must be provided when constructing an InferenceRequest object. Make sure
+      # to replace `inputs` argument with a list of `pb_utils.Tensor` objects.
+      inference_request = pb_utils.InferenceRequest(
+          model_name='model_name',
+          requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
+          inputs=[<pb_utils.Tensor object>])
+
+      # `pb_utils.InferenceRequest` supports request_id, correlation_id, and model
+      # version in addition to the arguments described above. These arguments
+      # are optional. An example containing all the arguments:
+      # inference_request = pb_utils.InferenceRequest(model_name='model_name',
+      #   requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
+      #   inputs=[<list of pb_utils.Tensor objects>],
+      #   request_id="1", correlation_id=4, model_version=1)
+
+      # Execute the inference_request and wait for the response
+      inference_response = inference_request.exec()
+
+      # Check if the inference response has an error
+      if inference_response.has_error():
+          raise pb_utils.TritonModelException(inference_response.error().message())
+      else:
+          # Extract the output tensors from the inference response.
+          output1 = pb_utils.get_output_tensor_by_name(inference_response, 'REQUESTED_OUTPUT_1')
+          output2 = pb_utils.get_output_tensor_by_name(inference_response, 'REQUESTED_OUTPUT_2')
+
+          # Decide the next steps for model execution based on the received output
+          # tensors. It is possible to use the same output tensors for the final
+          # inference response too.
+```
+
+A complete example for BLS in Python backend is included in the
+[Examples](#examples) section.
+
+## Limitations
+
+- The number of inference requests that can be executed as a part of your model
+execution is limited to the amount of shared memory available to the Triton
+server.  If you are using Docker to start the TritonServer, you can control the
+shared memory usage using the
+[`--shm-size`](https://docs.docker.com/engine/reference/run/) flag.
+- You need to make sure that the inference requests performed as a part of your model
+do not create a circular dependency. For example, if model A performs an inference request
+on itself and there are no more model instances ready to execute the inference request, the
+model will block on the inference execution forever.
+
 # Examples
 
 For using the Triton Python client in these examples you need to install
@@ -486,12 +560,15 @@ find the files in [examples/add_sub](examples/add_sub).
 ## AddSubNet in PyTorch
 
 In order to use this model, you need to install PyTorch. We recommend using
-`pip` method mentioned in the [PyTorch
-website](https://pytorch.org/get-started/locally/). Make sure that PyTorch is
-available in the same Python environment as other dependencies. If you need
-to create another Python environment, please refer to the "Changing Python
-Runtime Path" section of this readme. You can find the files for this example
-in [examples/pytorch](examples/pytorch).
+`pip` method mentioned in the [PyTorch website](https://pytorch.org/get-started/locally/).
+Make sure that PyTorch is available in the same Python environment as other
+dependencies. Alternatively, you can create a [Python Execution Environment](#using-custom-python-execution-environments).
+You can find the files for this example in [examples/pytorch](examples/pytorch).
+
+## Business Logic Scripting
+
+The BLS example needs the dependencies required for both of the above examples.
+You can find the complete example instructions in [examples/bls](examples/bls/README.md).
 
 # Reporting problems, asking questions
 
 
@@ -25,7 +25,6 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from tritonclient.utils import *
-import tritonclient.grpc as grpcclient
 import tritonclient.http as httpclient
 
 import numpy as np
 
@@ -32,15 +32,13 @@ input [
     name: "INPUT0"
     data_type: TYPE_FP32
     dims: [ 4 ]
-    
   }
 ]
 input [
   {
     name: "INPUT1"
     data_type: TYPE_FP32
     dims: [ 4 ]
-    
   }
 ]
 output [
 
@@ -24,8 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import numpy as np
-import sys
 import json
 
 # triton_python_backend_utils is available in every Triton Python model. You
 
@@ -0,0 +1,104 @@
+<!--
+# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+-->
+
+# BLS Example
+
+This is an end-to-end example of
+[BLS](../../README.md#business-logic-scripting-beta) in the Python backend. The
+[model repository](https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md)
+should contain [PyTorch](../pytorch), [AddSub](../add_sub), and [BLS](../bls) models.
+The [PyTorch](../pytorch) and [AddSub](../add_sub) models
+calculate the sum and difference of the `INPUT0` and `INPUT1` and put the
+results in `OUTPUT0` and `OUTPUT1` respectively. The goal of the BLS model is
+the same as [PyTorch](../pytorch) and [AddSub](../add_sub) models but the
+difference is that the BLS model will not calculate the sum and difference by
+itself. The BLS model will pass the input tensors to the [PyTorch](../pytorch)
+or [AddSub](../add_sub) models and return the responses of that model as the
+final response. The additional parameter `MODEL_NAME` determines which model
+will be used for calculating the final outputs.
+
+1. Create the model repository:
+
+```console
+$ mkdir -p models/add_sub/1
+$ mkdir -p models/bls/1
+$ mkdir -p models/pytorch/1
+
+# Copy the Python models
+$ cp examples/add_sub/model.py models/add_sub/1/
+$ cp examples/add_sub/config.pbtxt models/add_sub/
+$ cp examples/bls/model.py models/bls/1/
+$ cp examples/bls/config.pbtxt models/bls/
+$ cp examples/pytorch/model.py models/pytorch/1/
+$ cp examples/pytorch/config.pbtxt models/pytorch/
+```
+
+2. Start the tritonserver:
+
+```
+tritonserver --model-repository `pwd`/models
+```
+
+3. Send inference requests to server:
+
+```
+python3 examples/bls/client.py
+```
+
+You should see an output similar to the output below:
+
+```
+=========='add_sub' model result==========
+INPUT0 ([0.34984654 0.6808792  0.6509772  0.6211422 ]) + INPUT1 ([0.37917137 0.9080451  0.60789365 0.33425143]) = OUTPUT0 ([0.7290179 1.5889243 1.2588708 0.9553937])
+INPUT0 ([0.34984654 0.6808792  0.6509772  0.6211422 ]) - INPUT1 ([0.37917137 0.9080451  0.60789365 0.33425143]) = OUTPUT1 ([-0.02932483 -0.22716594  0.04308355  0.28689077])
+
+
+=========='pytorch' model result==========
+INPUT0 ([0.34984654 0.6808792  0.6509772  0.6211422 ]) + INPUT1 ([0.37917137 0.9080451  0.60789365 0.33425143]) = OUTPUT0 ([0.7290179 1.5889243 1.2588708 0.9553937])
+INPUT0 ([0.34984654 0.6808792  0.6509772  0.6211422 ]) - INPUT1 ([0.37917137 0.9080451  0.60789365 0.33425143]) = OUTPUT1 ([-0.02932483 -0.22716594  0.04308355  0.28689077])
+
+
+=========='undefined' model result==========
+Failed to process the request(s) for model instance 'bls_0', message: TritonModelException: Failed for execute the inference request. Model 'undefined_model' is not ready.
+
+At:
+  /tmp/python_backend/models/bls/1/model.py(110): execute
+```
+
+The [bls](./model.py) model file is heavily commented with explanations about
+each of the function calls.
+
+## Explanation of the Client Output
+
+The [client.py](./client.py) sends three inference requests to the 'bls'
+model with different values for the "MODEL_NAME" input. As explained earlier,
+"MODEL_NAME" determines the model name that the "bls" model will use for
+calculating the final outputs. In the first request, it will use the "add_sub"
+model and in the second request it will use the "pytorch" model. The third
+request uses an incorrect model name to demonstrate error handling during
+the inference request execution.
@@ -0,0 +1,94 @@
+# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from tritonclient.utils import *
+import tritonclient.http as httpclient
+import numpy as np
+
+model_name = "bls"
+shape = [4]
+
+with httpclient.InferenceServerClient("localhost:8000") as client:
+    input0_data = np.random.rand(*shape).astype(np.float32)
+    input1_data = np.random.rand(*shape).astype(np.float32)
+    inputs = [
+        httpclient.InferInput("INPUT0", input0_data.shape,
+                              np_to_triton_dtype(input0_data.dtype)),
+        httpclient.InferInput("INPUT1", input1_data.shape,
+                              np_to_triton_dtype(input1_data.dtype)),
+        httpclient.InferInput("MODEL_NAME", [1],
+                              np_to_triton_dtype(np.object_)),
+    ]
+    inputs[0].set_data_from_numpy(input0_data)
+    inputs[1].set_data_from_numpy(input1_data)
+
+    # Will perform the inference request on the 'add_sub' model.
+    inputs[2].set_data_from_numpy(np.array(['add_sub'], dtype=np.object_))
+
+    outputs = [
+        httpclient.InferRequestedOutput("OUTPUT0"),
+        httpclient.InferRequestedOutput("OUTPUT1"),
+    ]
+
+    response = client.infer(model_name,
+                            inputs,
+                            request_id=str(1),
+                            outputs=outputs)
+
+    result = response.get_response()
+    print("=========='add_sub' model result==========")
+    print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
+        input0_data, input1_data, response.as_numpy("OUTPUT0")))
+    print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
+        input0_data, input1_data, response.as_numpy("OUTPUT1")))
+
+    # Will perform the inference request on the pytorch model:
+    inputs[2].set_data_from_numpy(np.array(['pytorch'], dtype=np.object_))
+    response = client.infer(model_name,
+                            inputs,
+                            request_id=str(1),
+                            outputs=outputs)
+
+    result = response.get_response()
+    print("\n")
+    print("=========='pytorch' model result==========")
+    print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
+        input0_data, input1_data, response.as_numpy("OUTPUT0")))
+    print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
+        input0_data, input1_data, response.as_numpy("OUTPUT1")))
+
+    # Will perform the same inference request on an undefined model. This leads
+    # to an exception:
+    print("\n")
+    print("=========='undefined' model result==========")
+    try:
+        inputs[2].set_data_from_numpy(np.array(['undefined_model'], dtype=np.object_))
+        response = client.infer(model_name,
+                                inputs,
+                                request_id=str(1),
+                                outputs=outputs)
+    except InferenceServerException as e:
+        print(e.message())
@@ -0,0 +1,66 @@
+# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Configuration of the 'bls' example model, served by the Python backend.
+name: "bls"
+backend: "python"
+
+# MODEL_NAME: one string element. examples/bls/model.py uses it as the name
+# of the model that the nested inference request is sent to.
+input [
+  {
+    name: "MODEL_NAME"
+    data_type: TYPE_BYTES
+    dims: [ 1 ]
+  }
+]
+# INPUT0 / INPUT1: the two FP32 operands of length 4 that are forwarded to
+# the selected model.
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+input [
+  {
+    name: "INPUT1"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+# OUTPUT0 / OUTPUT1: the output tensors of the selected model, returned
+# unchanged by the 'bls' model.
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+
+# Model instances of this model are of kind CPU.
+instance_group [{ kind: KIND_CPU }]
@@ -0,0 +1,136 @@
+# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+
+# triton_python_backend_utils is available in every Triton Python model. You
+# need to use this module to create inference requests and responses. It also
+# contains some utility functions for extracting information from model_config
+# and converting Triton input/output types to numpy types.
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    """Your Python model must use the same class name. Every Python model
+    that is created must have "TritonPythonModel" as the class name.
+    """
+    def initialize(self, args):
+        """`initialize` is called only once when the model is being loaded.
+        Implementing `initialize` function is optional. This function allows
+        the model to intialize any state associated with this model.
+
+        Parameters
+        ----------
+        args : dict
+          Both keys and values are strings. The dictionary keys and values are:
+          * model_config: A JSON string containing the model configuration
+          * model_instance_kind: A string containing model instance kind
+          * model_instance_device_id: A string containing model instance device ID
+          * model_repository: Model repository path
+          * model_version: Model version
+          * model_name: Model name
+        """
+
+        # You must parse model_config. JSON string is not parsed here
+        self.model_config = json.loads(args['model_config'])
+
+    def execute(self, requests):
+        """`execute` must be implemented in every Python model. `execute`
+        function receives a list of pb_utils.InferenceRequest as the only
+        argument. This function is called when an inference request is made
+        for this model. Depending on the batching configuration (e.g. Dynamic
+        Batching) used, `requests` may contain multiple requests. Every
+        Python model, must create one pb_utils.InferenceResponse for every
+        pb_utils.InferenceRequest in `requests`. If there is an error, you can
+        set the error argument when creating a pb_utils.InferenceResponse
+
+        Parameters
+        ----------
+        requests : list
+          A list of pb_utils.InferenceRequest
+
+        Returns
+        -------
+        list
+          A list of pb_utils.InferenceResponse. The length of this list must
+          be the same as `requests`
+        """
+
+        responses = []
+        # Every Python backend must iterate over everyone of the requests
+        # and create a pb_utils.InferenceResponse for each of them.
+        for request in requests:
+            # Get INPUT0
+            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+
+            # Get INPUT1
+            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
+
+            # Get Model Name
+            model_name = pb_utils.get_input_tensor_by_name(
+                request, "MODEL_NAME")
+
+            # Model Name string
+            model_name_string = model_name.as_numpy()[0]
+
+            # Create inference request object
+            infer_request = pb_utils.InferenceRequest(
+                model_name=model_name_string,
+                requested_output_names=["OUTPUT0", "OUTPUT1"],
+                inputs=[in_0, in_1])
+
+            # Perform synchronous blocking inference request
+            infer_response = infer_request.exec()
+
+            # Make sure that the inference response doesn't have an error. If
+            # it has an error, raise an exception.
+            if infer_response.has_error():
+                raise pb_utils.TritonModelException(
+                    infer_response.error().message())
+
+            # Create InferenceResponse. You can set an error here in case
+            # there was a problem with handling this inference request.
+            # Below is an example of how you can set errors in inference
+            # response:
+            #
+            # pb_utils.InferenceResponse(
+            #    output_tensors=..., TritonError("An error occured"))
+            #
+            # Because the infer_response of the models contains the final
+            # outputs with correct output names, we can just pass the list
+            # of outputs to the InferenceResponse object.
+            inference_response = pb_utils.InferenceResponse(
+                output_tensors=infer_response.output_tensors())
+            responses.append(inference_response)
+
+        # You should return a list of pb_utils.InferenceResponse. Length
+        # of this list must match the length of `requests` list.
+        return responses
+
+    def finalize(self):
+        """`finalize` is called only once when the model is being unloaded.
+        Implementing `finalize` function is OPTIONAL. This function allows
+        the model to perform any necessary clean ups before exit.
+        """
+        print('Cleaning up...')
@@ -32,15 +32,13 @@ input [
     name: "INPUT0"
     data_type: TYPE_FP32
     dims: [ 4 ]
-    
   }
 ]
 input [
   {
     name: "INPUT1"
     data_type: TYPE_FP32
     dims: [ 4 ]
-    
   }
 ]
 output [
 
@@ -24,8 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import numpy as np
-import sys
 import json
 from torch import nn
 
@@ -41,21 +39,17 @@ class AddSubNet(nn.Module):
     Simple AddSub network in PyTorch. This network outputs the sum and
     subtraction of the inputs.
     """
-
     def __init__(self):
         super(AddSubNet, self).__init__()
 
     def forward(self, input0, input1):
-        """ 
-        """
         return (input0 + input1), (input0 - input1)
 
 
 class TritonPythonModel:
     """Your Python model must use the same class name. Every Python model
     that is created must have "TritonPythonModel" as the class name.
     """
-
     def initialize(self, args):
         """`initialize` is called only once when the model is being loaded.
         Implementing `initialize` function is optional. This function allows
Original file line number	Diff line number	Diff line change
`@@ -32,15 +32,13 @@ input [`
`32`	`32`	`name: "INPUT0"`
`33`	`33`	`data_type: TYPE_FP32`
`34`	`34`	`dims: [ 4 ]`
`35`		`-`
`36`	`35`	`}`
`37`	`36`	`]`
`38`	`37`	`input [`
`39`	`38`	`{`
`40`	`39`	`name: "INPUT1"`
`41`	`40`	`data_type: TYPE_FP32`
`42`	`41`	`dims: [ 4 ]`
`43`		`-`
`44`	`42`	`}`
`45`	`43`	`]`
`46`	`44`	`output [`