Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: dumbPy/python_backend
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: ec515c9
Choose a base ref
...
head repository: triton-inference-server/python_backend
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: bb82100
Choose a head ref
Loading
Showing with 26,409 additions and 4,066 deletions.
  1. +2 −1 .clang-format
  2. +48 −0 .devcontainer/Dockerfile
  3. +26 −0 .devcontainer/devcontainer.json
  4. +84 −0 .github/workflows/codeql.yml
  5. +38 −0 .github/workflows/pre-commit.yml
  6. +3 −1 .gitignore
  7. +73 −0 .pre-commit-config.yaml
  8. +85 −0 .vscode/tasks.json
  9. +209 −75 CMakeLists.txt
  10. +1,476 −147 README.md
  11. +1 −1 cmake/TritonPythonBackendConfig.cmake.in
  12. +35 −15 examples/add_sub/client.py
  13. +18 −17 examples/add_sub/model.py
  14. +107 −0 examples/auto_complete/README.md
  15. +212 −0 examples/auto_complete/batch_model.py
  16. +83 −0 examples/auto_complete/client.py
  17. +211 −0 examples/auto_complete/nobatch_model.py
  18. +84 −31 examples/bls/README.md
  19. +82 −0 examples/bls/async_client.py
  20. +59 −0 examples/bls/async_config.pbtxt
  21. +160 −0 examples/bls/async_model.py
  22. +60 −33 examples/bls/{client.py → sync_client.py}
  23. +2 −2 examples/bls/{config.pbtxt → sync_config.pbtxt}
  24. +15 −11 examples/bls/{model.py → sync_model.py}
  25. +163 −0 examples/bls_decoupled/README.md
  26. +68 −0 examples/bls_decoupled/async_client.py
  27. +45 −0 examples/bls_decoupled/async_config.pbtxt
  28. +169 −0 examples/bls_decoupled/async_model.py
  29. +63 −0 examples/bls_decoupled/sync_client.py
  30. +45 −0 examples/bls_decoupled/sync_config.pbtxt
  31. +151 −0 examples/bls_decoupled/sync_model.py
  32. +86 −0 examples/custom_metrics/README.md
  33. +98 −0 examples/custom_metrics/client.py
  34. +65 −0 examples/custom_metrics/config.pbtxt
  35. +174 −0 examples/custom_metrics/model.py
  36. +345 −0 examples/decoupled/README.md
  37. +125 −0 examples/decoupled/repeat_client.py
  38. +62 −0 examples/decoupled/repeat_config.pbtxt
  39. +263 −0 examples/decoupled/repeat_model.py
  40. +129 −0 examples/decoupled/square_client.py
  41. +48 −0 examples/decoupled/square_config.pbtxt
  42. +245 −0 examples/decoupled/square_model.py
  43. +199 −0 examples/instance_kind/README.md
  44. +116 −0 examples/instance_kind/client.py
  45. +42 −0 examples/instance_kind/config.pbtxt
  46. +82 −0 examples/instance_kind/model.py
  47. +1,000 −0 examples/instance_kind/resnet50_labels.txt
  48. +114 −0 examples/jax/README.md
  49. +82 −0 examples/jax/client.py
  50. +59 −0 examples/jax/config.pbtxt
  51. +156 −0 examples/jax/model.py
  52. +71 −0 examples/preprocessing/README.md
  53. +106 −0 examples/preprocessing/client.py
  54. +154 −0 examples/preprocessing/model.py
  55. +71 −0 examples/preprocessing/model_repository/ensemble_python_resnet50/config.pbtxt
  56. +47 −0 examples/preprocessing/model_repository/preprocess/config.pbtxt
  57. +45 −0 examples/preprocessing/model_repository/resnet50_trt/config.pbtxt
  58. +1,000 −0 examples/preprocessing/model_repository/resnet50_trt/labels.txt
  59. +57 −0 examples/preprocessing/onnx_exporter.py
  60. +35 −16 examples/pytorch/client.py
  61. +17 −16 examples/pytorch/model.py
  62. +350 −0 inferentia/README.md
  63. +82 −0 inferentia/qa/Dockerfile.QA
  64. +206 −0 inferentia/qa/setup_test_enviroment_and_test.sh
  65. +898 −0 inferentia/scripts/gen_triton_model.py
  66. +130 −0 inferentia/scripts/setup-pre-container.sh
  67. +186 −0 inferentia/scripts/setup.sh
  68. +48 −0 pyproject.toml
  69. +120 −0 src/correlation_id.cc
  70. +93 −0 src/correlation_id.h
  71. +89 −0 src/gpu_buffers.cc
  72. +67 −0 src/gpu_buffers.h
  73. +122 −0 src/infer_payload.cc
  74. +82 −0 src/infer_payload.h
  75. +474 −96 src/infer_request.cc
  76. +123 −20 src/infer_request.h
  77. +382 −55 src/infer_response.cc
  78. +94 −13 src/infer_response.h
  79. +101 −0 src/infer_trace.cc
  80. +90 −0 src/infer_trace.h
  81. +159 −0 src/ipc_message.cc
  82. +144 −0 src/ipc_message.h
  83. +112 −0 src/memory_manager.cc
  84. +86 −0 src/memory_manager.h
  85. +289 −0 src/message_queue.h
  86. +394 −0 src/metric.cc
  87. +193 −0 src/metric.h
  88. +248 −0 src/metric_family.cc
  89. +154 −0 src/metric_family.h
  90. +267 −0 src/model_loader.cc
  91. +165 −0 src/model_loader.h
  92. +92 −0 src/pb_bls_cancel.cc
  93. +63 −0 src/pb_bls_cancel.h
  94. +93 −0 src/pb_cancel.cc
  95. +64 −0 src/pb_cancel.h
  96. +197 −8 src/pb_env.cc
  97. +14 −3 src/pb_env.h
  98. +51 −1 src/pb_error.cc
  99. +40 −4 src/pb_error.h
  100. +46 −0 src/pb_exception.h
  101. +121 −0 src/pb_log.cc
  102. +91 −0 src/pb_log.h
  103. +0 −266 src/pb_main_utils.cc
  104. +110 −0 src/pb_map.cc
  105. +72 −0 src/pb_map.h
  106. +482 −0 src/pb_memory.cc
  107. +193 −0 src/pb_memory.h
  108. +117 −0 src/pb_metric_reporter.cc
  109. +61 −0 src/pb_metric_reporter.h
  110. +57 −0 src/pb_preferred_memory.h
  111. +171 −0 src/pb_response_iterator.cc
  112. +61 −0 src/pb_response_iterator.h
  113. +126 −0 src/pb_string.cc
  114. +80 −0 src/pb_string.h
  115. +1,904 −449 src/pb_stub.cc
  116. +418 −53 src/pb_stub.h
  117. +29 −7 src/pb_stub_utils.cc
  118. +1 −0 src/pb_stub_utils.h
  119. +322 −242 src/pb_tensor.cc
  120. +119 −83 src/pb_tensor.h
  121. +223 −283 src/pb_utils.cc
  122. +220 −160 src/pb_utils.h
  123. +0 −1,809 src/python.cc
  124. +2,465 −0 src/python_be.cc
  125. +433 −0 src/python_be.h
  126. +541 −0 src/request_executor.cc
  127. +21 −7 src/{pb_main_utils.h → request_executor.h}
  128. +363 −28 src/resources/triton_python_backend_utils.py
  129. +289 −0 src/response_sender.cc
  130. +72 −0 src/response_sender.h
  131. +52 −0 src/scoped_defer.cc
  132. +42 −0 src/scoped_defer.h
  133. +157 −83 src/shm_manager.cc
  134. +207 −30 src/shm_manager.h
  135. +49 −0 src/shm_monitor/CMakeLists.txt
  136. +41 −0 src/shm_monitor/shm_monitor.cc
  137. +900 −0 src/stub_launcher.cc
  138. +221 −0 src/stub_launcher.h
3 changes: 2 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
BasedOnStyle: Google

IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
@@ -34,4 +35,4 @@ BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false

IndentCaseLabels: true
IndentCaseLabels: true
48 changes: 48 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Development container image for the Python backend, built on top of the
# Triton Inference Server release image.
FROM nvcr.io/nvidia/tritonserver:24.03-py3

# Account the container runs as. NOTE(review): nothing in this file creates
# the account, so it is presumably provided by the base image -- confirm
# before overriding this build argument.
ARG USERNAME=triton-server

# Install sudo and the C++ build prerequisites in a single layer so that
# "apt-get install" always runs against a package index refreshed in the
# same layer (avoids stale-cache failures when layers are rebuilt separately).
RUN apt-get update \
    && apt-get install -y sudo cmake rapidjson-dev

RUN pip3 install transformers torch

# Grant the user passwordless sudo.
RUN echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME

RUN pip3 install pre-commit ipdb

# Create the home directory for the same account that is selected below.
RUN mkhomedir_helper ${USERNAME}

USER ${USERNAME}
26 changes: 26 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "Python Backend",

"build": {
"dockerfile": "Dockerfile"
},
"customizations": {
"vscode": {
"extensions": [
"ms-python.vscode-pylance",
"ms-python.python",
"ms-vscode.cpptools-extension-pack",
"ms-vscode.cmake-tools",
"github.vscode-pull-request-github"
]
}
},
"postCreateCommand": "sudo chown -R triton-server:triton-server ~/.cache",

"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined", "--gpus=all", "--shm-size=2g", "--ulimit", "stack=67108864" ],
"mounts": [
"source=${localEnv:HOME}/.ssh,target=/home/triton-server/.ssh,type=bind,consistency=cached",
"source=${localEnv:HOME}/.cache/huggingface,target=/home/triton-server/.cache/huggingface,type=bind,consistency=cached"
],
"remoteUser": "triton-server"
}
84 changes: 84 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs CodeQL static analysis on every pull request.
name: "CodeQL"

on:
  pull_request:

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      # Required so the analyze step can upload its results.
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    # The v2 major version of github/codeql-action is deprecated; v3 is used
    # for all three CodeQL steps so that they stay on the same major version.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.

        # For details on CodeQL's query packs, refer to:
        # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
        queries: +security-and-quality


    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
    # If this step fails, then you should remove it and run the build manually (see below).
    - name: Autobuild
      uses: github/codeql-action/autobuild@v3

    # Command-line programs to run using the OS shell.
    # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

    # If the Autobuild step fails above, remove it and uncomment the following
    # three lines, then modify them (or add more) to build your code.
    # Refer to the EXAMPLE below for guidance.

    # - run: |
    #     echo "Run, Build Application using script"
    #     ./location_of_script_within_repo/buildscript.sh

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3
      with:
        category: "/language:${{matrix.language}}"
38 changes: 38 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Runs the repository's pre-commit hooks (see .pre-commit-config.yaml) on
# every pull request.
name: pre-commit

on:
  pull_request:

jobs:
  pre-commit:
    runs-on: ubuntu-22.04
    steps:
    # Action versions are bumped to their current releases; the previously
    # pinned majors target a Node.js runtime that GitHub has deprecated.
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
    - uses: pre-commit/action@v3.0.1
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
/build
/.vscode
*.so
builddir

@@ -139,3 +138,6 @@ dmypy.json
# pytype static type analyzer
.pytype/

# vscode
.vscode/settings.json
.vscode/c_cpp_properties.json
73 changes: 73 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Hooks executed by pre-commit, both locally and from the pre-commit workflow.
repos:
- repo: https://github.com/timothycrosley/isort
  rev: 5.12.0
  hooks:
  - id: isort
    additional_dependencies: [toml]
- repo: https://github.com/psf/black
  rev: 23.1.0
  hooks:
  - id: black
    types_or: [python, cython]
- repo: https://github.com/PyCQA/flake8
  rev: 5.0.4
  hooks:
  - id: flake8
    # Every argument is quoted: in a YAML flow sequence an unquoted comma
    # starts a new list item, so "--select=C,E,F,W,B,B950" would otherwise be
    # split into "--select=C", "E", "F", ... and the intended selection would
    # never reach flake8. The spaces around "=" are dropped for the same
    # reason: each list item is passed to flake8 as a single argument.
    args: ["--max-line-length=88", "--select=C,E,F,W,B,B950", "--extend-ignore=E203,E501"]
    types_or: [python, cython]
- repo: https://github.com/pre-commit/mirrors-clang-format
  rev: v16.0.5
  hooks:
  - id: clang-format
    types_or: [c, c++, cuda, proto, textproto, java]
    args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/codespell-project/codespell
  rev: v2.2.4
  hooks:
  - id: codespell
    additional_dependencies: [tomli]
    args: ["--toml", "pyproject.toml"]
    exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
# More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v4.4.0
  hooks:
  - id: check-case-conflict
  - id: check-executables-have-shebangs
  - id: check-merge-conflict
  - id: check-json
  - id: check-toml
  - id: check-yaml
  - id: check-shebang-scripts-are-executable
  - id: end-of-file-fixer
    types_or: [c, c++, cuda, proto, textproto, java, python]
  - id: mixed-line-ending
  - id: requirements-txt-fixer
  - id: trailing-whitespace
85 changes: 85 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"version": "2.0.0",
"tasks": [
{
    "label": "Configure",
    "type": "shell",
    "command": "cmake",
    "args": [
        "-DTRITON_COMMON_REPO_TAG:STRING=main",
        "-DTRITON_BACKEND_REPO_TAG:STRING=main",
        "-DTRITON_CORE_REPO_TAG:STRING=main",
        "-DTRITON_ENABLE_GPU:STRING=ON",
        "-DTRITON_ENABLE_NVTX:STRING=ON",
        "-DCMAKE_INSTALL_PREFIX:STRING=${workspaceFolder}/build/install",
        "-DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE",
        "-DCMAKE_BUILD_TYPE:STRING=Debug",
        "-DCMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc",
        "-DCMAKE_CXX_COMPILER:FILEPATH=/usr/bin/g++",
        "-S${workspaceFolder}",
        "-B${workspaceFolder}/build",
        "-G",
        "Unix Makefiles"
    ],
    "problemMatcher": []
},
{
    "label": "Build",
    "type": "shell",
    "command": "cmake",
    "args": [
        "--build",
        "${workspaceFolder}/build",
        "--config",
        "Debug",
        "--target",
        "all",
        "-j",
        "18",
        "--"
    ]
},
{
"label": "Install",
"type": "shell",
"command": "cmake",
"args": [
"--build",
"${workspaceFolder}/build",
"--config",
"Debug",
"--target",
"install",
"-j",
"18",
"--"
]
},
{
"label": "Move",
"type": "shell",
"command": "sudo",
"args": [
"cp",
"-r",
"${workspaceFolder}/build/install/backends/python/*",
"/opt/tritonserver/backends/python"
]
},
{
"label": "Build Python Backend",
"dependsOrder": "sequence",
"dependsOn": [
"Configure",
"Build",
"Install",
"Move"
],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
Loading