#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>
-
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <vector>

#include "python_host.grpc.pb.h"
- #include "src/backends/backend/examples/backend_utils.h"
- #include "src/backends/backend/tritonbackend.h"
- #include "src/core/json.h"
- #include "src/core/tritonserver.h"

- namespace ni = nvidia::inferenceserver;
- namespace nib = nvidia::inferenceserver::backend;
+ #include "triton/backend/backend_common.h"
+ #include "triton/common/triton_json.h"
+ #include "triton/common/tritonbackend.h"
+ #include "triton/common/tritonserver.h"

- namespace nvidia { namespace inferenceserver { namespace backend {
+ namespace triton { namespace backend { namespace python {

#define RESPOND_AND_RETURN_IF_ERROR(REQUEST, X) \
  do { \
@@ -82,10 +79,6 @@ namespace nvidia { namespace inferenceserver { namespace backend {
    } \
  } while (false)

- }}}  // namespace nvidia::inferenceserver::backend
-
- namespace {
-
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X) \
  do { \
    if ((RESPONSES)[IDX] != nullptr) { \
@@ -130,18 +123,18 @@ class ModelInstanceState {

  /// Load Triton inputs to the appropriate Protobufs
  TRITONSERVER_Error* GetInputTensor(
-     const uint32_t iidx, TRITONBACKEND_Request* request,
-     ni::Tensor* input_tensor, std::vector<TRITONBACKEND_Response*>& responses,
-     size_t r, uint32_t& batch_size);
+     const uint32_t iidx, TRITONBACKEND_Request* request, Tensor* input_tensor,
+     std::vector<TRITONBACKEND_Response*>& responses, size_t r,
+     uint32_t& batch_size);

  // TODO: Create getter and setters
-  std::unique_ptr<ni::PythonInterpreter::Stub> stub;
+  std::unique_ptr<PythonInterpreter::Stub> stub;

 private:
  ModelInstanceState(
      ModelState* model_state, TRITONBACKEND_ModelInstance* model_instance,
      const char* name, const TRITONSERVER_InstanceGroupKind kind,
-     const int32_t device_id, ni::TritonJson::Value&& model_config,
+     const int32_t device_id, triton::common::TritonJson::Value&& model_config,
      TRITONBACKEND_Model* trition_model);

  TRITONSERVER_Error* ConnectPythonInterpreter();
@@ -156,7 +149,7 @@ class ModelInstanceState {
  bool connected_ = false;

 public:
-  ni::TritonJson::Value model_config;
+  triton::common::TritonJson::Value model_config;

 private:
  TRITONBACKEND_Model* triton_model_;
@@ -176,7 +169,7 @@ class ModelState {
  uint64_t ModelVersion() const { return model_version_; }

  // Get backend state
-  ::BackendState* BackendState() { return backend_state_; }
+  BackendState* StateForBackend() { return backend_state_; }

  // Get Model Path
  const char* ModelPath() { return model_path_; }
@@ -185,15 +178,15 @@ class ModelState {
  ModelState(
      TRITONSERVER_Server* triton_server, TRITONBACKEND_Model* triton_model,
      const char* model_name, const uint64_t model_version,
-     ni::TritonJson::Value&& model_config, ::BackendState* backend_state,
-     const char* model_path);
+     triton::common::TritonJson::Value&& model_config,
+     BackendState* backend_state, const char* model_path);

  TRITONSERVER_Server* triton_server_;
  TRITONBACKEND_Model* triton_model_;
  const std::string model_name_;
  const uint64_t model_version_;
-  ni::TritonJson::Value model_config_;
-  ::BackendState* backend_state_;
+  triton::common::TritonJson::Value model_config_;
+  BackendState* backend_state_;
  const char* model_path_;
};
@@ -238,10 +231,10 @@ ModelInstanceState::CreatePythonInterpreter()
  if (interpreter_pid_ == 0) {
    // Use the python available in $PATH
    std::string python_interpreter_path =
-       model_state_->BackendState()->python_runtime;
+       model_state_->StateForBackend()->python_runtime;

    std::stringstream ss;
-   ss << model_state_->BackendState()->python_lib << "/startup.py";
+   ss << model_state_->StateForBackend()->python_lib << "/startup.py";
    std::string python_interpreter_startup = ss.str();

    subinterpreter_commandline[0] = python_interpreter_path.c_str();
@@ -278,10 +271,10 @@ ModelInstanceState::ConnectPythonInterpreter()
  auto grpc_channel =
      grpc::CreateChannel(domain_socket_, grpc::InsecureChannelCredentials());

-  stub = ni::PythonInterpreter::NewStub(grpc_channel);
+  stub = PythonInterpreter::NewStub(grpc_channel);

-  std::shared_ptr<ni::InitializationCommand> initialization_params(
-      new ni::InitializationCommand());
+  std::shared_ptr<InitializationCommand> initialization_params(
+      new InitializationCommand());

  std::vector<std::string> keys;
  LOG_IF_ERROR(model_config.Members(&keys), "can't get key names");
@@ -294,7 +287,7 @@ ModelInstanceState::ConnectPythonInterpreter()
    value_pair->set_value(val);
  };

-  ni::TritonJson::WriteBuffer buffer;
+  triton::common::TritonJson::WriteBuffer buffer;
  model_config.Write(&buffer);

  insert_model_param("model_config", std::move(buffer.MutableContents()));
@@ -312,7 +305,7 @@ ModelInstanceState::ConnectPythonInterpreter()
  constexpr uint8_t conn_attempts = 5;
  for (int i = 0; i < conn_attempts; ++i) {
    grpc::ClientContext context;
-   ni::Empty null_msg;
+   Empty null_msg;
    status = stub->Init(&context, *initialization_params, &null_msg);
    if (status.ok()) {
      LOG_MESSAGE(
@@ -334,7 +327,7 @@ ModelInstanceState::ConnectPythonInterpreter()
ModelInstanceState::ModelInstanceState(
    ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance,
    const char* name, const TRITONSERVER_InstanceGroupKind kind,
-   const int32_t device_id, ni::TritonJson::Value&& model_config,
+   const int32_t device_id, triton::common::TritonJson::Value&& model_config,
    TRITONBACKEND_Model* triton_model)
    : model_state_(model_state), triton_model_instance_(triton_model_instance),
      name_(name), kind_(kind), device_id_(device_id),
@@ -372,7 +365,7 @@ ModelInstanceState::Create(
  RETURN_IF_ERROR(
      TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size));

-  ni::TritonJson::Value model_config;
+  triton::common::TritonJson::Value model_config;

  TRITONSERVER_Error* err = model_config.Parse(buffer, byte_size);
  RETURN_IF_ERROR(TRITONSERVER_MessageDelete(config_message));
@@ -392,7 +385,7 @@ ModelInstanceState::~ModelInstanceState()
{
  // Close python interpreter.
  grpc::ClientContext context;
-  ni::Empty null_msg;
+  Empty null_msg;

  if (connected_) {
    auto err = stub->Fini(&context, null_msg, &null_msg);
@@ -424,9 +417,9 @@ ModelInstanceState::~ModelInstanceState()

TRITONSERVER_Error*
ModelInstanceState::GetInputTensor(
-   const uint32_t iidx, TRITONBACKEND_Request* request,
-   ni::Tensor* input_tensor, std::vector<TRITONBACKEND_Response*>& responses,
-   size_t r, uint32_t& batch_size)
+   const uint32_t iidx, TRITONBACKEND_Request* request, Tensor* input_tensor,
+   std::vector<TRITONBACKEND_Response*>& responses, size_t r,
+   uint32_t& batch_size)
{
  const char* input_name;
  // Load iidx'th input name
@@ -495,7 +488,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
  RETURN_IF_ERROR(
      TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size));

-  ni::TritonJson::Value model_config;
+  triton::common::TritonJson::Value model_config;
  TRITONSERVER_Error* err = model_config.Parse(buffer, byte_size);
  RETURN_IF_ERROR(TRITONSERVER_MessageDelete(config_message));
  RETURN_IF_ERROR(err);
@@ -514,7 +507,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)

  void* bstate;
  RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &bstate));
-  ::BackendState* backend_state = reinterpret_cast<::BackendState*>(bstate);
+  BackendState* backend_state = reinterpret_cast<BackendState*>(bstate);

  const char* path = nullptr;
  TRITONBACKEND_ModelArtifactType artifact_type;
@@ -539,17 +532,15 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
ModelState::ModelState(
    TRITONSERVER_Server* triton_server, TRITONBACKEND_Model* triton_model,
    const char* model_name, const uint64_t model_version,
-   ni::TritonJson::Value&& model_config, ::BackendState* backend_state,
-   const char* model_path)
+   triton::common::TritonJson::Value&& model_config,
+   BackendState* backend_state, const char* model_path)
    : triton_server_(triton_server), triton_model_(triton_model),
      model_name_(model_name), model_version_(model_version),
      model_config_(std::move(model_config)), backend_state_(backend_state),
      model_path_(model_path)
{
}

- }  // namespace
-
extern "C" {

TRITONSERVER_Error*
@@ -590,24 +581,24 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
      TRITONSERVER_LOG_VERBOSE,
      (std::string("backend configuration:\n") + buffer).c_str());

-  ni::TritonJson::Value backend_config;
+  triton::common::TritonJson::Value backend_config;
  if (byte_size != 0) {
    RETURN_IF_ERROR(backend_config.Parse(buffer, byte_size));
  }

  std::unique_ptr<BackendState> backend_state(new BackendState());
-  ni::TritonJson::Value cmdline;
+  triton::common::TritonJson::Value cmdline;
  bool found_py_lib_config = false;
  bool found_py_runtime_config = false;

  if (backend_config.Find("cmdline", &cmdline)) {
-   ni::TritonJson::Value python_lib;
+   triton::common::TritonJson::Value python_lib;
    if (cmdline.Find("python-lib", &python_lib)) {
      RETURN_IF_ERROR(python_lib.AsString(&backend_state->python_lib));
      found_py_lib_config = true;
    }

-   ni::TritonJson::Value python_runtime;
+   triton::common::TritonJson::Value python_runtime;
    if (cmdline.Find("python-runtime", &python_runtime)) {
      RETURN_IF_ERROR(python_runtime.AsString(&backend_state->python_runtime));
      found_py_runtime_config = true;
@@ -756,11 +747,11 @@ TRITONBACKEND_ModelInstanceExecute(
  }

  // Create ExecuteRequest
-  ni::ExecuteRequest execute_request;
+  ExecuteRequest execute_request;
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

-   ni::InferenceRequest* inference_request = execute_request.add_requests();
+   InferenceRequest* inference_request = execute_request.add_requests();

    uint32_t requested_input_count = 0;
    GUARDED_RESPOND_IF_ERROR(
@@ -774,7 +765,7 @@ TRITONBACKEND_ModelInstanceExecute(

    uint32_t batch_size = 0;
    for (size_t iidx = 0; iidx < requested_input_count; ++iidx) {
-     ni::Tensor* input_tensor = inference_request->add_inputs();
+     Tensor* input_tensor = inference_request->add_inputs();
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          instance_state->GetInputTensor(
@@ -806,7 +797,7 @@ TRITONBACKEND_ModelInstanceExecute(

  // ExecuteResponse
  grpc::ClientContext context;
-  ni::ExecuteResponse execute_response;
+  ExecuteResponse execute_response;

  uint64_t compute_start_ns = 0;
  SET_TIMESTAMP(compute_start_ns);
@@ -860,7 +851,7 @@ TRITONBACKEND_ModelInstanceExecute(
    uint32_t requested_output_count = 0;

    // Get response r
-   ni::InferenceResponse inference_response = execute_response.responses(r);
+   InferenceResponse inference_response = execute_response.responses(r);

    if (inference_response.failed()) {
      TRITONSERVER_Error* err = TRITONSERVER_ErrorNew(
@@ -883,7 +874,7 @@ TRITONBACKEND_ModelInstanceExecute(
        TRITONBACKEND_RequestOutputCount(request, &requested_output_count));
    for (size_t j = 0; j < requested_output_count; ++j) {
      // Prepare output buffers.
-     const ni::Tensor python_output_result = inference_response.outputs(j);
+     const Tensor python_output_result = inference_response.outputs(j);
      TRITONBACKEND_Output* triton_output;
      TRITONSERVER_DataType triton_dt =
          static_cast<TRITONSERVER_DataType>(python_output_result.dtype());
@@ -901,7 +892,7 @@ TRITONBACKEND_ModelInstanceExecute(

      std::vector<int64_t> output_dims(
          python_output_dims.begin(), python_output_dims.end());
-     int64_t output_byte_size = nib::GetByteSize(triton_dt, output_dims);
+     int64_t output_byte_size = GetByteSize(triton_dt, output_dims);
      void* output_buffer;

      TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
@@ -932,7 +923,7 @@ TRITONBACKEND_ModelInstanceExecute(
      auto output_response_tensor = std::find_if(
          inference_response.outputs().begin(),
          inference_response.outputs().end(),
-         [&output_tensor_name](const ni::Tensor& itr) {
+         [&output_tensor_name](const Tensor& itr) {
            return itr.name() == output_tensor_name;
          });
@@ -1028,3 +1019,5 @@ TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
}

}  // extern "C"
+
+ }}}  // namespace triton::backend::python
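After this change, backend code includes the relocated headers and lives inside the triton::backend::python namespace, with TritonJson reached through triton::common. The fragment below is a minimal, hypothetical sketch of that usage pattern under the post-rename layout shown in the diff; ParseBackendConfig is an illustrative helper, not part of the change, and it only reuses symbols that already appear above (triton::common::TritonJson::Value, RETURN_IF_ERROR from backend_common.h).

#include "triton/backend/backend_common.h"
#include "triton/common/triton_json.h"

namespace triton { namespace backend { namespace python {

// Parse a serialized JSON configuration with the relocated TritonJson
// utility, mirroring how TRITONBACKEND_Initialize handles the backend config.
TRITONSERVER_Error*
ParseBackendConfig(const char* buffer, size_t byte_size)
{
  triton::common::TritonJson::Value config;
  if (byte_size != 0) {
    RETURN_IF_ERROR(config.Parse(buffer, byte_size));
  }
  return nullptr;  // nullptr means success in the TRITONSERVER_Error* convention
}

}}}  // namespace triton::backend::python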