
Commit 655a23f

Karim Vadsariya and github-actions[bot] authored
[onnxruntime/build] Add new flag enable_generic_interface to build primary EPs by default (#23342)
### Description

- Add a new build flag in build.py to build onnxruntime.dll supporting interfaces for all primary EPs (QNN, TensorRT, OpenVINO, VitisAI).
- Modify the onnxruntime.dll/onnxruntime_shared.dll build settings to remove the requirement that an IHV SDK toolset be installed on the system.
- Change CMake variables to be explicit about building an EP vs. ORT (e.g. onnxruntime_USE_TENSORRT vs. onnxruntime_USE_TENSORRT_INTERFACE), evolving the build system toward building ORT independently of the EPs.

### Motivation and Context

Changes in the build system are required to evolve the repo to build the components independently while removing unnecessary dependencies.

---------

Co-authored-by: Lei Cao <[email protected]>
Co-authored-by: Karim Vadsariya <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
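A hedged usage sketch (not part of the commit): `--config`, `--build_shared_lib`, and `--parallel` are pre-existing build.py options shown with illustrative values; `--enable_generic_interface` is the flag this commit adds.

```python
# Sketch: invoking the build with the new flag from a script.
# Flags other than --enable_generic_interface are illustrative, pre-existing
# build.py options; the new flag also skips tests (it sets args.test = False).
import subprocess

subprocess.run(
    [
        "python", "tools/ci_build/build.py",
        "--config", "RelWithDebInfo",
        "--build_shared_lib",
        "--parallel",
        "--enable_generic_interface",
    ],
    check=True,
)
```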
1 parent a770a8d commit 655a23f

File tree

5 files changed: +71 -24 lines changed

cmake/CMakeLists.txt
onnxruntime/core/providers/shared_library/provider_interfaces.h
onnxruntime/core/session/provider_bridge_ort.cc
tools/ci_build/build.py
tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml

cmake/CMakeLists.txt (+15 -9)
@@ -259,6 +259,12 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
 option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
 option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)
 
+option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF)
+option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF)
+option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF)
+option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF)
+option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF)
+
 # ENABLE_TRAINING includes all training functionality
 # The following 2 entry points
 # 1. ORTModule
@@ -703,7 +709,7 @@ if (WIN32)
   # structure was padded due to __declspec(align())
   list(APPEND ORT_WARNING_FLAGS "/wd4324")
   # warning C4800: Implicit conversion from 'X' to bool. Possible information loss
-  if (onnxruntime_USE_OPENVINO)
+  if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
     list(APPEND ORT_WARNING_FLAGS "/wd4800")
   endif()
   # operator 'operator-name': deprecated between enumerations of different types
@@ -864,7 +870,7 @@ else()
   set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
 endif()
 
-if (onnxruntime_USE_CUDA)
+if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda)
@@ -888,7 +894,7 @@ if (onnxruntime_USE_CUDA)
   endif()
 endif()
 
-if (onnxruntime_USE_VITISAI)
+if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai)
@@ -898,12 +904,12 @@ if (onnxruntime_USE_DNNL)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1)
 endif()
-if (onnxruntime_USE_OPENVINO)
+if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1)
 endif()
-if (onnxruntime_USE_TENSORRT)
+if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1)
   #TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API.
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt)
@@ -929,7 +935,7 @@ if (onnxruntime_USE_JSEP)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES js)
 endif()
-if (onnxruntime_USE_QNN)
+if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE)
   list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn)
@@ -957,7 +963,7 @@ if (onnxruntime_USE_QNN)
     endif()
   endif()
 
-  if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+  if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
     file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so"
       "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll"
       "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so"
@@ -1416,7 +1422,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS)
   )
 endif()
 
-if (onnxruntime_USE_OPENVINO)
+if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE)
 
   add_definitions(-DUSE_OPENVINO=1)
 
@@ -1429,7 +1435,7 @@ if (onnxruntime_USE_OPENVINO)
     add_definitions(-DOPENVINO_CONFIG_GPU=1)
   endif()
 
-  if (onnxruntime_USE_OPENVINO_CPU)
+  if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built.
     add_definitions(-DOPENVINO_CONFIG_CPU=1)
   endif()

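The new options above default to OFF and are normally driven by build.py (see tools/ci_build/build.py below). A minimal sketch of toggling them at configure time directly, with illustrative source and build paths:

```python
# Sketch: passing the new *_INTERFACE CMake options at configure time.
# The "-S cmake -B build" paths are illustrative assumptions; build.py
# normally sets these variables for you.
import subprocess

interface_flags = [
    f"-Donnxruntime_USE_{ep}_INTERFACE=ON"
    for ep in ("TENSORRT", "CUDA", "OPENVINO", "VITISAI", "QNN")
]
subprocess.run(["cmake", "-S", "cmake", "-B", "build", *interface_flags], check=True)
```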
onnxruntime/core/providers/shared_library/provider_interfaces.h (-5)
@@ -178,7 +178,6 @@ struct ProviderHost {
   virtual std::string demangle(const char* name) = 0;
   virtual std::string demangle(const std::string& name) = 0;
 
-#ifdef USE_CUDA
   virtual std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) = 0;
   virtual std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) = 0;
   virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;
@@ -190,7 +189,6 @@ struct ProviderHost {
 
   virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
   virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0;
-#endif
 
 #ifdef USE_MIGRAPHX
   virtual std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0;
@@ -200,7 +198,6 @@ struct ProviderHost {
 #ifdef USE_ROCM
   virtual std::unique_ptr<IAllocator> CreateROCMAllocator(int16_t device_id, const char* name) = 0;
   virtual std::unique_ptr<IAllocator> CreateROCMPinnedAllocator(const char* name) = 0;
-  virtual std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() = 0;
 
   virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0;
   virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0;
@@ -1256,9 +1253,7 @@ struct ProviderHost {
   virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0;
 #endif
 
-#if defined(USE_CUDA) || defined(USE_ROCM)
   virtual PhiloxGenerator& PhiloxGenerator__Default() = 0;
-#endif
 
 #ifdef ENABLE_TRAINING_TORCH_INTEROP
   virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0;

onnxruntime/core/session/provider_bridge_ort.cc (+2 -5)
@@ -258,10 +258,8 @@ struct ProviderHostImpl : ProviderHost {
   void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); }
   void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); }
 
-#ifdef USE_CUDA
   std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); }
   std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); }
-  std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }
 
   void cuda__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
   void cuda__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) override { return GetProviderInfo_CUDA().cuda__Impl_Cast(stream, input_data, output_data, count); }
@@ -271,7 +269,6 @@ struct ProviderHostImpl : ProviderHost {
 
   Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); }
   void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); }
-#endif
 
 #ifdef USE_MIGRAPHX
   std::unique_ptr<IAllocator> CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); }
@@ -291,6 +288,8 @@ struct ProviderHostImpl : ProviderHost {
 
   Status RocmCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_ROCM().RocmCall_false(retCode, exprString, libName, successCode, msg, file, line); }
   void RocmCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_ROCM().RocmCall_true(retCode, exprString, libName, successCode, msg, file, line); }
+#else
+  std::unique_ptr<IDataTransfer> CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); }
 #endif
 
   std::string GetEnvironmentVar(const std::string& var_name) override { return Env::Default().GetEnvironmentVar(var_name); }
@@ -1560,9 +1559,7 @@ struct ProviderHostImpl : ProviderHost {
   training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); }
 #endif
 
-#if defined(USE_CUDA) || defined(USE_ROCM)
   PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); }
-#endif
 
 #ifdef ENABLE_TRAINING_TORCH_INTEROP
   void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); }

tools/ci_build/build.py (+35 -5)
@@ -782,6 +782,12 @@ def convert_arg_line_to_args(self, arg_line):
     parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels")
     parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.")
 
+    parser.add_argument(
+        "--enable_generic_interface",
+        action="store_true",
+        help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests",
+    )
+
     if not is_windows():
         parser.add_argument(
             "--allow_running_as_root",
@@ -1042,6 +1048,12 @@ def generate_build_tree(
         "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
         "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER="
         + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"),
+        # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs
+        "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
+        "-Donnxruntime_USE_CUDA_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
+        "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
+        "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
+        "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"),
         # set vars for migraphx
         "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
         "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"),
@@ -1372,6 +1384,8 @@ def generate_build_tree(
             cmake_args += ["-Donnxruntime_BUILD_QNN_EP_STATIC_LIB=ON"]
         if args.android and args.use_qnn != "static_lib":
             raise BuildError("Only support Android + QNN builds with QNN EP built as a static library.")
+        if args.use_qnn == "static_lib" and args.enable_generic_interface:
+            raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.")
 
     if args.use_coreml:
         cmake_args += ["-Donnxruntime_USE_COREML=ON"]
@@ -1529,6 +1543,12 @@ def generate_build_tree(
             "-Donnxruntime_USE_FULL_PROTOBUF=ON",
         ]
 
+    # When this flag is enabled, that means we only build ONNXRuntime shared library, expecting some compatible EP
+    # shared lib being build in a seperate process. So we skip the test for now as ONNXRuntime shared lib built under
+    # this flag is not expected to work alone
+    if args.enable_generic_interface:
+        cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"]
+
     if args.enable_lazy_tensor:
         import torch
 
@@ -2649,6 +2669,9 @@ def main():
        # Disable ONNX Runtime's builtin memory checker
        args.disable_memleak_checker = True
 
+    if args.enable_generic_interface:
+        args.test = False
+
     # If there was no explicit argument saying what to do, default
     # to update, build and test (for native builds).
     if not (args.update or args.clean or args.build or args.test or args.gen_doc):
@@ -2752,7 +2775,10 @@ def main():
     source_dir = os.path.normpath(os.path.join(script_dir, "..", ".."))
 
     # if using cuda, setup cuda paths and env vars
-    cuda_home, cudnn_home = setup_cuda_vars(args)
+    cuda_home = ""
+    cudnn_home = ""
+    if args.use_cuda:
+        cuda_home, cudnn_home = setup_cuda_vars(args)
 
     mpi_home = args.mpi_home
     nccl_home = args.nccl_home
@@ -2765,10 +2791,14 @@ def main():
     armnn_home = args.armnn_home
     armnn_libs = args.armnn_libs
 
-    qnn_home = args.qnn_home
+    qnn_home = ""
+    if args.use_qnn:
+        qnn_home = args.qnn_home
 
     # if using tensorrt, setup tensorrt paths
-    tensorrt_home = setup_tensorrt_vars(args)
+    tensorrt_home = ""
+    if args.use_tensorrt:
+        tensorrt_home = setup_tensorrt_vars(args)
 
     # if using migraphx, setup migraphx paths
     migraphx_home = setup_migraphx_vars(args)
@@ -2853,9 +2883,9 @@ def main():
             toolset = "host=" + host_arch + ",version=" + args.msvc_toolset
         else:
             toolset = "host=" + host_arch
-        if args.cuda_version:
+        if args.use_cuda and args.cuda_version:
             toolset += ",cuda=" + args.cuda_version
-        elif args.cuda_home:
+        elif args.use_cuda and args.cuda_home:
             toolset += ",cuda=" + args.cuda_home
         if args.windows_sdk_version:
             target_arch += ",version=" + args.windows_sdk_version

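Condensing the build.py hunks above into one place, a sketch of what --enable_generic_interface does end to end; the helper function and the BuildError stand-in are illustrative, not the commit's code:

```python
# Condensed sketch of the behavior added above; names here are illustrative.
class BuildError(Exception):  # stand-in for build.py's BuildError
    pass

def apply_generic_interface(args, cmake_args: list) -> None:
    if not args.enable_generic_interface:
        return
    # QNN must be built as a shared library for the generic interface.
    if args.use_qnn == "static_lib":
        raise BuildError("Generic ORT interface only supported with QNN EP built as a shared library.")
    # Turn on all five EP interface variables together.
    for ep in ("TENSORRT", "CUDA", "OPENVINO", "VITISAI", "QNN"):
        cmake_args.append(f"-Donnxruntime_USE_{ep}_INTERFACE=ON")
    # The shared lib built this way expects EPs built separately, so tests are skipped.
    cmake_args.append("-Donnxruntime_BUILD_UNIT_TESTS=OFF")
    args.test = False
```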
tools/ci_build/github/azure-pipelines/win-ci-pipeline.yml (+19)
@@ -177,6 +177,25 @@ stages:
       WITH_CACHE: false
       MachinePool: 'onnxruntime-Win-CPU-2022'
 
+- stage: x64_release_ep_generic_interface
+  dependsOn: []
+  jobs:
+  - template: templates/jobs/win-ci-vs-2022-job.yml
+    parameters:
+      BuildConfig: 'RelWithDebInfo'
+      buildArch: x64
+      additionalBuildFlags: --enable_generic_interface
+      msbuildPlatform: x64
+      isX86: false
+      job_name_suffix: x64_release_ep_generic_interface
+      RunOnnxRuntimeTests: false # --enable_generic_interface does not build tests
+      EnablePython: false
+      isTraining: false
+      ORT_EP_NAME: CPU
+      GenerateDocumentation: false
+      WITH_CACHE: false
+      MachinePool: 'onnxruntime-Win-CPU-2022'
+
 - stage: x86_release
   dependsOn: []
   jobs:

0 commit comments