Highlights:
onnxruntime supports ONNX If nodes (data-dependent control flow) and dynamic input shapes.
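As a minimal sketch of the If support (ToyNet and the shapes are made up, and the exact export API varies by PyTorch version): a data-dependent branch survives export when the module is scripted first, and onnxruntime then executes it as an ONNX If node:
import torch

class ToyNet(torch.nn.Module):
    def forward(self, x):
        # data-dependent branch; tracing would freeze one path,
        # scripting keeps it and exports it as an ONNX If node
        if x.sum() > 0:
            return x * 2
        return x - 1

scripted = torch.jit.script(ToyNet())
torch.onnx.export(
    scripted,
    (torch.randn(3),),
    "toy_if.onnx",
    opset_version=11,
    input_names=['x'],
    dynamic_axes={'x': {0: 'n'}},  # first dimension is dynamic
)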
Environment setup:
- pip install onnx==1.9.0
- pip install onnxruntime-gpu==1.9.0
- Error: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for xxx node with name 'yyy'
  Fix: check the onnx / onnxruntime version compatibility table and install the matching CUDA / cuDNN versions on your machine.
- Error: libonnxruntime.so.1.9.0: cannot open shared object file: No such file or directory
  Fix: download the onnxruntime lib, add it to the library path so the .so can be found, then re-source your shell config:
  export LD_LIBRARY_PATH+=":/home/xxx/onnxruntime-linux-x64-gpu-1.9.0/lib"
  source ~/.bashrc  # or: source ~/.profile
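To confirm the GPU build actually loaded, a quick check using the standard onnxruntime API:
import onnxruntime

print(onnxruntime.__version__)                # should be 1.9.0
print(onnxruntime.get_device())               # 'GPU' for the onnxruntime-gpu build
print(onnxruntime.get_available_providers())  # should include 'CUDAExecutionProvider'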
Conversion code:
import torch

# net is your model class and num_lane a model-specific constant (both placeholders)
net_torch = net().cuda()
net_torch.eval()

# input data
input_0 = torch.randn(10, 3, 20).cuda()
input_1 = torch.randint(low=0, high=num_lane, size=(6, 2, 100), dtype=torch.long).cuda()
inputs = (
    input_0,
    input_1,
)
# numpy copies of the inputs, reused later for the onnxruntime plugin test
inputs_dict = {
    'input_0': input_0,
    'input_1': input_1,
}
for key in inputs_dict.keys():
    inputs_dict[key] = inputs_dict[key].detach().cpu().numpy()

# torch forward
outputs = net_torch(*inputs)
print(outputs.shape)

# convert and save (torch.onnx.export returns None; the graph is written to f)
onnx_path = "./save_onnx/test.onnx"
torch.onnx.export(
    model=net_torch,
    args=(
        input_0,
        input_1,
    ),
    f=onnx_path,
    input_names=[
        'input_0',
        'input_1',
    ],
    output_names=['output_0', 'output_1'],
    dynamic_axes={
        'input_0': {0: 'num_0'},
        'input_1': {0: 'num_0'},
        'output_0': {0: 'num_0'},
        'output_1': {0: 'num_0'},
    },
    verbose=True,
    opset_version=11,
)
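After export, the graph can be validated with the standard onnx package API:
import onnx

model = onnx.load(onnx_path)
onnx.checker.check_model(model)                  # raises if the graph is malformed
print(onnx.helper.printable_graph(model.graph))  # human-readable graph dump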
# onnxruntime test
import onnxruntime
import numpy as np
import torch

def get_input_np(num_0, num_1):
    # random inputs with two independent dynamic dimensions (num_0, num_1)
    input_0 = torch.randn([num_0, 20, 5], device='cuda')
    input_1 = torch.randn([num_0, 2], device='cuda')
    input_2 = torch.randn([num_0, 2], device='cuda')
    input_3 = torch.randn([num_1, 2], device='cuda')
    input_4 = torch.randn([num_1, 2], device='cuda')
    inputs_dict = {
        'input_0': input_0,
        'input_1': input_1,
        'input_2': input_2,
        'input_3': input_3,
        'input_4': input_4,
    }
    for key in inputs_dict.keys():
        inputs_dict[key] = inputs_dict[key].detach().cpu().numpy()
    return inputs_dict

# note: this loads a different model (hivt.onnx, five inputs); the feed-dict
# keys must match the input_names used at export time
sess = onnxruntime.InferenceSession(
    './save_onnx/hivt.onnx',
    providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'],
)
input_np = get_input_np(15, 5000)
output = sess.run(None, input_np)
print(output[0].shape)
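When the name-mismatch or NOT_IMPLEMENTED errors above appear, it helps to list what the session actually expects; this uses the standard onnxruntime API:
for inp in sess.get_inputs():
    print(inp.name, inp.shape, inp.type)  # dynamic dims show as symbolic names like 'num_0'
for out in sess.get_outputs():
    print(out.name, out.shape, out.type)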
# onnxruntime test with a custom-op plugin
import onnxruntime as ort

sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
# load a custom-op shared library before creating the session:
# sess_options.register_custom_ops_library('./xxx.so')
ort_sess = ort.InferenceSession(onnx_path, sess_options, providers=['CUDAExecutionProvider'])
ort_out_gpu = ort_sess.run(None, inputs_dict)
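A sanity check comparing the two runtimes (a sketch; it assumes the torch forward above returned a single tensor in outputs, matching 'output_0'):
import numpy as np

# tolerances are loose because GPU kernels differ between torch and onnxruntime
np.testing.assert_allclose(
    outputs.detach().cpu().numpy(),
    ort_out_gpu[0],
    rtol=1e-3,
    atol=1e-5,
)
print("torch and onnxruntime outputs match")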