backend_onnxruntime.py
import onnxruntime
import time
import backend
import numpy as np
import utils
from cuda_profiler import cuda_profiler_start, cuda_profiler_stop

class BackendOnnxruntime(backend.Backend):
    def __init__(self):
        super(BackendOnnxruntime, self).__init__()
        self.session = None

    def name(self):
        return "onnxruntime"

    def version(self):
        return onnxruntime.__version__
    def load(self, model, enable_profiling=False, cuda_profile=False, batch_size=1):
        utils.debug("running on {}".format(onnxruntime.get_device()))
        utils.debug("model path = {}".format(model.path))
        self.model = model
        self.enable_profiling = enable_profiling
        self.cuda_profile = cuda_profile
        # Session-level profiling, see
        # https://microsoft.github.io/onnxruntime/auto_examples/plot_profiling.html
        options = onnxruntime.SessionOptions()
        if enable_profiling:
            options.enable_profiling = True
        if utils.DEBUG:
            options.session_log_severity_level = 0  # verbose session logging
        # Note: these option names follow the older onnxruntime SessionOptions API.
        options.session_thread_pool_size = 2
        options.enable_sequential_execution = True
        options.set_graph_optimization_level(3)  # enable all graph optimizations
        self.session = onnxruntime.InferenceSession(model.path, options)
        # Cache input/output tensor names for session.run().
        self.inputs = [meta.name for meta in self.session.get_inputs()]
        self.outputs = [meta.name for meta in self.session.get_outputs()]
        utils.debug("inputs of onnxruntime is {}".format(self.inputs))
        utils.debug("outputs of onnxruntime is {}".format(self.outputs))
    def __del__(self):
        # Guard against load() never having been called on this instance.
        if getattr(self, "enable_profiling", False) and self.session is not None:
            prof_file = self.session.end_profiling()
            print("profile file = {}".format(prof_file))
    def forward_once(self, img):
        run_options = onnxruntime.RunOptions()
        if utils.DEBUG:
            run_options.run_log_severity_level = 0
        # Feed zero-filled tensors matching the model's declared input shapes;
        # dynamic (symbolic or non-positive) dimensions fall back to 1.
        input_feeds = {i.name: np.zeros(shape=[d if isinstance(d, int) and d > 0 else 1 for d in i.shape],
                                        dtype=np.float32)
                       for i in self.session.get_inputs()}
        start = time.time()  # start timer
        self.session.run(self.outputs, input_feeds, run_options=run_options)
        end = time.time()  # stop timer
        return end - start
    def forward(self, img, warmup=True, num_warmup=100, num_iterations=100, validate=False):
        utils.debug("image shape = {}".format(np.shape(img)))
        # Warm up the session so measured iterations exclude one-time costs.
        if warmup:
            for _ in range(num_warmup):
                self.forward_once(img)
        res = []
        if self.cuda_profile:
            cuda_profiler_start()
        for i in range(num_iterations):
            t = self.forward_once(img)
            utils.debug("processing iteration = {} which took {}".format(i, t))
            res.append(t)
        if self.cuda_profile:
            cuda_profiler_stop()
        return res
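
For reference, a minimal usage sketch is shown below. The `Model` wrapper class, the `resnet50.onnx` path, and the dummy input shape are assumptions made for illustration; the repository's actual driver code may construct and call the backend differently.

# Hypothetical driver sketch; the Model class and file path are assumptions.
class Model(object):
    def __init__(self, path):
        self.path = path


if __name__ == "__main__":
    dummy_img = np.zeros((1, 3, 224, 224), dtype=np.float32)  # placeholder input
    backend_impl = BackendOnnxruntime()
    backend_impl.load(Model("resnet50.onnx"), enable_profiling=False,
                      cuda_profile=False, batch_size=1)
    latencies = backend_impl.forward(dummy_img, warmup=True,
                                     num_warmup=5, num_iterations=20)
    print("median latency = {:.6f}s".format(sorted(latencies)[len(latencies) // 2]))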