-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
76 lines (61 loc) · 2.83 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from typing import Dict, List
import tensorrt as trt
import torch
# from common_utils import loadDataset
import ctypes
class CustomProfiler(trt.IProfiler):
def __init__(self) -> None:
super().__init__()
self.history: Dict[str, List[float]] = dict()
def report_layer_time(self, layer_name: str, ms: float) -> None:
if layer_name in self.history:
self.history[layer_name].append(ms)
else:
self.history[layer_name] = [ms]
def dump(self, output_path: str) -> None:
with open(output_path, "w") as f:
writer = csv.writer(f)
writer.writerow(["layer_name"] + [f"iter {i + 1}" for i in range(len(next(iter(self.history.values()))))])
for key, value in self.history.items():
writer.writerow([key] + list(map(str, value)))
self.history.clear()
def test_trt(engine_path, input_data):
# Load tensor RT engine
logger = trt.Logger()
runtime = trt.Runtime(logger)
with open(engine_path, "rb") as f:
engine = runtime.deserialize_cuda_engine(f.read())
input_binding_idx = engine.get_binding_index(engine.get_tensor_name(0))
output_binding_idx = engine.get_binding_index(engine.get_tensor_name(1))
input_shape = (1, 16, 64, 64)
output_shape = (1, 16, 64, 64)
# 実行時の情報を格納
context = engine.create_execution_context()
# デバイス側メモリの確保
input_buffer = torch.zeros(input_shape, dtype=torch.float16, device=torch.device("cuda"))
output_buffer = torch.zeros(output_shape, dtype=torch.float16, device=torch.device("cuda"))
bindings = [None, None]
bindings[input_binding_idx] = input_buffer.data_ptr() # type: ignore
bindings[output_binding_idx] = output_buffer.data_ptr() # type: ignore
# ホスト側からデバイス側に転送
input_buffer[0:1].copy_(input_data)
# 計算を実行
# context.execute_async_v2(bindings, torch.cuda.current_stream().cuda_stream)
context.execute_v2(bindings)
torch.cuda.current_stream().synchronize()
output = output_buffer[0:1]
torch.cuda.synchronize()
return output
if __name__ == "__main__":
ctypes.CDLL("depthwiseConvolutionPlugin/build/libdepthwiseConvolutionPlugin.so")
torch.manual_seed(0)
device = torch.device("cuda")
input_data = torch.randn(1, 16, 64, 64, dtype=torch.float16).to(device)
res_trt = test_trt("depthwiseTensorrt/dense/depthwiseTensorrt.trt", input_data)
res_trt_with_plugin = test_trt("depthwiseWithPlugin/dense/depthwiseWithPlugin.trt", input_data)
print("mean of relative error:", torch.mean((res_trt - res_trt_with_plugin)/ res_trt).item())
are_equal = torch.allclose(res_trt, res_trt_with_plugin, atol=1e-1)
if are_equal:
print("The outputs are approximately equal.")
else:
print("The outputs are not equal.")