TensorRT Study Notes (1): Calling TensorRT with the Python API
# Import the required dependencies
import tensorrt as trt
import pycuda.autoinit  # initializes the CUDA context and handles cleanup
import pycuda.driver as cuda  # data transfer between GPU and CPU
import numpy as np  # host-side buffers and input preparation
# Create the logger
logger = trt.Logger(trt.Logger.WARNING)
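If you would rather route TensorRT's messages through your own handler, trt.ILogger can also be subclassed; a minimal sketch (MyLogger is our own name, not part of the API):

class MyLogger(trt.ILogger):
    def __init__(self):
        trt.ILogger.__init__(self)  # required when subclassing

    def log(self, severity, msg):
        print(f"[TRT {severity}] {msg}")  # forward to any handler you like

# logger = MyLogger()  # drop-in replacement for trt.Logger above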
# Create a runtime and deserialize the engine
with open("sample.engine", "rb") as f, trt.Runtime(logger) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
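For completeness, sample.engine is assumed to have been serialized ahead of time. A minimal sketch of producing it from an ONNX model, assuming the TensorRT 7/8-era builder API (to match the binding calls below) and a hypothetical model.onnx file:

# Build and serialize an engine from an ONNX model (one-time step).
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)
with open("model.onnx", "rb") as f:  # hypothetical model file
    parser.parse(f.read())
config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GiB scratch space (pre-8.4 API)
with builder.build_engine(network, config) as built_engine, \
        open("sample.engine", "wb") as f:
    f.write(built_engine.serialize())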
# Create an execution context (used for inference below)
context = engine.create_execution_context()
# Allocate host/device buffers and record the bindings
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []
for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    # Allocate page-locked host memory and matching device memory
    host_mem = cuda.pagelocked_empty(size, dtype)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)
    # Record the device buffer address in the bindings list
    bindings.append(int(cuda_mem))
    # Sort the buffers into inputs and outputs
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)
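When a buffer size looks wrong, it helps to dump what the engine itself reports for each binding; a quick sketch using only the calls already shown above:

for binding in engine:
    print(binding,
          engine.get_binding_shape(binding),
          trt.nptype(engine.get_binding_dtype(binding)),
          "input" if engine.binding_is_input(binding) else "output")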
# Create a CUDA stream
stream = cuda.Stream()
# Copy the input image into the host buffer
# (input_image is assumed to be a preprocessed numpy array that matches
# the input binding's size and dtype)
np.copyto(host_inputs[0], input_image.ravel())
# Transfer the input data to the GPU
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
# Run inference asynchronously
context.execute_async(bindings=bindings, stream_handle=stream.handle)
# Transfer the results back to the CPU
cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
# Synchronize the stream to wait for all queued work to finish
stream.synchronize()
# Retrieve the inference result (batch_size = 1)
output = host_outputs[0]
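Since the transfer/execute/synchronize sequence is what gets repeated per frame, it can be folded into a small helper; a sketch under the same single-input, single-output assumption (the infer name is ours):

def infer(input_image):
    """Run one inference pass; returns the flat output array."""
    np.copyto(host_inputs[0], input_image.ravel())
    cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
    stream.synchronize()
    return host_outputs[0]

The returned array is flat; reshape it using the output binding's shape from engine.get_binding_shape if the original layout is needed.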
Next up: how to handle multiple CUDA streams.