API 迁移指南#

本节重点介绍 TensorRT API 的修改。如果您不熟悉这些更改，请参阅我们的示例代码以了解具体用法。

Python#

Python API 更改#

分配缓冲区和使用基于名称的引擎 API

TensorRT 8.x

def allocate_buffers(self, engine):
    '''
    Allocates all buffers required for an engine, i.e., host/device inputs/outputs.

    Uses the TensorRT 8.x binding-based engine API. For every binding of the
    engine, a page-locked host buffer and a device buffer of the same byte
    size are allocated.

    Returns:
        A tuple (inputs, outputs, bindings, stream): the HostDeviceMem pairs
        of the input bindings, the HostDeviceMem pairs of the output bindings,
        the device buffer addresses as ints (in iteration order), and a newly
        created CUDA stream.
    '''
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    # Iterating over the engine yields the name of each input/output binding.
    for binding in engine:
        # Element count of the binding, scaled by the engine's max batch size.
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))

        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype) # page-locked memory buffer (won't swap to disk)
        device_mem = cuda.mem_alloc(host_mem.nbytes)

        # Append the device buffer address to device bindings.
        # When cast to int, it's a linear index into the context's memory (like memory address).
        bindings.append(int(device_mem))

        # Append to the appropriate input/output list.
        if engine.binding_is_input(binding):
            inputs.append(self.HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(self.HostDeviceMem(host_mem, device_mem))

    return inputs, outputs, bindings, stream

TensorRT 10.0

def allocate_buffers(self, engine):
    '''
    Allocate a host/device buffer pair for every I/O tensor of the engine.

    Uses the TensorRT 10.0 name-based engine API: tensors are enumerated by
    index, looked up by name, and classified as input or output through
    their tensor I/O mode.

    Returns:
        A tuple (inputs, outputs, bindings, stream): the HostDeviceMem pairs
        of the input tensors, the HostDeviceMem pairs of the output tensors,
        the device buffer addresses as ints (in tensor index order), and a
        newly created CUDA stream.
    '''
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()

    for index in range(engine.num_io_tensors):
        name = engine.get_tensor_name(index)
        num_elements = trt.volume(engine.get_tensor_shape(name))
        np_dtype = trt.nptype(engine.get_tensor_dtype(name))

        # Page-locked host buffer (won't swap to disk) and a device buffer
        # with the same number of bytes.
        host_buf = cuda.pagelocked_empty(num_elements, np_dtype)
        device_buf = cuda.mem_alloc(host_buf.nbytes)

        # The device allocation cast to int serves as the buffer address.
        bindings.append(int(device_buf))

        # Route the pair to the list that matches the tensor's I/O mode.
        is_input = engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
        destination = inputs if is_input else outputs
        destination.append(self.HostDeviceMem(host_buf, device_buf))

    return inputs, outputs, bindings, stream

从 Python 的 enqueueV2 过渡到 enqueueV3

TensorRT 8.x

# Allocate one device buffer of input_nbytes for each of the input_num inputs.
d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(input_num)]

# Allocate a page-locked host buffer for the output and a device buffer with
# the same number of bytes.
# NOTE(review): output_nbytes is passed as the first argument of
# pagelocked_empty together with dtype=np.float32 - confirm whether it is
# meant as an element count rather than a byte count.
h_output = cuda.pagelocked_empty(output_nbytes, dtype=np.float32)
d_output = cuda.mem_alloc(h_output.nbytes)

# Transfer data from host to device (asynchronously, on `stream`).
# NOTE: indices 0-2 are hard-coded, so at least three inputs are expected.
cuda.memcpy_htod_async(d_inputs[0], input_a, stream)
cuda.memcpy_htod_async(d_inputs[1], input_b, stream)
cuda.memcpy_htod_async(d_inputs[2], input_c, stream)

# Run inference. The bindings list holds every input device address followed
# by the output device address, each cast to int.
context.execute_async_v2(bindings=[int(d_inp) for d_inp in d_inputs] + [int(d_output)], stream_handle=stream.handle)

# Synchronize the stream
stream.synchronize()

TensorRT 10.0

# Allocate one device buffer of input_nbytes for each of the input_num inputs.
d_inputs = [cuda.mem_alloc(input_nbytes) for binding in range(input_num)]

# Allocate a page-locked host buffer for the output and a device buffer with
# the same number of bytes.
# NOTE(review): output_nbytes is passed as the first argument of
# pagelocked_empty together with dtype=np.float32 - confirm whether it is
# meant as an element count rather than a byte count.
h_output = cuda.pagelocked_empty(output_nbytes, dtype=np.float32)
d_output = cuda.mem_alloc(h_output.nbytes)

# Transfer data from host to device (asynchronously, on `stream`).
# NOTE: indices 0-2 are hard-coded, so at least three inputs are expected.
cuda.memcpy_htod_async(d_inputs[0], input_a, stream)
cuda.memcpy_htod_async(d_inputs[1], input_b, stream)
cuda.memcpy_htod_async(d_inputs[2], input_c, stream)

# Setup tensor address: device addresses as ints, the three inputs first,
# then the output.
bindings = [int(d_inputs[i]) for i in range(3)] + [int(d_output)]

# bindings[i] is registered under the name of the i-th I/O tensor, so the
# order of the list above is assumed to match the engine's I/O tensor order.
for i in range(engine.num_io_tensors):
    context.set_tensor_address(engine.get_tensor_name(i), bindings[i])

# Run inference. Only the stream handle is passed here; the buffer addresses
# were registered on the context in the loop above.
context.execute_async_v3(stream_handle=stream.handle)

# Synchronize the stream
stream.synchronize()

引擎构建，仅使用 build_serialized_network

TensorRT 8.x

# Serialized engine; stays None until one of the build paths below succeeds.
engine_bytes = None
try:
    # Preferred path: build the serialized network directly.
    engine_bytes = self.builder.build_serialized_network(self.network, self.config)
except AttributeError:
    # Fallback when the call above raises AttributeError: build an engine
    # object, serialize it, then drop the engine reference.
    engine = self.builder.build_engine(self.network, self.config)
    engine_bytes = engine.serialize()
    del engine
# Fail if neither path produced a (truthy) serialized engine.
assert engine_bytes

TensorRT 10.0

# Build the serialized network directly; there is no build_engine fallback.
engine_bytes = self.builder.build_serialized_network(self.network, self.config)
# A None result is treated as a build failure: log it and exit with status 1.
if engine_bytes is None:
    log.error("Failed to create engine")
    sys.exit(1)

添加的 Python API#

类型

APILanguage
ExecutionContextAllocationStrategy
IGpuAsyncAllocator
InterfaceInfo
IPluginResource
IPluginV3
IStreamReader
IVersionedInterface

方法和属性

ICudaEngine.is_debug_tensor()
ICudaEngine.minimum_weight_streaming_budget
ICudaEngine.streamable_weights_size
ICudaEngine.weight_streaming_budget
IExecutionContext.get_debug_listener()
IExecutionContext.get_debug_state()
IExecutionContext.set_all_tensors_debug_state()
IExecutionContext.set_debug_listener()
IExecutionContext.set_tensor_debug_state()
IExecutionContext.update_device_memory_size_for_shapes()
IGpuAllocator.allocate_async()
IGpuAllocator.deallocate_async()
INetworkDefinition.add_plugin_v3()
INetworkDefinition.is_debug_tensor()
INetworkDefinition.mark_debug()
INetworkDefinition.unmark_debug()
IPluginRegistry.acquire_plugin_resource()
IPluginRegistry.all_creators
IPluginRegistry.deregister_creator()
IPluginRegistry.get_creator()
IPluginRegistry.register_creator()
IPluginRegistry.release_plugin_resource()

移除的 Python API#

移除的 Python API 及其建议的替代 API#
Python API	替代 API
BuilderFlag.ENABLE_TACTIC_HEURISTIC	构建器优化级别 2
BuilderFlag.STRICT_TYPES	使用所有三个标志 BuilderFlag.DIRECT_IO BuilderFlag.PREFER_PRECISION_CONSTRAINTS BuilderFlag.REJECT_EMPTY_ALGORITHMS
EngineCapability.DEFAULT EngineCapability.SAFE_DLA EngineCapability.SAFE_GPU	EngineCapability.STANDARD EngineCapability.DLA_STANDALONE EngineCapability.SAFETY
IAlgorithmIOInfo.tensor_format	步幅、数据类型和向量化信息足以唯一标识张量格式。
IBuilder.max_batch_size	不再支持隐式批处理。
IBuilderConfig.max_workspace_size	IBuilderConfig.set_memory_pool_limit() with MemoryPoolType.WORKSPACE IBuilderConfig.get_memory_pool_limit() with MemoryPoolType.WORKSPACE
IBuilderConfig.min_timing_iterations	IBuilderConfig.avg_timing_iterations
1ICudaEngine.binding_is_input() 2ICudaEngine.get_binding_bytes_per_component() 3ICudaEngine.get_binding_components_per_element() 4ICudaEngine.get_binding_dtype() 5ICudaEngine.get_binding_format() 6ICudaEngine.get_binding_format_desc() 7ICudaEngine.get_binding_index() 8ICudaEngine.get_binding_name() 9ICudaEngine.get_binding_shape() 10ICudaEngine.get_binding_vectorized_dim() 11ICudaEngine.get_location() 12ICudaEngine.get_profile_shape() 13ICudaEngine.get_profile_shape_input() 14ICudaEngine.has_implicit_batch_dimension() 15ICudaEngine.is_execution_binding() 16ICudaEngine.is_shape_binding() 17ICudaEngine.max_batch_size() 18ICudaEngine.num_bindings()	1ICudaEngine.get_tensor_mode() 2ICudaEngine.get_tensor_bytes_per_component() 3ICudaEngine.get_tensor_components_per_element() 4ICudaEngine.get_tensor_dtype() 5ICudaEngine.get_tensor_format() 6ICudaEngine.get_tensor_format_desc() 7No name-based equivalent replacement 8No name-based equivalent replacement 9ICudaEngine.get_tensor_shape() 10ICudaEngine.get_tensor_vectorized_dim() 11ITensor.location 12ICudaEngine.get_tensor_profile_shape() 13ICudaEngine.get_tensor_profile_values() 14Implicit batch is no longer supported 15No name-based equivalent replacement 16ICudaEngine.is_shape_inference_io() 17Implicit batch is no longer supported 18ICudaEngine.num_io_tensors()
IExecutionContext.get_binding_shape() IExecutionContext.get_strides() IExecutionContext.set_binding_shape()	IExecutionContext.get_tensor_shape() IExecutionContext.get_tensor_strides() IExecutionContext.set_input_shape()
IFullyConnectedLayer	IMatrixMultiplyLayer
1INetworkDefinition.add_convolution() 2INetworkDefinition.add_deconvolution() 3INetworkDefinition.add_fully_connected() 4INetworkDefinition.add_padding() 5INetworkDefinition.add_pooling() 6INetworkDefinition.add_rnn_v2() 7INetworkDefinition.has_explicit_precision 8INetworkDefinition.has_implicit_batch_dimension	1INetworkDefinition.add_convolution_nd() 2INetworkDefinition.add_deconvolution_nd() 3INetworkDefinition.add_matrix_multiply() 4INetworkDefinition.add_padding_nd() 5INetworkDefinition.add_pooling_nd() 6INetworkDefinition.add_loop() 7Explicit precision support is removed in 10.0 8Implicit batch is no longer supported
IRNNv2Layer	ILoop
NetworkDefinitionCreationFlag.EXPLICIT_BATCH NetworkDefinitionCreationFlag.EXPLICIT_PRECISION	10.0 版本中已移除支持
PaddingMode.CAFFE_ROUND_DOWN PaddingMode.CAFFE_ROUND_UP	自 9.0 版本起不支持 Caffe
PreviewFeature.DISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805 PreviewFeature.FASTER_DYNAMIC_SHAPES_0805	外部策略始终对核心代码禁用。 此标志默认开启。
ProfilingVerbosity.DEFAULT ProfilingVerbosity.VERBOSE	ProfilingVerbosity.LAYER_NAMES_ONLY ProfilingVerbosity.DETAILED
ResizeMode	使用 `InterpolationMode`。别名已移除。
SampleMode.DEFAULT	SampleMode.STRICT_BOUNDS
SliceMode	使用 `SampleMode`。别名已移除。

C++#

C++ API 更改#

从 C++ 的 enqueueV2 过渡到 enqueueV3

TensorRT 8.x

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

// Create the execution context; bail out if creation failed.
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}
// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
// enqueueV2 receives the device bindings array from the buffer manager.
if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
{
    return false;
}
// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

TensorRT 10.0

// Create RAII buffer manager object.
samplesCommon::BufferManager buffers(mEngine);

// Create the execution context; bail out if creation failed.
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

// Register the device buffer of every I/O tensor on the context by tensor name.
// enqueueV3 below takes only the stream, so the addresses must be set here.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

// Pick a random digit to try to infer.
srand(time(NULL));
int32_t const digit = rand() % 10;

// Read the input data into the managed buffers.
// There should be just 1 input tensor.
ASSERT(mParams.inputTensorNames.size() == 1);

if (!processInput(buffers, mParams.inputTensorNames[0], digit))
{
    return false;
}
// Create a CUDA stream to execute this inference.
cudaStream_t stream;
CHECK(cudaStreamCreate(&stream));

// Asynchronously copy data from host input buffers to device input buffers.
buffers.copyInputToDeviceAsync(stream);

// Asynchronously enqueue the inference work.
if (!context->enqueueV3(stream))
{
    return false;
}

// Asynchronously copy data from device output buffers to host output buffers.
buffers.copyOutputToHostAsync(stream);

// Wait for the work in the stream to complete.
CHECK(cudaStreamSynchronize(stream));

// Release stream.
CHECK(cudaStreamDestroy(stream));

64 位维度更改#

Dims 持有的维度已从 int32_t 更改为 int64_t。但是，在 TensorRT 10.0 中，TensorRT 通常会拒绝使用超出 int32_t 范围的维度的网络。IShapeLayer 返回的张量类型现在是 DataType::kINT64。如果需要 32 位维度，请使用 ICastLayer 将结果转换为 DataType::kINT32 类型的张量。

请检查所有与 Dims 之间进行按位复制（复制到 Dims 或从 Dims 复制）的代码，确保其对 int64_t 维度仍然正确。

添加的 C++ API#

枚举

ActivationType::kGELU_ERF
ActivationType::kGELU_TANH
BuilderFlag::kREFIT_IDENTICAL
BuilderFlag::kSTRIP_PLAN
BuilderFlag::kWEIGHT_STREAMING
DataType::kINT4
LayerType::kPLUGIN_V3

类型

APILanguage
Dims64
ExecutionContextAllocationStrategy
IGpuAsyncAllocator
InterfaceInfo
IPluginResource
IPluginV3
IStreamReader
IVersionedInterface

方法和属性

getInferLibBuildVersion
getInferLibMajorVersion
getInferLibMinorVersion
getInferLibPatchVersion
ICudaEngine::createRefitter
ICudaEngine::getMinimumWeightStreamingBudget
ICudaEngine::getStreamableWeightsSize
ICudaEngine::getWeightStreamingBudget
ICudaEngine::isDebugTensor
ICudaEngine::setWeightStreamingBudget
IExecutionContext::getDebugListener
IExecutionContext::getTensorDebugState
IExecutionContext::setAllTensorsDebugState
IExecutionContext::setDebugListener
IExecutionContext::setOutputTensorAddress
IExecutionContext::setTensorDebugState
IExecutionContext::updateDeviceMemorySizeForShapes
IGpuAllocator::allocateAsync
IGpuAllocator::deallocateAsync
INetworkDefinition::addPluginV3
INetworkDefinition::isDebugTensor
INetworkDefinition::markDebug
INetworkDefinition::unmarkDebug
IPluginRegistry::acquirePluginResource
IPluginRegistry::deregisterCreator
IPluginRegistry::getAllCreators
IPluginRegistry::getCreator
IPluginRegistry::registerCreator
IPluginRegistry::releasePluginResource

移除的 C++ API#

移除的 C++ API 及其建议的替代 API#
C++ API	替代 API
BuilderFlag::kENABLE_TACTIC_HEURISTIC	构建器优化级别 2
BuilderFlag::kSTRICT_TYPES	使用所有三个标志 kREJECT_EMPTY_ALGORITHMS kDIRECT_IO kPREFER_PRECISION_CONSTRAINTS。注意：在移除枚举成员时（适用于此列表中的所有枚举），我们已将枚举值改为连续数字。
EngineCapability::kDEFAULT EngineCapability::kSAFE_DLA EngineCapability::kSAFE_GPU	EngineCapability::kSTANDARD EngineCapability::kDLA_STANDALONE EngineCapability::kSAFETY
IAlgorithm::getAlgorithmIOInfo()	IAlgorithm::getAlgorithmIOInfoByIndex()
IAlgorithmIOInfo::getTensorFormat()	步幅、数据类型和向量化信息足以唯一标识张量格式。
IBuilder::buildEngineWithConfig() IBuilder::destroy() IBuilder::getMaxBatchSize() IBuilder::setMaxBatchSize()	IBuilder::buildSerializedNetwork() delete ObjectName Implicit batch is no longer supported Implicit batch is no longer supported
IBuilderConfig::destroy() IBuilderConfig::getMaxWorkspaceSize() IBuilderConfig::getMinTimingIterations() IBuilderConfig::setMaxWorkspaceSize() IBuilderConfig::setMinTimingIterations()	delete ObjectName IBuilderConfig::getMemoryPoolLimit() with MemoryPoolType::kWORKSPACE IBuilderConfig::getAvgTimingIterations() IBuilderConfig::setMemoryPoolLimit() with MemoryPoolType::kWORKSPACE IBuilderConfig::setAvgTimingIterations()
1IConvolutionLayer::getDilation() 2IConvolutionLayer::getKernelSize() 3IConvolutionLayer::getPadding() 4IConvolutionLayer::getStride() 5IConvolutionLayer::setDilation() 6IConvolutionLayer::setKernelSize() 7IConvolutionLayer::setPadding() 8IConvolutionLayer::setStride()	1IConvolutionLayer::getDilationNd() 2IConvolutionLayer::getKernelSizeNd() 3IConvolutionLayer::getPaddingNd() 4IConvolutionLayer::getStrideNd() 5IConvolutionLayer::setDilationNd() 6IConvolutionLayer::setKernelSizeNd() 7IConvolutionLayer::setPaddingNd() 8IConvolutionLayer::setStrideNd()
1ICudaEngine::bindingIsInput() 2ICudaEngine::destroy() 3ICudaEngine::getBindingBytesPerComponent() 4ICudaEngine::getBindingComponentsPerElement() 5ICudaEngine::getBindingDataType() 6ICudaEngine::getBindingDimensions() 7ICudaEngine::getBindingFormat() 8ICudaEngine::getBindingFormatDesc() 9ICudaEngine::getBindingIndex() 10ICudaEngine::getBindingName() 11ICudaEngine::getBindingVectorizedDim() 12ICudaEngine::getLocation() 13ICudaEngine::getMaxBatchSize() 14ICudaEngine::getNbBindings() 15ICudaEngine::getProfileDimensions() 16ICudaEngine::getProfileShapeValues() 17ICudaEngine::hasImplicitBatchDimension() 18ICudaEngine::isExecutionBinding() 19ICudaEngine::isShapeBinding()	1ICudaEngine::getTensorIOMode() 2delete ObjectName 3ICudaEngine::getTensorBytesPerComponent() 4ICudaEngine::getTensorComponentsPerElement() 5ICudaEngine::getTensorDataType() 6ICudaEngine::getTensorShape() 7ICudaEngine::getTensorFormat() 8ICudaEngine::getTensorFormatDesc() 9Name-based methods 10Name-based methods 11ICudaEngine::getTensorVectorizedDim() 12ITensor::getLocation() 13Implicit batch is no longer supported 14ICudaEngine::getNbIOTensors() 15ICudaEngine::getProfileShape() 16ICudaEngine::getShapeValues() 17Implicit batch is no longer supported 18No name-based equivalent replacement 19ICudaEngine::isShapeInferenceIO()
1IDeconvolutionLayer::getKernelSize() 2IDeconvolutionLayer::getPadding() 3IDeconvolutionLayer::getStride() 4IDeconvolutionLayer::setKernelSize() 5IDeconvolutionLayer::setPadding() 6IDeconvolutionLayer::setStride()	1IDeconvolutionLayer::getKernelSizeNd() 2IDeconvolutionLayer::getPaddingNd() 3IDeconvolutionLayer::getStrideNd() 4IDeconvolutionLayer::setKernelSizeNd() 5IDeconvolutionLayer::setPaddingNd() 6IDeconvolutionLayer::setStrideNd()
1IExecutionContext::destroy() 2IExecutionContext::enqueue() 3IExecutionContext::enqueueV2() 4IExecutionContext::execute() 5IExecutionContext::getBindingDimensions() 6IExecutionContext::getShapeBinding() 7IExecutionContext::getStrides() 8IExecutionContext::setBindingDimensions() 9IExecutionContext::setInputShapeBinding() 10IExecutionContext::setOptimizationProfile()	1delete ObjectName 2IExecutionContext::enqueueV3() 3IExecutionContext::enqueueV3() 4IExecutionContext::executeV2() 5IExecutionContext::getTensorShape() 6IExecutionContext::getTensorAddress() or getOutputTensorAddress() 7IExecutionContext::getTensorStrides() 8IExecutionContext::setInputShape() 9IExecutionContext::setInputTensorAddress() or setTensorAddress() 10IExecutionContext::setOptimizationProfileAsync()
IFullyConnectedLayer	IMatrixMultiplyLayer
IGpuAllocator::free()	IGpuAllocator::deallocate()
IHostMemory::destroy()	delete ObjectName
1INetworkDefinition::addConvolution() 2INetworkDefinition::addDeconvolution() 3INetworkDefinition::addFullyConnected() 4INetworkDefinition::addPadding() 5INetworkDefinition::addPooling() 6INetworkDefinition::addRNNv2() 7INetworkDefinition::destroy() 8INetworkDefinition::hasExplicitPrecision() 9INetworkDefinition::hasImplicitBatchDimension()	1INetworkDefinition::addConvolutionNd() 2INetworkDefinition::addDeconvolutionNd() 3INetworkDefinition::addMatrixMultiply() 4INetworkDefinition::addPaddingNd() 5INetworkDefinition::addPoolingNd() 6INetworkDefinition::addLoop() 7delete ObjectName 8Explicit precision support is removed in 10.0 9Implicit batch support is removed
IOnnxConfig::destroy()	delete ObjectName
IPaddingLayer::getPostPadding() IPaddingLayer::getPrePadding() IPaddingLayer::setPostPadding() IPaddingLayer::setPrePadding()	IPaddingLayer::getPostPaddingNd() IPaddingLayer::getPrePaddingNd() IPaddingLayer::setPostPaddingNd() IPaddingLayer::setPrePaddingNd()
1IPoolingLayer::getPadding() 2IPoolingLayer::getStride() 3IPoolingLayer::getWindowSize() 4IPoolingLayer::setPadding() 5IPoolingLayer::setStride() 6IPoolingLayer::setWindowSize()	1IPoolingLayer::getPaddingNd() 2IPoolingLayer::getStrideNd() 3IPoolingLayer::getWindowSizeNd() 4IPoolingLayer::setPaddingNd() 5IPoolingLayer::setStrideNd() 6IPoolingLayer::setWindowSizeNd()
IRefitter::destroy()	delete ObjectName
IResizeLayer::getAlignCorners() IResizeLayer::setAlignCorners()	IResizeLayer::getAlignCornersNd() IResizeLayer::setAlignCornersNd()
IRuntime::deserializeCudaEngine (void const* blob, std::size_t size, IPluginFactory* pluginFactory) IRuntime::destroy()	使用带两个参数的 `deserializeCudaEngine` delete ObjectName
IRNNv2Layer	ILoop
kNV_TENSORRT_VERSION_IMPL	#define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) * 10000L + (minor) * 100L + (patch) * 1L) 注意：TensorRT 版本编码已更改，以容纳两位数的次要版本号。
NetworkDefinitionCreationFlag::kEXPLICIT_BATCH NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION	10.0 版本中已移除支持
NV_TENSORRT_SONAME_MAJOR NV_TENSORRT_SONAME_MINOR NV_TENSORRT_SONAME_PATCH	NV_TENSORRT_MAJOR NV_TENSORRT_MINOR NV_TENSORRT_PATCH
PaddingMode::kCAFFE_ROUND_DOWN PaddingMode::kCAFFE_ROUND_UP	自 9.0 版本起不支持 Caffe
PreviewFeature::kDISABLE_EXTERNAL_TACTIC_SOURCES_FOR_CORE_0805 PreviewFeature::kFASTER_DYNAMIC_SHAPES_0805	外部策略始终对核心代码禁用。 此标志默认开启。
ProfilingVerbosity::kDEFAULT ProfilingVerbosity::kVERBOSE	ProfilingVerbosity::kLAYER_NAMES_ONLY ProfilingVerbosity::kDETAILED
ResizeMode	使用 `InterpolationMode`。别名已移除。
RNNDirection RNNGateType RNNInputMode RNNOperation	已移除 RNN 相关的数据结构
SampleMode::kDEFAULT	SampleMode::kSTRICT_BOUNDS
SliceMode	使用 `SampleMode`。别名已移除。

移除的 C++ 插件#

移除的 C++ 插件及其建议的替代插件#

C++ 插件

替代插件

createAnchorGeneratorPlugin()
createBatchedNMSPlugin()
createInstanceNormalizationPlugin()
createNMSPlugin()
createNormalizePlugin()
createPriorBoxPlugin()
createRegionPlugin()
createReorgPlugin()
createRPNROIPlugin()
createSplitPlugin()

GridAnchorPluginCreator::createPlugin()
BatchedNMSPluginCreator::createPlugin()
InstanceNormalizationPluginCreator::createPlugin()
NMSPluginCreator::createPlugin()
NormalizePluginCreator::createPlugin()
PriorBoxPluginCreator::createPlugin()
RegionPluginCreator::createPlugin()
ReorgPluginCreator::createPlugin()
RPROIPluginCreator::createPlugin()
INetworkDefinition::addSlice()

struct Quadruple

移除的安全 C++ API#

移除的安全 C++ API 及其建议的替代安全 API#

安全 C++ API

替代安全 API

safe::ICudaEngine::bindingIsInput()
safe::ICudaEngine::getBindingBytesPerComponent()
safe::ICudaEngine::getBindingComponentsPerElement()
safe::ICudaEngine::getBindingDataType()
safe::ICudaEngine::getBindingDimensions()
safe::ICudaEngine::getBindingIndex()
safe::ICudaEngine::getBindingName()
safe::ICudaEngine::getBindingVectorizedDim()
safe::ICudaEngine::getNbBindings()
safe::ICudaEngine::getBindingFormat()

safe::ICudaEngine::tensorIOMode()
safe::ICudaEngine::getTensorBytesPerComponent()
safe::ICudaEngine::getTensorComponentsPerElement()
safe::ICudaEngine::getTensorDataType()
safe::ICudaEngine::getTensorShape()
safe::name-based methods
safe::name-based methods
safe::ICudaEngine::getTensorVectorizedDim()
safe::ICudaEngine::getNbIOTensors()
safe::ICudaEngine::getTensorFormat()

safe::IExecutionContext::enqueueV2()
safe::IExecutionContext::getStrides()

safe::IExecutionContext::enqueueV3()
safe::IExecutionContext::getTensorStrides()

trtexec#

trtexec 标志更改#

标志 workspace 和 minTiming 的更改

TensorRT 8.x

# TensorRT 8.x invocation: uses the --workspace and --minTiming flags.
trtexec \
    --onnx=/path/to/model.onnx \
    --saveEngine=/path/to/engine.trt \
    --optShapes=input:$INPUT_SHAPE \
    --avgTiming=1 \
    --workspace=1024 \
    --minTiming=1

TensorRT 10.0

# TensorRT 10.0 invocation: --workspace=1024 becomes --memPoolSize=workspace:1024
# and --minTiming is no longer passed (only --avgTiming remains).
trtexec \
    --onnx=/path/to/model.onnx \
    --saveEngine=/path/to/engine.trt \
    --optShapes=input:$INPUT_SHAPE \
    --avgTiming=1 \
    --memPoolSize=workspace:1024

移除的 trtexec 标志#

移除的 trtexec 标志及其建议的替代标志#
trtexec 标志	替代标志
--minTiming	--avgTiming
`--preview=features` 选项 `disableExternalTacticSourcesForCore0805` `fasterDynamicShapes0805`	不适用
--workspace=N	--memPoolSize=poolspec

已弃用的 trtexec 标志#

--buildOnly
--explicitPrecision
--heuristic
--nvtxMode