C/C++ API
C/C++ API 允许您直接从 C/C++ 访问 libtransformer_engine.so 库中定义的自定义内核,而无需借助 Python。
头文件
- transformer_engine.h
NVTETensor
NVTEDType
NVTETensorParam
NVTEScalingMode
nvte_create_tensor()
nvte_destroy_tensor()
nvte_tensor_data()
nvte_tensor_columnwise_data()
nvte_tensor_shape()
nvte_tensor_columnwise_shape()
nvte_tensor_ndims()
nvte_tensor_size()
nvte_tensor_numel()
nvte_tensor_element_size()
nvte_tensor_type()
nvte_tensor_amax()
nvte_tensor_scale()
nvte_tensor_scale_inv()
nvte_tensor_scale_inv_shape()
nvte_zero_tensor()
nvte_set_tensor_param()
nvte_get_tensor_param()
nvte_tensor_scaling_mode()
nvte_tensor_pack_create()
nvte_tensor_pack_destroy()
NVTEShape
NVTEBasicTensor
NVTETensorPack
transformer_engine
transformer_engine::DType
transformer_engine::DType::kByte
transformer_engine::DType::kInt32
transformer_engine::DType::kInt64
transformer_engine::DType::kFloat32
transformer_engine::DType::kFloat16
transformer_engine::DType::kBFloat16
transformer_engine::DType::kFloat8E4M3
transformer_engine::DType::kFloat8E5M2
transformer_engine::DType::kFloat8E8M0
transformer_engine::DType::kNumTypes
transformer_engine::TensorWrapper
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::~TensorWrapper()
transformer_engine::TensorWrapper::operator=()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::operator=()
transformer_engine::TensorWrapper::set_parameter()
transformer_engine::TensorWrapper::set_rowwise_data()
transformer_engine::TensorWrapper::set_columnwise_data()
transformer_engine::TensorWrapper::set_scale()
transformer_engine::TensorWrapper::set_amax()
transformer_engine::TensorWrapper::set_rowwise_scale_inv()
transformer_engine::TensorWrapper::set_columnwise_scale_inv()
transformer_engine::TensorWrapper::get_parameter()
transformer_engine::TensorWrapper::get_rowwise_data()
transformer_engine::TensorWrapper::get_columnwise_data()
transformer_engine::TensorWrapper::get_scale()
transformer_engine::TensorWrapper::get_amax()
transformer_engine::TensorWrapper::get_rowwise_scale_inv()
transformer_engine::TensorWrapper::get_columnwise_scale_inv()
transformer_engine::TensorWrapper::data()
transformer_engine::TensorWrapper::shape()
transformer_engine::TensorWrapper::columnwise_shape()
transformer_engine::TensorWrapper::size()
transformer_engine::TensorWrapper::ndim()
transformer_engine::TensorWrapper::numel()
transformer_engine::TensorWrapper::element_size()
transformer_engine::TensorWrapper::bytes()
transformer_engine::TensorWrapper::dtype()
transformer_engine::TensorWrapper::dptr()
transformer_engine::TensorWrapper::columnwise_dptr()
transformer_engine::TensorWrapper::amax()
transformer_engine::TensorWrapper::scale()
transformer_engine::TensorWrapper::scale_inv()
transformer_engine::TensorWrapper::scale_inv_shape()
transformer_engine::TensorWrapper::scaling_mode()
transformer_engine::TensorWrapper::zero_()
transformer_engine::TensorWrapper::defaultData
transformer_engine::TensorWrapper::defaultShape
transformer_engine::TensorWrapper::convertShape()
transformer_engine::TensorWrapper::convertShape()
transformer_engine::TensorWrapper::tensor_
- activation.h
- cast.h
- fused_attn.h
NVTE_QKV_Layout
NVTE_QKV_Layout::NVTE_SB3HD
NVTE_QKV_Layout::NVTE_SBH3D
NVTE_QKV_Layout::NVTE_SBHD_SB2HD
NVTE_QKV_Layout::NVTE_SBHD_SBH2D
NVTE_QKV_Layout::NVTE_SBHD_SBHD_SBHD
NVTE_QKV_Layout::NVTE_BS3HD
NVTE_QKV_Layout::NVTE_BSH3D
NVTE_QKV_Layout::NVTE_BSHD_BS2HD
NVTE_QKV_Layout::NVTE_BSHD_BSH2D
NVTE_QKV_Layout::NVTE_BSHD_BSHD_BSHD
NVTE_QKV_Layout::NVTE_T3HD
NVTE_QKV_Layout::NVTE_TH3D
NVTE_QKV_Layout::NVTE_THD_T2HD
NVTE_QKV_Layout::NVTE_THD_TH2D
NVTE_QKV_Layout::NVTE_THD_THD_THD
NVTE_QKV_Layout_Group
NVTE_QKV_Format
NVTE_Bias_Type
NVTE_Mask_Type
NVTE_Fused_Attn_Backend
nvte_get_qkv_layout_group()
nvte_get_qkv_format()
nvte_get_fused_attn_backend()
nvte_fused_attn_fwd_qkvpacked()
nvte_fused_attn_bwd_qkvpacked()
nvte_fused_attn_fwd_kvpacked()
nvte_fused_attn_bwd_kvpacked()
nvte_fused_attn_fwd()
nvte_fused_attn_bwd()
- fused_rope.h
- gemm.h
- normalization.h
- padding.h
- permutation.h
- recipe.h
- softmax.h
nvte_scaled_softmax_forward()
nvte_scaled_softmax_backward()
nvte_scaled_masked_softmax_forward()
nvte_scaled_masked_softmax_backward()
nvte_scaled_upper_triang_masked_softmax_forward()
nvte_scaled_upper_triang_masked_softmax_backward()
nvte_scaled_aligned_causal_masked_softmax_forward()
nvte_scaled_aligned_causal_masked_softmax_backward()
- swizzle.h
- transpose.h
nvte_cast_transpose()
nvte_transpose()
nvte_cast_transpose_dbias()
nvte_fp8_transpose_dbias()
nvte_multi_cast_transpose()
nvte_cast_transpose_dbias_dgelu()
nvte_cast_transpose_dbias_dsilu()
nvte_cast_transpose_dbias_drelu()
nvte_cast_transpose_dbias_dqgelu()
nvte_cast_transpose_dbias_dsrelu()
nvte_dgeglu_cast_transpose()
nvte_dswiglu_cast_transpose()
nvte_dreglu_cast_transpose()
nvte_dqgeglu_cast_transpose()
nvte_dsreglu_cast_transpose()