以下代码示例说明了如何集成 cuTensorNet 功能来执行基本的 MPS 模拟。该工作流程封装在 MPSHelper 类中。完整代码可以在 NVIDIA/cuQuantum 存储库中找到 (此处)。

定义 MPSHelper 类

我们首先定义一个 MPSHelper 类,以跟踪所有物理键和虚拟键的模式和范围。模拟设置也存储在此类中。一旦超出作用域,此类拥有的所有资源都将被释放。

 75class MPSHelper
 76{
 77   public:
 78      /**
 79       * \brief Construct an MPSHelper object for gate splitting algorithm.
 80       *        i       j       k
 81       *     -------A-------B-------                      i        j        k
 82       *           p|       |q            ------->     -------A`-------B`-------
 83       *            GGGGGGGGG                                r|        |s
 84       *           r|       |s
 85       * \param[in] numSites The number of sites in the MPS
 86       * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on. 
 87       * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors. 
 88       * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
 89       * \param[in] typeData The data type for all tensors and gates
 90       * \param[in] typeCompute The compute type for all gate splitting process
 91       */
 92      MPSHelper(int32_t numSites, 
 93                int64_t physExtent,
 94                int64_t maxVirtualExtent,
 95                const std::vector<int64_t>& initialVirtualExtents,
 96                cudaDataType_t typeData, 
 97                cutensornetComputeType_t typeCompute);
 98      
 99      /**
100       * \brief Initialize the MPS metadata and cutensornet library.
101       */
102      cutensornetStatus_t initialize();
103
104      /**
105       * \brief Compute the maximal number of elements for each site.
106       */
107      std::vector<size_t> getMaxTensorElements() const;
108
109      /**
110       * \brief Update the SVD truncation setting.
111       * \param[in] absCutoff The cutoff value for absolute singular value truncation.
112       * \param[in] relCutoff The cutoff value for relative singular value truncation.
113       * \param[in] renorm The option for renormalization of the truncated singular values.
114       * \param[in] partition The option for partitioning of the singular values. 
115       */
116      cutensornetStatus_t setSVDConfig(double absCutoff, 
117                                       double relCutoff, 
118                                       cutensornetTensorSVDNormalization_t renorm,
119                                       cutensornetTensorSVDPartition_t partition);
120
121      /**
122       * \brief Update the algorithm to use for the gating process.
123       * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
124       */
125      void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
126
127      /**
128       * \brief Compute the maximal workspace needed for MPS gating algorithm.
129       * \param[out] workspaceSize The required workspace size on the device. 
130       */
131      cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
132
133      /**
134       * \brief Compute the maximal workspace needed for MPS gating algorithm.
135       * \param[in] work Pointer to the allocated workspace.
136       * \param[in] workspaceSize The required workspace size on the device. 
137       */
138      cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
139
140      /**
141       * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
142       * \param[in] siteA The first site where the gate is applied to.
143       * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
144       * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
145       * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
146       * \param[in] dataInG The input data for the gate tensor. 
147       * \param[in] verbose Whether to print out the runtime information regarding truncation. 
148       * \param[in] stream The CUDA stream on which the computation is performed.
149       */
150      cutensornetStatus_t applyGate(uint32_t siteA, 
151                                    uint32_t siteB, 
152                                    void* dataInA, 
153                                    void* dataInB, 
154                                    const void* dataInG, 
155                                    bool verbose,
156                                    cudaStream_t stream);
157      
158      /**
159       * \brief Free all the tensor descriptors in mpsHelper.
160       */
161      ~MPSHelper()
162      {
163         if (inited_)
164         {
165            for (auto& descTensor: descTensors_)
166            {
167               cutensornetDestroyTensorDescriptor(descTensor);
168            }
169            cutensornetDestroy(handle_);
170            cutensornetDestroyWorkspaceDescriptor(workDesc_);
171         }
172         if (svdConfig_ != nullptr)
173         {
174            cutensornetDestroyTensorSVDConfig(svdConfig_);
175         }
176         if (svdInfo_ != nullptr)
177         {
178            cutensornetDestroyTensorSVDInfo(svdInfo_);
179         }
180      }
181
182   private:
183      int32_t numSites_; ///< Number of sites in the MPS
184      int64_t physExtent_; ///< Extent for the physical index 
185      int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
186      cudaDataType_t typeData_; 
187      cutensornetComputeType_t typeCompute_;
188      
189      bool inited_{false};
190      std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
191      std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
192      std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent. 
193
194      cutensornetHandle_t handle_{nullptr};
195      std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
196      cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
197      cutensornetTensorSVDConfig_t svdConfig_{nullptr};
198      cutensornetTensorSVDInfo_t svdInfo_{nullptr};
199      cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
200      int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
201};

注意

有关所有方法的完整定义,请参阅 NVIDIA/cuQuantum 存储库中的完整示例。

设置 MPS 模拟设置

接下来,在主函数中,我们需要为 MPS 模拟选择模拟设置(即,站点数、初始范围和数据类型)。

558   /***********************************
559   * Step 1: basic MPS setup
560   ************************************/
561
562   // setup the simulation setting for the MPS
563   typedef std::complex<double> complexType;
564   cudaDataType_t typeData = CUDA_C_64F;
565   cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
566   int32_t numSites = 16;
567   int64_t physExtent = 2;
568   int64_t maxVirtualExtent = 12;
569   const std::vector<int64_t> initialVirtualExtents(numSites-1, 1);  // starting MPS with shared extent of 1;
570
571   // initialize an MPSHelper to dynamically update tensor metadats   
572   MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
573   HANDLE_ERROR( mpsHelper.initialize() );

MPS 元数据和所有 cuTensorNet 库对象将由 MPSHelper 管理,而数据指针在主函数中显式管理。

分配内存和初始化数据

接下来,我们为 MPS 操作数和四个 2 量子比特门张量分配内存。每个 MPS 张量的最大张量大小可以通过 MPSHelper 类查询。MPS 张量被初始化为对应于 |00..000> 的状态,而门张量填充随机值。

576   /***********************************
577   * Step 2: data allocation 
578   ************************************/
579
580   // query largest tensor sizes for the MPS
581   const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
582   std::vector<void*> tensors_h;
583   std::vector<void*> tensors_d;
584   for (int32_t i=0; i<numSites; i++)
585   {
586      size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
587      void* data_h = malloc(maxSize);
588      memset(data_h, 0, maxSize);
589      // initialize state to |0000..0000>
590      *(complexType*)(data_h) = complexType(1,0);  
591      void* data_d;
592      HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
593      // data transfer from host to device
594      HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
595      tensors_h.push_back(data_h);
596      tensors_d.push_back(data_d);
597   }
598
599   // initialize 4 random gate tensors on host and copy them to device
600   const int32_t numRandomGates = 4;
601   const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent;  // shape (2, 2, 2, 2)
602   size_t gateSize = sizeof(complexType) * numGateElements;
603   complexType* gates_h[numRandomGates];
604   void* gates_d[numRandomGates];
605   
606   for (int i=0; i<numRandomGates; i++)
607   {
608      gates_h[i] = (complexType*) malloc(gateSize);
609      HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
610      for (int j=0; j<numGateElements; j++)
611      {
612         gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
613      }
614      HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
615   }
616   

设置门分裂选项

然后,我们设置 SVD 截断参数和算法 cutensornetGateSplitAlgo_t 用于门分裂过程。

618   /*****************************************
619   * Step 3: setup options for gate operation
620   ******************************************/
621
622   double absCutoff = 1e-2;
623   double relCutoff = 1e-2;
624   cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1. 
625   cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
626   HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
627
628   cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
629   mpsHelper.setGateAlgorithm(gateAlgo);

查询和分配所需的工作区

一旦设置了所有模拟设置,我们就可以查询所需的工作区大小。在 MPSHelper 内部,所需的工作区大小是根据模拟中涉及的最大张量大小估算的。

632   /********************************************
633   * Step 4: workspace size query and allocation
634   *********************************************/
635
636   int64_t workspaceSize;
637   HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
638
639   void *work = nullptr;
640   std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
641   HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
642
643   HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
644   

执行

在此阶段,我们可以通过迭代所有门张量来执行模拟。MPS 的所有元数据将在 MPSHelper 内部进行管理和更新。

646   /***********************************
647   * Step 5: execution
648   ************************************/
649
650   cudaStream_t stream;
651   HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
652   uint32_t numLayers = 10; // 10 layers of gate
653   for (uint32_t i=0; i<numLayers; i++)
654   {
655      uint32_t start_site = i % 2;
656      std::cout << "Cycle " << i << ":" << std::endl;
657      bool verbose = (i == numLayers - 1);
658      for (uint32_t j=start_site; j<numSites-1; j=j+2)
659      {
660         uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
661         std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
662         void *dataA = tensors_d[j];
663         void *dataB = tensors_d[j+1];
664         void *dataG = gates_d[gateIdx];
665         HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
666      }
667   }
668
669   HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );

释放资源

模拟完成后,我们释放主函数中分配的所有数据指针。

672   /***********************************
673   * Step 6: free resources
674   ************************************/
675   
676   std::cout << "Free all resources" << std::endl;
677
678   for (int i=0; i<numRandomGates; i++)
679   {
680      free(gates_h[i]);
681      HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
682   }
683
684   for (int32_t i=0; i<numSites; i++)
685   {
686      free(tensors_h.at(i));
687      HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
688   }
689
690   HANDLE_CUDA_ERROR( cudaFree(work) );
691   // The MPSHelper destructor will free all internal resources when out of scope
692   return 0;   
693}

MPSHelper 拥有的所有 cuTensorNet 库对象一旦超出作用域将被释放。