以下代码示例说明了如何集成 cuTensorNet 功能来执行基本的 MPS 模拟。该工作流程封装在 MPSHelper 类中。完整代码可以在 NVIDIA/cuQuantum 存储库中找到 (此处)。

定义 MPSHelper 类

我们首先定义一个 MPSHelper 类,以跟踪所有物理键和虚拟键的模式和范围。模拟设置也存储在此类中。一旦超出作用域,此类拥有的所有资源都将被释放。

 75class MPSHelper
 76{
 77   public:
 78      /**
 79       * \brief Construct an MPSHelper object for gate splitting algorithm.
 80       *        i       j       k
 81       *     -------A-------B-------                      i        j        k
 82       *           p|       |q            ------->     -------A`-------B`-------
 83       *            GGGGGGGGG                                r|        |s
 84       *           r|       |s
 85       * \param[in] numSites The number of sites in the MPS
 86       * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on. 
 87       * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors. 
 88       * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
 89       * \param[in] typeData The data type for all tensors and gates
 90       * \param[in] typeCompute The compute type for all gate splitting process
 91       */
 92      MPSHelper(int32_t numSites, 
 93                int64_t physExtent,
 94                int64_t maxVirtualExtent,
 95                const std::vector<int64_t>& initialVirtualExtents,
 96                cudaDataType_t typeData, 
 97                cutensornetComputeType_t typeCompute);
 98      
 99      /**
100       * \brief Initialize the MPS metadata and cutensornet library.
101       */
102      cutensornetStatus_t initialize();
103
104      /**
105       * \brief Compute the maximal number of elements for each site.
106       */
107      std::vector<size_t> getMaxTensorElements() const;
108
109      /**
110       * \brief Update the SVD truncation setting.
111       * \param[in] absCutoff The cutoff value for absolute singular value truncation.
112       * \param[in] relCutoff The cutoff value for relative singular value truncation.
113       * \param[in] renorm The option for renormalization of the truncated singular values.
114       * \param[in] partition The option for partitioning of the singular values. 
115       */
116      cutensornetStatus_t setSVDConfig(double absCutoff, 
117                                       double relCutoff, 
118                                       cutensornetTensorSVDNormalization_t renorm,
119                                       cutensornetTensorSVDPartition_t partition);
120
121      /**
122       * \brief Update the algorithm to use for the gating process.
123       * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
124       */
125      void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
126
127      /**
128       * \brief Compute the maximal workspace needed for MPS gating algorithm.
129       * \param[out] workspaceSize The required workspace size on the device. 
130       */
131      cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
132
133      /**
134       * \brief Compute the maximal workspace needed for MPS gating algorithm.
135       * \param[in] work Pointer to the allocated workspace.
136       * \param[in] workspaceSize The required workspace size on the device. 
137       */
138      cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
139
140      /**
141       * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
142       * \param[in] siteA The first site where the gate is applied to.
143       * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
144       * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
145       * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
146       * \param[in] dataInG The input data for the gate tensor. 
147       * \param[in] verbose Whether to print out the runtime information regarding truncation. 
148       * \param[in] stream The CUDA stream on which the computation is performed.
149       */
150      cutensornetStatus_t applyGate(uint32_t siteA, 
151                                    uint32_t siteB, 
152                                    void* dataInA, 
153                                    void* dataInB, 
154                                    const void* dataInG, 
155                                    bool verbose,
156                                    cudaStream_t stream);
157      
158      /**
159       * \brief Free all the tensor descriptors in mpsHelper.
160       */
161      ~MPSHelper()
162      {
163         if (inited_)
164         {
165            for (auto& descTensor: descTensors_)
166            {
167               cutensornetDestroyTensorDescriptor(descTensor);
168            }
169            cutensornetDestroy(handle_);
170            cutensornetDestroyWorkspaceDescriptor(workDesc_);
171         }
172         if (svdConfig_ != nullptr)
173         {
174            cutensornetDestroyTensorSVDConfig(svdConfig_);
175         }
176         if (svdInfo_ != nullptr)
177         {
178            cutensornetDestroyTensorSVDInfo(svdInfo_);
179         }
180      }
181
182   private:
183      int32_t numSites_; ///< Number of sites in the MPS
184      int64_t physExtent_; ///< Extent for the physical index 
185      int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
186      cudaDataType_t typeData_; 
187      cutensornetComputeType_t typeCompute_;
188      
189      bool inited_{false};
190      std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
191      std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
192      std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent. 
193
194      cutensornetHandle_t handle_{nullptr};
195      std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
196      cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
197      cutensornetTensorSVDConfig_t svdConfig_{nullptr};
198      cutensornetTensorSVDInfo_t svdInfo_{nullptr};
199      cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
200      int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
201};

注意

有关所有方法的完整定义,请参阅 NVIDIA/cuQuantum 存储库中的完整示例。

设置 MPS 模拟设置

接下来,在主函数中,我们需要为 MPS 模拟选择模拟设置(即,站点数、初始范围和数据类型)。

558   /***********************************
559   * Step 1: basic MPS setup
560   ************************************/
561
562   // setup the simulation setting for the MPS
563   typedef std::complex<double> complexType;
564   cudaDataType_t typeData = CUDA_C_64F;
565   cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
566   int32_t numSites = 16;
567   int64_t physExtent = 2;
568   int64_t maxVirtualExtent = 12;
569   const std::vector<int64_t> initialVirtualExtents(numSites-1, 1);  // starting MPS with shared extent of 1;
570
571   // initialize an MPSHelper to dynamically update tensor metadats   
572   MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
573   HANDLE_ERROR( mpsHelper.initialize() );

MPS 元数据和所有 cuTensorNet 库对象将由 MPSHelper 管理,而数据指针在主函数中显式管理。

分配内存和初始化数据

接下来,我们为 MPS 操作数和四个 2 量子比特门张量分配内存。每个 MPS 张量的最大张量大小可以通过 MPSHelper 类查询。MPS 张量被初始化为对应于 |00..000> 的状态,而门张量填充随机值。

576   /***********************************
577   * Step 2: data allocation 
578   ************************************/
579
580   // query largest tensor sizes for the MPS
581   const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
582   std::vector<void*> tensors_h;
583   std::vector<void*> tensors_d;
584   for (int32_t i=0; i<numSites; i++)
585   {
586      size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
587      void* data_h = malloc(maxSize);
588      memset(data_h, 0, maxSize);
589      // initialize state to |0000..0000>
590      *(complexType*)(data_h) = complexType(1,0);  
591      void* data_d;
592      HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
593      // data transfer from host to device
594      HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
595      tensors_h.push_back(data_h);
596      tensors_d.push_back(data_d);
597   }
598
599   // initialize 4 random gate tensors on host and copy them to device
600   const int32_t numRandomGates = 4;
601   const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent;  // shape (2, 2, 2, 2)
602   size_t gateSize = sizeof(complexType) * numGateElements;
603   complexType* gates_h[numRandomGates];
604   void* gates_d[numRandomGates];
605   
606   for (int i=0; i<numRandomGates; i++)
607   {
608      gates_h[i] = (complexType*) malloc(gateSize);
609      HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
610      for (int j=0; j<numGateElements; j++)
611      {
612         gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
613      }
614      HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
615   }
616   

设置门分裂选项

然后,我们设置 SVD 截断参数和算法 cutensornetGateSplitAlgo_t 用于门分裂过程。

618   /*****************************************
619   * Step 3: setup options for gate operation
620   ******************************************/
621
622   double absCutoff = 1e-2;
623   double relCutoff = 1e-2;
624   cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1. 
625   cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
626   HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
627
628   cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
629   mpsHelper.setGateAlgorithm(gateAlgo);

查询和分配所需的工作区

一旦设置了所有模拟设置,我们就可以查询所需的工作区大小。在 MPSHelper 内部,所需的工作区大小是根据模拟中涉及的最大张量大小估算的。

632   /********************************************
633   * Step 4: workspace size query and allocation
634   *********************************************/
635
636   int64_t workspaceSize;
637   HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
638
639   void *work = nullptr;
640   std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
641   HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
642
643   HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
644   

执行

在此阶段,我们可以通过迭代所有门张量来执行模拟。MPS 的所有元数据将在 MPSHelper 内部进行管理和更新。

646   /***********************************
647   * Step 5: execution
648   ************************************/
649
650   cudaStream_t stream;
651   HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
652   uint32_t numLayers = 10; // 10 layers of gate
653   for (uint32_t i=0; i<numLayers; i++)
654   {
655      uint32_t start_site = i % 2;
656      std::cout << "Cycle " << i << ":" << std::endl;
657      bool verbose = (i == numLayers - 1);
658      for (uint32_t j=start_site; j<numSites-1; j=j+2)
659      {
660         uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
661         std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
662         void *dataA = tensors_d[j];
663         void *dataB = tensors_d[j+1];
664         void *dataG = gates_d[gateIdx];
665         HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
666      }
667   }
668
669   HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );

释放资源

模拟完成后,我们释放主函数中分配的所有数据指针。

672   /***********************************
673   * Step 6: free resources
674   ************************************/
675   
676   std::cout << "Free all resources" << std::endl;
677
678   for (int i=0; i<numRandomGates; i++)
679   {
680      free(gates_h[i]);
681      HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
682   }
683
684   for (int32_t i=0; i<numSites; i++)
685   {
686      free(tensors_h.at(i));
687      HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
688   }
689
690   HANDLE_CUDA_ERROR( cudaFree(work) );
691   // The MPSHelper destructor will free all internal resources when out of scope
692   return 0;   
693}

MPSHelper 拥有的所有 cuTensorNet 库对象一旦超出作用域将被释放。