以下代码示例说明了如何集成 cuTensorNet 功能来执行基本的 MPS 模拟。该工作流程封装在 MPSHelper
类中。完整代码可以在 NVIDIA/cuQuantum 存储库中找到 (此处)。
定义 MPSHelper 类¶
我们首先定义一个 MPSHelper
类,以跟踪所有物理键和虚拟键的模式和范围。模拟设置也存储在此类中。一旦超出作用域,此类拥有的所有资源都将被释放。
75class MPSHelper
76{
77 public:
78 /**
79 * \brief Construct an MPSHelper object for gate splitting algorithm.
80 * i j k
81 * -------A-------B------- i j k
82 * p| |q -------> -------A`-------B`-------
83 * GGGGGGGGG r| |s
84 * r| |s
85 * \param[in] numSites The number of sites in the MPS
86 * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on.
87 * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors.
88 * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
89 * \param[in] typeData The data type for all tensors and gates
90 * \param[in] typeCompute The compute type for all gate splitting process
91 */
92 MPSHelper(int32_t numSites,
93 int64_t physExtent,
94 int64_t maxVirtualExtent,
95 const std::vector<int64_t>& initialVirtualExtents,
96 cudaDataType_t typeData,
97 cutensornetComputeType_t typeCompute);
98
99 /**
100 * \brief Initialize the MPS metadata and cutensornet library.
101 */
102 cutensornetStatus_t initialize();
103
104 /**
105 * \brief Compute the maximal number of elements for each site.
106 */
107 std::vector<size_t> getMaxTensorElements() const;
108
109 /**
110 * \brief Update the SVD truncation setting.
111 * \param[in] absCutoff The cutoff value for absolute singular value truncation.
112 * \param[in] relCutoff The cutoff value for relative singular value truncation.
113 * \param[in] renorm The option for renormalization of the truncated singular values.
114 * \param[in] partition The option for partitioning of the singular values.
115 */
116 cutensornetStatus_t setSVDConfig(double absCutoff,
117 double relCutoff,
118 cutensornetTensorSVDNormalization_t renorm,
119 cutensornetTensorSVDPartition_t partition);
120
121 /**
122 * \brief Update the algorithm to use for the gating process.
123 * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
124 */
125 void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
126
127 /**
128 * \brief Compute the maximal workspace needed for MPS gating algorithm.
129 * \param[out] workspaceSize The required workspace size on the device.
130 */
131 cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
132
133 /**
134 * \brief Compute the maximal workspace needed for MPS gating algorithm.
135 * \param[in] work Pointer to the allocated workspace.
136 * \param[in] workspaceSize The required workspace size on the device.
137 */
138 cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
139
140 /**
141 * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
142 * \param[in] siteA The first site where the gate is applied to.
143 * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
144 * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
145 * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
146 * \param[in] dataInG The input data for the gate tensor.
147 * \param[in] verbose Whether to print out the runtime information regarding truncation.
148 * \param[in] stream The CUDA stream on which the computation is performed.
149 */
150 cutensornetStatus_t applyGate(uint32_t siteA,
151 uint32_t siteB,
152 void* dataInA,
153 void* dataInB,
154 const void* dataInG,
155 bool verbose,
156 cudaStream_t stream);
157
158 /**
159 * \brief Free all the tensor descriptors in mpsHelper.
160 */
161 ~MPSHelper()
162 {
163 if (inited_)
164 {
165 for (auto& descTensor: descTensors_)
166 {
167 cutensornetDestroyTensorDescriptor(descTensor);
168 }
169 cutensornetDestroy(handle_);
170 cutensornetDestroyWorkspaceDescriptor(workDesc_);
171 }
172 if (svdConfig_ != nullptr)
173 {
174 cutensornetDestroyTensorSVDConfig(svdConfig_);
175 }
176 if (svdInfo_ != nullptr)
177 {
178 cutensornetDestroyTensorSVDInfo(svdInfo_);
179 }
180 }
181
182 private:
183 int32_t numSites_; ///< Number of sites in the MPS
184 int64_t physExtent_; ///< Extent for the physical index
185 int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
186 cudaDataType_t typeData_;
187 cutensornetComputeType_t typeCompute_;
188
189 bool inited_{false};
190 std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
191 std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
192 std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent.
193
194 cutensornetHandle_t handle_{nullptr};
195 std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
196 cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
197 cutensornetTensorSVDConfig_t svdConfig_{nullptr};
198 cutensornetTensorSVDInfo_t svdInfo_{nullptr};
199 cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
200 int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
201};
注意
有关所有方法的完整定义,请参阅此处的示例 此处。
设置 MPS 模拟设置¶
接下来,在主函数中,我们需要为 MPS 模拟选择模拟设置(即,站点数、初始范围和数据类型)。
558 /***********************************
559 * Step 1: basic MPS setup
560 ************************************/
561
562 // setup the simulation setting for the MPS
563 typedef std::complex<double> complexType;
564 cudaDataType_t typeData = CUDA_C_64F;
565 cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
566 int32_t numSites = 16;
567 int64_t physExtent = 2;
568 int64_t maxVirtualExtent = 12;
569 const std::vector<int64_t> initialVirtualExtents(numSites-1, 1); // starting MPS with shared extent of 1;
570
571 // initialize an MPSHelper to dynamically update tensor metadats
572 MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
573 HANDLE_ERROR( mpsHelper.initialize() );
MPS 元数据和所有 cuTensorNet 库对象将由 MPSHelper
管理,而数据指针在主函数中显式管理。
分配内存和初始化数据¶
接下来,我们为 MPS 操作数和四个 2 量子比特门张量分配内存。每个 MPS 张量的最大张量大小可以通过 MPSHelper
类查询。MPS 张量被初始化为对应于 |00..000>
的状态,而门张量填充随机值。
576 /***********************************
577 * Step 2: data allocation
578 ************************************/
579
580 // query largest tensor sizes for the MPS
581 const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
582 std::vector<void*> tensors_h;
583 std::vector<void*> tensors_d;
584 for (int32_t i=0; i<numSites; i++)
585 {
586 size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
587 void* data_h = malloc(maxSize);
588 memset(data_h, 0, maxSize);
589 // initialize state to |0000..0000>
590 *(complexType*)(data_h) = complexType(1,0);
591 void* data_d;
592 HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
593 // data transfer from host to device
594 HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
595 tensors_h.push_back(data_h);
596 tensors_d.push_back(data_d);
597 }
598
599 // initialize 4 random gate tensors on host and copy them to device
600 const int32_t numRandomGates = 4;
601 const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent; // shape (2, 2, 2, 2)
602 size_t gateSize = sizeof(complexType) * numGateElements;
603 complexType* gates_h[numRandomGates];
604 void* gates_d[numRandomGates];
605
606 for (int i=0; i<numRandomGates; i++)
607 {
608 gates_h[i] = (complexType*) malloc(gateSize);
609 HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
610 for (int j=0; j<numGateElements; j++)
611 {
612 gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
613 }
614 HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
615 }
616
设置门分裂选项¶
然后,我们设置 SVD 截断参数和算法 cutensornetGateSplitAlgo_t
用于门分裂过程。
618 /*****************************************
619 * Step 3: setup options for gate operation
620 ******************************************/
621
622 double absCutoff = 1e-2;
623 double relCutoff = 1e-2;
624 cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1.
625 cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
626 HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
627
628 cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
629 mpsHelper.setGateAlgorithm(gateAlgo);
查询和分配所需的工作区¶
一旦设置了所有模拟设置,我们就可以查询所需的工作区大小。在 MPSHelper 内部,所需的工作区大小是根据模拟中涉及的最大张量大小估算的。
632 /********************************************
633 * Step 4: workspace size query and allocation
634 *********************************************/
635
636 int64_t workspaceSize;
637 HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
638
639 void *work = nullptr;
640 std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
641 HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
642
643 HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
644
执行¶
在此阶段,我们可以通过迭代所有门张量来执行模拟。MPS 的所有元数据将在 MPSHelper
内部进行管理和更新。
646 /***********************************
647 * Step 5: execution
648 ************************************/
649
650 cudaStream_t stream;
651 HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
652 uint32_t numLayers = 10; // 10 layers of gate
653 for (uint32_t i=0; i<numLayers; i++)
654 {
655 uint32_t start_site = i % 2;
656 std::cout << "Cycle " << i << ":" << std::endl;
657 bool verbose = (i == numLayers - 1);
658 for (uint32_t j=start_site; j<numSites-1; j=j+2)
659 {
660 uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
661 std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
662 void *dataA = tensors_d[j];
663 void *dataB = tensors_d[j+1];
664 void *dataG = gates_d[gateIdx];
665 HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
666 }
667 }
668
669 HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );
释放资源¶
模拟完成后,我们释放主函数中分配的所有数据指针。
672 /***********************************
673 * Step 6: free resources
674 ************************************/
675
676 std::cout << "Free all resources" << std::endl;
677
678 for (int i=0; i<numRandomGates; i++)
679 {
680 free(gates_h[i]);
681 HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
682 }
683
684 for (int32_t i=0; i<numSites; i++)
685 {
686 free(tensors_h.at(i));
687 HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
688 }
689
690 HANDLE_CUDA_ERROR( cudaFree(work) );
691 // The MPSHelper destructor will free all internal resources when out of scope
692 return 0;
693}
MPSHelper 拥有的所有 cuTensorNet 库对象一旦超出作用域将被释放。