计算矩阵乘积态 (MPS) 幅值¶
以下代码示例说明了如何定义张量网络状态,将其分解为矩阵乘积态 (MPS),然后计算分解后的 MPS 状态的幅值切片。 完整代码可以在 NVIDIA/cuQuantum 存储库中找到 (此处)。
头文件和错误处理¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21 if( err != cudaSuccess ) \
22 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27 if( err != CUTENSORNET_STATUS_SUCCESS ) \
28 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main()
33{
34 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
35
36 constexpr std::size_t fp64size = sizeof(double);
定义张量网络状态和所需的状态幅值切片¶
让我们定义一个对应于 6 量子比特量子电路的张量网络状态,并请求一个状态幅值切片,其中量子比特 0 和 1 固定为值 1。
40 // Quantum state configuration
41 constexpr int32_t numQubits = 6; // number of qubits
42 const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
43 const std::vector<int32_t> fixedModes({0,1}); // fixed modes in the output amplitude tensor (must be in acsending order)
44 const std::vector<int64_t> fixedValues({1,1}); // values of the fixed modes in the output amplitude tensor
45 const int32_t numFixedModes = fixedModes.size(); // number of fixed modes in the output amplitude tensor
46 std::cout << "Quantum circuit: " << numQubits << " qubits\n";
初始化 cuTensorNet 库句柄¶
50 // Initialize the cuTensorNet library
51 HANDLE_CUDA_ERROR(cudaSetDevice(0));
52 cutensornetHandle_t cutnHandle;
53 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
54 std::cout << "Initialized cuTensorNet library on GPU 0\n";
在 GPU 上定义量子门¶
58 // Define necessary quantum gate tensors in Host memory
59 const double invsq2 = 1.0 / std::sqrt(2.0);
60 // Hadamard gate
61 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
62 {invsq2, 0.0}, {-invsq2, 0.0}};
63 // CX gate
64 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
65 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
66 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
67 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
68
69 // Copy quantum gates to Device memory
70 void *d_gateH{nullptr}, *d_gateCX{nullptr};
71 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
72 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
73 std::cout << "Allocated quantum gate memory on GPU\n";
74 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
75 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
76 std::cout << "Copied quantum gates to GPU memory\n";
分配 MPS 张量¶
这里我们设置 MPS 张量的形状,并为其存储分配 GPU 内存。
80 // Determine the MPS representation and allocate buffers for the MPS tensors
81 const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
82 std::vector<std::vector<int64_t>> extents;
83 std::vector<int64_t*> extentsPtr(numQubits);
84 std::vector<void*> d_mpsTensors(numQubits, nullptr);
85 for (int32_t i = 0; i < numQubits; i++) {
86 if (i == 0) { // left boundary MPS tensor
87 extents.push_back({2, maxExtent});
88 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
89 }
90 else if (i == numQubits-1) { // right boundary MPS tensor
91 extents.push_back({maxExtent, 2});
92 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
93 }
94 else { // middle MPS tensors
95 extents.push_back({maxExtent, 2, maxExtent});
96 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
97 }
98 extentsPtr[i] = extents[i].data();
99 }
在 GPU 上分配幅值切片张量¶
这里我们为请求的幅值切片张量分配 GPU 内存。
103 // Allocate Device memory for the specified slice of the quantum circuit amplitudes tensor
104 void *d_amp{nullptr};
105 std::size_t ampSize = 1;
106 for(const auto & qubitDim: qubitDims) ampSize *= qubitDim; // all state modes (full size)
107 for(const auto & fixedMode: fixedModes) ampSize /= qubitDims[fixedMode]; // fixed state modes reduce the slice size
108 HANDLE_CUDA_ERROR(cudaMalloc(&d_amp, ampSize * (2 * fp64size)));
109 std::cout << "Allocated memory for the specified slice of the quantum circuit amplitude tensor of size "
110 << ampSize << " elements\n";
在 GPU 上分配暂存缓冲区¶
114 // Query the free memory on Device
115 std::size_t freeSize{0}, totalSize{0};
116 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
117 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
118 void *d_scratch{nullptr};
119 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
120 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
创建纯张量网络状态¶
现在让我们为 6 量子比特量子电路创建一个纯张量网络状态。
124 // Create the initial quantum state
125 cutensornetState_t quantumState;
126 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
127 CUDA_C_64F, &quantumState));
128 std::cout << "Created the initial quantum state\n";
应用量子门¶
让我们通过应用相应的量子门来构建 GHZ 量子电路。
132 // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
133 int64_t id;
134 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
135 d_gateH, nullptr, 1, 0, 1, &id));
136 for(int32_t i = 1; i < numQubits; ++i) {
137 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
138 d_gateCX, nullptr, 1, 0, 1, &id));
139 }
140 std::cout << "Applied quantum gates\n";
请求最终量子电路状态的 MPS 分解¶
这里我们表达了使用 MPS 分解来分解最终量子电路状态的意图。 所提供的 MPS 张量的形状指的是 MPS 重整化过程中的最大尺寸限制。 最终 MPS 张量的实际计算形状可能更小。 这里尚未进行任何计算。
144 // Specify the final target MPS representation (use default fortran strides)
145 HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
146 CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr));
147 std::cout << "Requested the final MPS factorization of the quantum circuit state\n";
配置 MPS 分解过程¶
在表达了执行最终量子电路状态的 MPS 分解的意图之后,我们还可以通过重置不同的选项(例如 SVD 算法)来配置 MPS 分解过程。
151 // Optional, set up the SVD method for MPS truncation.
152 cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
153 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
154 CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
155 std::cout << "Configured the MPS factorization computation\n";
准备 MPS 分解的计算¶
让我们创建一个工作区描述符并准备 MPS 分解的计算。
159 // Prepare the MPS computation and attach workspace
160 cutensornetWorkspaceDescriptor_t workDesc;
161 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
162 std::cout << "Created the workspace descriptor\n";
163 HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
164 std::cout << "Prepared the computation of the quantum circuit state\n";
165 double flops {0.0};
166 HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
167 CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
168 if(flops > 0.0) {
169 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
170 }else if(flops < 0.0) {
171 std::cout << "ERROR: Negative Flop count!\n";
172 std::abort();
173 }
174
175 int64_t worksize {0};
176 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
177 workDesc,
178 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
179 CUTENSORNET_MEMSPACE_DEVICE,
180 CUTENSORNET_WORKSPACE_SCRATCH,
181 &worksize));
182 std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
183 if(worksize <= scratchSize) {
184 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
185 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
186 }else{
187 std::cout << "ERROR: Insufficient workspace size on Device!\n";
188 std::abort();
189 }
190 std::cout << "Set the workspace buffer for the MPS factorization computation\n";
计算 MPS 分解¶
一旦 MPS 分解过程已配置和准备就绪,让我们计算最终量子电路状态的 MPS 分解。
194 // Execute MPS computation
195 HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
196 workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
197 std::cout << "Computed the MPS factorization\n";
创建状态幅值访问器¶
一旦计算出最终量子电路状态的分解 MPS 表示,让我们创建幅值访问器对象,它将计算请求的状态幅值切片。
201 // Specify the quantum circuit amplitudes accessor
202 cutensornetStateAccessor_t accessor;
203 HANDLE_CUTN_ERROR(cutensornetCreateAccessor(cutnHandle, quantumState, numFixedModes, fixedModes.data(),
204 nullptr, &accessor)); // using default strides
205 std::cout << "Created the specified quantum circuit amplitudes accessor\n";
配置状态幅值访问器¶
现在我们可以通过设置张量网络收缩路径查找器要使用的超样本数量来配置状态幅值访问器对象。
209 // Configure the computation of the slice of the specified quantum circuit amplitudes tensor
210 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
211 HANDLE_CUTN_ERROR(cutensornetAccessorConfigure(cutnHandle, accessor,
212 CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
准备状态幅值切片张量的计算¶
让我们准备状态幅值切片张量的计算。
216 // Prepare the computation of the specified slice of the quantum circuit amplitudes tensor
217 HANDLE_CUTN_ERROR(cutensornetAccessorPrepare(cutnHandle, accessor, scratchSize, workDesc, 0x0));
218 std::cout << "Prepared the computation of the specified slice of the quantum circuit amplitudes tensor\n";
219 flops = 0.0;
220 HANDLE_CUTN_ERROR(cutensornetAccessorGetInfo(cutnHandle, accessor,
221 CUTENSORNET_ACCESSOR_INFO_FLOPS, &flops, sizeof(flops)));
222 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
223 if(flops <= 0.0) {
224 std::cout << "ERROR: Invalid Flop count!\n";
225 std::abort();
226 }
设置工作区¶
现在我们可以设置所需的工作区缓冲区。
230 // Attach the workspace buffer
231 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
232 workDesc,
233 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
234 CUTENSORNET_MEMSPACE_DEVICE,
235 CUTENSORNET_WORKSPACE_SCRATCH,
236 &worksize));
237 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
238 if(worksize <= scratchSize) {
239 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
240 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
241 }else{
242 std::cout << "ERROR: Insufficient workspace size on Device!\n";
243 std::abort();
244 }
245 std::cout << "Set the workspace buffer\n";
计算指定的状态幅值切片¶
一旦一切都设置好,我们计算请求的状态幅值切片,将其复制回主机内存,并打印出来。
249 // Compute the specified slice of the quantum circuit amplitudes tensor
250 std::complex<double> stateNorm2{0.0,0.0};
251 HANDLE_CUTN_ERROR(cutensornetAccessorCompute(cutnHandle, accessor, fixedValues.data(),
252 workDesc, d_amp, static_cast<void*>(&stateNorm2), 0x0));
253 std::cout << "Computed the specified slice of the quantum circuit amplitudes tensor\n";
254 std::vector<std::complex<double>> h_amp(ampSize);
255 HANDLE_CUDA_ERROR(cudaMemcpy(h_amp.data(), d_amp, ampSize * (2 * fp64size), cudaMemcpyDeviceToHost));
256 std::cout << "Amplitudes slice for " << (numQubits - numFixedModes) << " qubits:\n";
257 for(std::size_t i = 0; i < ampSize; ++i) {
258 std::cout << " " << h_amp[i] << std::endl;
259 }
260 std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
释放资源¶
264 // Destroy the workspace descriptor
265 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
266 std::cout << "Destroyed the workspace descriptor\n";
267
268 // Destroy the quantum circuit amplitudes accessor
269 HANDLE_CUTN_ERROR(cutensornetDestroyAccessor(accessor));
270 std::cout << "Destroyed the quantum circuit amplitudes accessor\n";
271
272 // Destroy the quantum circuit state
273 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
274 std::cout << "Destroyed the quantum circuit state\n";
275
276 for (int32_t i = 0; i < numQubits; i++) {
277 HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
278 }
279 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
280 HANDLE_CUDA_ERROR(cudaFree(d_amp));
281 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
282 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
283 std::cout << "Freed memory on GPU\n";
284
285 // Finalize the cuTensorNet library
286 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
287 std::cout << "Finalized the cuTensorNet library\n";
288
289 return 0;
290}