Computing Matrix Product State Expectation Values
The following code example illustrates how to define a tensor network state, compute its Matrix Product State (MPS) factorization, and then compute the expectation value of a tensor network operator over the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).
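Concretely, the quantity computed at the end of the walkthrough is the unnormalized expectation value of a tensor network operator H over the MPS-factorized circuit state |ψ⟩, together with the squared 2-norm of that state; the normalized expectation value is their ratio:

⟨H⟩ = ⟨ψ|H|ψ⟩ / ⟨ψ|ψ⟩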
Headers and error handling
#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <cmath>    // std::sqrt
#include <complex>
#include <vector>
#include <bitset>
#include <iostream>

#include <cuda_runtime.h>
#include <cutensornet.h>


#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};

#define HANDLE_CUTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};


int main()
{
  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

  constexpr std::size_t fp64size = sizeof(double);
Define a tensor network state
Let us define a tensor network state corresponding to a 16-qubit quantum circuit.
  // Quantum state configuration
  constexpr int32_t numQubits = 16; // number of qubits
  const std::vector<int64_t> qubitDims(numQubits, 2); // qubit dimensions
  std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle
  // Initialize the cuTensorNet library
  HANDLE_CUDA_ERROR(cudaSetDevice(0));
  cutensornetHandle_t cutnHandle;
  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
  std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on the GPU
  // Define necessary quantum gate tensors in Host memory
  const double invsq2 = 1.0 / std::sqrt(2.0);
  // Hadamard gate
  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
  // Pauli X gate
  const std::vector<std::complex<double>> h_gateX {{0.0, 0.0}, {1.0, 0.0},
                                                   {1.0, 0.0}, {0.0, 0.0}};
  // Pauli Y gate
  const std::vector<std::complex<double>> h_gateY {{0.0, 0.0}, {0.0, -1.0},
                                                   {0.0, 1.0}, {0.0, 0.0}};
  // Pauli Z gate
  const std::vector<std::complex<double>> h_gateZ {{1.0, 0.0}, {0.0, 0.0},
                                                   {0.0, 0.0}, {-1.0, 0.0}};
  // CX gate
  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};

  // Copy quantum gates to Device memory
  void *d_gateH{nullptr}, *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateCX{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
  std::cout << "Allocated quantum gate memory on GPU\n";
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
  std::cout << "Copied quantum gates to GPU memory\n";
Allocate MPS tensors
Here we set the shapes of the MPS tensors and allocate GPU memory for their storage.
  // Determine the MPS representation and allocate buffers for the MPS tensors
  const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
  std::vector<std::vector<int64_t>> extents;
  std::vector<int64_t*> extentsPtr(numQubits);
  std::vector<void*> d_mpsTensors(numQubits, nullptr);
  for (int32_t i = 0; i < numQubits; i++) {
    if (i == 0) { // left boundary MPS tensor
      extents.push_back({2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else if (i == numQubits - 1) { // right boundary MPS tensor
      extents.push_back({maxExtent, 2});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else { // middle MPS tensors
      extents.push_back({maxExtent, 2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
    }
    extentsPtr[i] = extents[i].data();
  }
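As a quick size check (simple arithmetic, not part of the sample): each element is a complex double of 2 * fp64size = 16 bytes, so a boundary tensor of shape (2, maxExtent) = (2, 2) occupies 2 * 2 * 16 = 64 bytes, and a middle tensor of shape (maxExtent, 2, maxExtent) = (2, 2, 2) occupies 2 * 2 * 2 * 16 = 128 bytes, matching the cudaMalloc sizes above.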
Allocate a scratch buffer on the GPU
  // Query the free memory on Device
  std::size_t freeSize{0}, totalSize{0};
  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
  void *d_scratch{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state
Now let us create a pure tensor network state for the 16-qubit quantum circuit.
  // Create the initial quantum state
  cutensornetState_t quantumState;
  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
                                           CUDA_C_64F, &quantumState));
  std::cout << "Created the initial quantum state\n";
Apply quantum gates
Let us construct the GHZ quantum circuit by applying the corresponding quantum gates.
  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
  int64_t id;
  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
                                                        d_gateH, nullptr, 1, 0, 1, &id));
  for(int32_t i = 1; i < numQubits; ++i) {
    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1, i}}.data(),
                                                          d_gateCX, nullptr, 1, 0, 1, &id));
  }
  std::cout << "Applied quantum gates\n";
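Up to numerical error, this gate sequence (H on qubit 0 followed by a chain of CX gates) prepares the 16-qubit GHZ state

|GHZ⟩ = (|00…0⟩ + |11…1⟩) / √2,

which is why the maximal bond dimension of 2 set above via maxExtent suffices for an exact MPS representation.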
Request the MPS factorization of the final quantum circuit state
Here we declare our intent to factorize the final quantum circuit state using the MPS factorization. The shapes of the provided MPS tensors refer to their maximal size limits during the MPS renormalization procedure; the actual computed shapes of the final MPS tensors may turn out to be smaller. No computation is performed here yet.
  // Specify the final target MPS representation (use default fortran strides)
  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr));
Configure the MPS factorization procedure
Having declared our intent to perform an MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by resetting different options, for example the SVD algorithm.
  // Optional: set up the SVD method for truncation
  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
  std::cout << "Configured the MPS computation\n";
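Purely as an illustrative sketch (not part of the sample), other MPS factorization options can be adjusted through the same cutensornetStateConfigure call; the attribute name below is assumed from the cuTensorNet state-configuration attribute set and should be verified against the cutensornet.h shipped with your library version.

  // Hedged sketch: additionally request truncation of small singular values
  // (attribute name assumed; check your cuTensorNet headers)
  const double absCutoff = 1e-8; // discard singular values below this absolute threshold
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ABS_CUTOFF, &absCutoff, sizeof(absCutoff)));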
Prepare the computation of the MPS factorization
Let us create a workspace descriptor and prepare the computation of the MPS factorization.
  // Prepare the MPS computation and attach workspace
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
  std::cout << "Created the workspace descriptor\n";
  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the computation of the quantum circuit state\n";
  double flops {0.0};
  HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
                    CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
  if(flops > 0.0) {
    std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  } else if(flops < 0.0) {
    std::cout << "ERROR: Negative Flop count!\n";
    std::abort();
  }

  int64_t worksize {0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                                                    CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  } else {
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer for MPS computation\n";
Compute the MPS factorization
Once the MPS factorization procedure has been configured and prepared, let us compute the MPS factorization of the final quantum circuit state.
  // Execute MPS computation
  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
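A minimal follow-up sketch (not in the original sample), assuming that cutensornetStateCompute writes the actual computed extents of each MPS tensor back into the arrays passed via extentsPtr: the extents can then be inspected to verify whether the factorization stayed within the requested maximal bond dimension.

  // Hedged sketch: inspect the actual extents of the computed MPS tensors
  for (int32_t i = 0; i < numQubits; i++) {
    std::cout << "MPS tensor " << i << " extents:";
    for (const auto ext : extents[i]) std::cout << " " << ext;
    std::cout << "\n";
  }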
Construct a tensor network operator
Now let us create an empty tensor network operator for the 16-qubit state and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient (as in the Jordan-Wigner representation).
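In operator form, the code below assembles

H = 0.5 · Z₁ Z₂ + 0.25 · Y₃ + 0.13 · Y₀ X₂ Z₃,

where the subscript denotes the qubit (state mode) each Pauli matrix acts on.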
  // Create an empty tensor network operator
  cutensornetNetworkOperator_t hamiltonian;
  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
  // Append component (0.5 * Z1 * Z2) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1}; // Z1 acts on 1 mode, Z2 acts on 1 mode
    const int32_t modesZ1[] = {1}; // state modes Z1 acts on
    const int32_t modesZ2[] = {2}; // state modes Z2 acts on
    const int32_t * stateModes[] = {modesZ1, modesZ2}; // state modes (Z1 * Z2) acts on
    const void * gateData[] = {d_gateZ, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.5, 0.0},
                      2, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.25 * Y3) to the tensor network operator
  {
    const int32_t numModes[] = {1}; // Y3 acts on 1 mode
    const int32_t modesY3[] = {3}; // state modes Y3 acts on
    const int32_t * stateModes[] = {modesY3}; // state modes (Y3) acts on
    const void * gateData[] = {d_gateY}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.25, 0.0},
                      1, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.13 * Y0 * X2 * Z3) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1, 1}; // Y0 acts on 1 mode, X2 acts on 1 mode, Z3 acts on 1 mode
    const int32_t modesY0[] = {0}; // state modes Y0 acts on
    const int32_t modesX2[] = {2}; // state modes X2 acts on
    const int32_t modesZ3[] = {3}; // state modes Z3 acts on
    const int32_t * stateModes[] = {modesY0, modesX2, modesZ3}; // state modes (Y0 * X2 * Z3) acts on
    const void * gateData[] = {d_gateY, d_gateX, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.13, 0.0},
                      3, numModes, stateModes, NULL, gateData, &id));
  }
  std::cout << "Constructed a tensor network operator: (0.5 * Z1 * Z2) + (0.25 * Y3) + (0.13 * Y0 * X2 * Z3)" << std::endl;
Create the expectation value object
Once the quantum circuit and the tensor network operator have been constructed, and the final quantum circuit state has been factorized into the MPS representation, let us create the expectation value object. It will compute the expectation value of the specified tensor network operator over the final MPS-factorized state of the specified quantum circuit.
  // Specify the quantum circuit expectation value
  cutensornetStateExpectation_t expectation;
  HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
  std::cout << "Created the specified quantum circuit expectation value\n";
Configure the expectation value computation
Now we can configure the expectation value object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
  // Configure the computation of the specified quantum circuit expectation value
  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
  HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the expectation value computation
Let us prepare the computation of the requested expectation value.
  // Prepare the specified quantum circuit expectation value for computation
  HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the specified quantum circuit expectation value\n";
  flops = 0.0;
  HANDLE_CUTN_ERROR(cutensornetExpectationGetInfo(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_INFO_FLOPS, &flops, sizeof(flops)));
  std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  if(flops <= 0.0) {
    std::cout << "ERROR: Invalid Flop count!\n";
    std::abort();
  }
Set up the workspace
Now we can attach the required workspace buffer.
  // Attach the workspace buffer
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                                                    CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  } else {
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer\n";
Compute the requested expectation value
Once everything has been set up, we compute the requested expectation value and print it. Note that the returned expectation value is not normalized; the squared 2-norm of the tensor network state is returned as a separate argument.
  // Compute the specified quantum circuit expectation value
  std::complex<double> expectVal{0.0, 0.0}, stateNorm2{0.0, 0.0};
  HANDLE_CUTN_ERROR(cutensornetExpectationCompute(cutnHandle, expectation, workDesc,
                    static_cast<void*>(&expectVal), static_cast<void*>(&stateNorm2), 0x0));
  std::cout << "Computed the specified quantum circuit expectation value\n";
  expectVal /= stateNorm2;
  std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
  std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
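As a sanity check on the printed output: for the GHZ state prepared above, ⟨Z1 Z2⟩ = 1, while the Y3 and Y0 X2 Z3 components flip qubits and therefore have zero expectation value in the GHZ state. Up to floating-point error, the program should thus report an expectation value of approximately (0.5, 0) and a squared 2-norm of approximately (1, 0).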
Free resources
  // Destroy the workspace descriptor
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  std::cout << "Destroyed the workspace descriptor\n";

  // Destroy the quantum circuit expectation value
  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
  std::cout << "Destroyed the quantum circuit state expectation value\n";

  // Destroy the tensor network operator
  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
  std::cout << "Destroyed the tensor network operator\n";

  // Destroy the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
  std::cout << "Destroyed the quantum circuit state\n";

  for (int32_t i = 0; i < numQubits; i++) {
    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
  }
  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
  HANDLE_CUDA_ERROR(cudaFree(d_gateY));
  HANDLE_CUDA_ERROR(cudaFree(d_gateX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
  std::cout << "Freed memory on GPU\n";

  // Finalize the cuTensorNet library
  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
  std::cout << "Finalized the cuTensorNet library\n";

  return 0;
}