Computing Matrix Product State Expectation Values

The following code example illustrates how to define a tensor network state, compute its matrix product state (MPS) factorization, and then compute the expectation value of a tensor network operator over the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).

Headers and error handling

#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <cmath>    // std::sqrt
#include <complex>
#include <vector>
#include <bitset>
#include <iostream>

#include <cuda_runtime.h>
#include <cutensornet.h>


#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};

#define HANDLE_CUTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};


int main()
{
  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

  constexpr std::size_t fp64size = sizeof(double);

Define a tensor network state

Let us define a tensor network state corresponding to a 16-qubit quantum circuit.

  // Quantum state configuration
  constexpr int32_t numQubits = 16; // number of qubits
  const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
  std::cout << "Quantum circuit: " << numQubits << " qubits\n";

Initialize the cuTensorNet library handle

  // Initialize the cuTensorNet library
  HANDLE_CUDA_ERROR(cudaSetDevice(0));
  cutensornetHandle_t cutnHandle;
  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates on the GPU

  // Define necessary quantum gate tensors in Host memory
  const double invsq2 = 1.0 / std::sqrt(2.0);
  //  Hadamard gate
  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
  //  Pauli X gate
  const std::vector<std::complex<double>> h_gateX {{0.0, 0.0}, {1.0, 0.0},
                                                   {1.0, 0.0}, {0.0, 0.0}};
  //  Pauli Y gate
  const std::vector<std::complex<double>> h_gateY {{0.0, 0.0}, {0.0, -1.0},
                                                   {0.0, 1.0}, {0.0, 0.0}};
  //  Pauli Z gate
  const std::vector<std::complex<double>> h_gateZ {{1.0, 0.0}, {0.0, 0.0},
                                                   {0.0, 0.0}, {-1.0, 0.0}};
  //  CX gate
  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};

  // Copy quantum gates to Device memory
  void *d_gateH{nullptr}, *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateCX{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
  std::cout << "Allocated quantum gate memory on GPU\n";
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
  std::cout << "Copied quantum gates to GPU memory\n";
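
As an aside, the allocate-and-copy pattern above is repeated for every gate. Below is a minimal sketch of a hypothetical helper (not part of the original sample), defined at file scope and assuming the same complex128 host layout used above:

// Hypothetical helper (not in the sample): allocate device memory for a gate
// tensor and copy the host data over, assuming complex128 (CUDA_C_64F) elements.
void* copyGateToDevice(const std::vector<std::complex<double>> &hostGate)
{
  void *devPtr{nullptr};
  const std::size_t numBytes = hostGate.size() * sizeof(std::complex<double>);
  HANDLE_CUDA_ERROR(cudaMalloc(&devPtr, numBytes));
  HANDLE_CUDA_ERROR(cudaMemcpy(devPtr, hostGate.data(), numBytes, cudaMemcpyHostToDevice));
  return devPtr;
}

// Example usage, equivalent to the explicit calls above:
//   void *d_gateH = copyGateToDevice(h_gateH);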

Allocate MPS tensors

Here we set the shapes of the MPS tensors and allocate GPU memory for their storage.

  // Determine the MPS representation and allocate buffers for the MPS tensors
  const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
  std::vector<std::vector<int64_t>> extents;
  std::vector<int64_t*> extentsPtr(numQubits);
  std::vector<void*> d_mpsTensors(numQubits, nullptr);
  for (int32_t i = 0; i < numQubits; i++) {
    if (i == 0) { // left boundary MPS tensor
      extents.push_back({2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else if (i == numQubits-1) { // right boundary MPS tensor
      extents.push_back({maxExtent, 2});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else { // middle MPS tensors
      extents.push_back({maxExtent, 2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
    }
    extentsPtr[i] = extents[i].data();
  }
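
The byte counts passed to cudaMalloc above are simply the product of each tensor's extents times the size of one complex128 element (2 * fp64size). A hedged sketch of a hypothetical helper (not part of the sample, defined at file scope) that makes this relation explicit:

// Hypothetical helper: number of elements implied by a tensor's extents.
int64_t elementCount(const std::vector<int64_t> &ext)
{
  int64_t count = 1;
  for (const auto e : ext) count *= e;
  return count;
}

// With it, the allocations inside the loop above could read:
//   HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], elementCount(extents[i]) * 2 * fp64size));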

Allocate a scratch buffer on the GPU

  // Query the free memory on Device
  std::size_t freeSize{0}, totalSize{0};
  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
  void *d_scratch{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";

Create a pure tensor network state

Now let us create a pure tensor network state for the 16-qubit quantum circuit.

  // Create the initial quantum state
  cutensornetState_t quantumState;
  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
                    CUDA_C_64F, &quantumState));
  std::cout << "Created the initial quantum state\n";

Apply quantum gates

Let us construct the GHZ quantum circuit by applying the corresponding quantum gates: a Hadamard gate on qubit 0 followed by a chain of CX gates, which prepares the state (|00…0⟩ + |11…1⟩)/√2.

  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
  int64_t id;
  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
                    d_gateH, nullptr, 1, 0, 1, &id));
  for(int32_t i = 1; i < numQubits; ++i) {
    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
                      d_gateCX, nullptr, 1, 0, 1, &id));
  }
  std::cout << "Applied quantum gates\n";

Request MPS factorization of the final quantum circuit state

Here we express our intent to factorize the final quantum circuit state using the MPS factorization. The shapes of the provided MPS tensors refer to their maximal size limits during the MPS renormalization procedure; the actually computed shapes of the final MPS tensors may be smaller. No computation is done here yet.

  // Specify the final target MPS representation (use default fortran strides)
  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));

Configure the MPS factorization procedure

After expressing our intent to perform the MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by resetting different options, for example, the SVD algorithm.

  // Optional, set up the SVD method for truncation.
  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
  std::cout << "Configured the MPS computation\n";
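
For circuits that do not admit an exact low-rank MPS, one would typically also request singular-value truncation. Below is a hedged sketch, assuming the CUTENSORNET_STATE_CONFIG_MPS_SVD_ABS_CUTOFF attribute is available in your cuTensorNet release (it is not needed for this exact GHZ example):

  // Hedged sketch (not in the sample): discard singular values below an absolute cutoff
  const double absCutoff = 1e-8;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ABS_CUTOFF, &absCutoff, sizeof(absCutoff)));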

Prepare the computation of the MPS factorization

Let us create a workspace descriptor and prepare the computation of the MPS factorization.

  // Prepare the MPS computation and attach workspace
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
  std::cout << "Created the workspace descriptor\n";
  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the computation of the quantum circuit state\n";
  double flops {0.0};
  HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
                    CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
  if(flops > 0.0) {
    std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  }else if(flops < 0.0) {
    std::cout << "ERROR: Negative Flop count!\n";
    std::abort();
  }

  int64_t worksize {0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer for MPS computation\n";

Compute the MPS factorization

Once the MPS factorization procedure has been configured and prepared, let us compute the MPS factorization of the final quantum circuit state.

  // Execute MPS computation
  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
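
Because the extents passed to cutensornetStateFinalizeMPS are only upper bounds, cutensornetStateCompute writes the actually computed extents back through extentsPtr. Below is an optional sketch (not in the original sample) that prints them for inspection; for this GHZ circuit they should all equal the requested maximum of 2:

  // Optional sketch: inspect the actual extents of the computed MPS tensors
  // (cutensornetStateCompute updates the arrays referenced by extentsPtr).
  for (int32_t i = 0; i < numQubits; ++i) {
    std::cout << "MPS tensor " << i << " extents:";
    for (const auto e : extents[i]) std::cout << " " << e;
    std::cout << "\n";
  }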

Construct a tensor network operator

Now let us create an empty tensor network operator for the 16-qubit state and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient (as in the Jordan-Wigner representation).

  // Create an empty tensor network operator
  cutensornetNetworkOperator_t hamiltonian;
  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
  // Append component (0.5 * Z1 * Z2) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1}; // Z1 acts on 1 mode, Z2 acts on 1 mode
    const int32_t modesZ1[] = {1}; // state modes Z1 acts on
    const int32_t modesZ2[] = {2}; // state modes Z2 acts on
    const int32_t * stateModes[] = {modesZ1, modesZ2}; // state modes (Z1 * Z2) acts on
    const void * gateData[] = {d_gateZ, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.5,0.0},
                      2, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.25 * Y3) to the tensor network operator
  {
    const int32_t numModes[] = {1}; // Y3 acts on 1 mode
    const int32_t modesY3[] = {3}; // state modes Y3 acts on
    const int32_t * stateModes[] = {modesY3}; // state modes (Y3) acts on
    const void * gateData[] = {d_gateY}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.25,0.0},
                      1, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.13 * Y0 X2 Z3) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1, 1}; // Y0 acts on 1 mode, X2 acts on 1 mode, Z3 acts on 1 mode
    const int32_t modesY0[] = {0}; // state modes Y0 acts on
    const int32_t modesX2[] = {2}; // state modes X2 acts on
    const int32_t modesZ3[] = {3}; // state modes Z3 acts on
    const int32_t * stateModes[] = {modesY0, modesX2, modesZ3}; // state modes (Y0 * X2 * Z3) acts on
    const void * gateData[] = {d_gateY, d_gateX, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.13,0.0},
                      3, numModes, stateModes, NULL, gateData, &id));
  }
  std::cout << "Constructed a tensor network operator: (0.5 * Z1 * Z2) + (0.25 * Y3) + (0.13 * Y0 * X2 * Z3)" << std::endl;

Create the expectation value object

Once the quantum circuit and the tensor network operator have been constructed, and the final quantum circuit state has been factorized using the MPS representation, let us create the expectation value object. It will compute the expectation value of the specified tensor network operator over the final MPS-factorized state of the specified quantum circuit.

  // Specify the quantum circuit expectation value
  cutensornetStateExpectation_t expectation;
  HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
  std::cout << "Created the specified quantum circuit expectation value\n";

Configure the expectation value calculation

Now we can configure the expectation value object by setting the number of hyper-samples to be used by the tensor network contraction path finder.

  // Configure the computation of the specified quantum circuit expectation value
  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
  HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the expectation value calculation

Let us prepare the computation of the desired expectation value.

  // Prepare the specified quantum circuit expectation value for computation
  HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the specified quantum circuit expectation value\n";
  flops = 0.0;
  HANDLE_CUTN_ERROR(cutensornetExpectationGetInfo(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_INFO_FLOPS, &flops, sizeof(flops)));
  std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  if(flops <= 0.0) {
    std::cout << "ERROR: Invalid Flop count!\n";
    std::abort();
  }

Set up the workspace

Now we can set up the required workspace buffer.

  // Attach the workspace buffer
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer\n";

Compute the requested expectation value

Once everything has been set up, we compute the requested expectation value and print it. Note that the returned expectation value is not normalized; the squared 2-norm of the tensor network state is returned as a separate argument.

  // Compute the specified quantum circuit expectation value
  std::complex<double> expectVal{0.0,0.0}, stateNorm2{0.0,0.0};
  HANDLE_CUTN_ERROR(cutensornetExpectationCompute(cutnHandle, expectation, workDesc,
                    static_cast<void*>(&expectVal), static_cast<void*>(&stateNorm2), 0x0));
  std::cout << "Computed the specified quantum circuit expectation value\n";
  expectVal /= stateNorm2;
  std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
  std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
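
As a sanity check (a hand calculation, not part of the original sample): for the GHZ state (|00…0⟩ + |11…1⟩)/√2 prepared above, Z1·Z2 evaluates to +1 on both basis components, while Y3 and Y0·X2·Z3 flip qubits and therefore have zero expectation value. The normalized result should thus be

  ⟨H⟩ = 0.5·1 + 0.25·0 + 0.13·0 = 0.5

with a squared 2-norm of 1, since the MPS factorization with bond dimension 2 is exact for this circuit.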

Free resources

  // Destroy the workspace descriptor
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  std::cout << "Destroyed the workspace descriptor\n";

  // Destroy the quantum circuit expectation value
  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
  std::cout << "Destroyed the quantum circuit state expectation value\n";

  // Destroy the tensor network operator
  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
  std::cout << "Destroyed the tensor network operator\n";

  // Destroy the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
  std::cout << "Destroyed the quantum circuit state\n";

  for (int32_t i = 0; i < numQubits; i++) {
    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
  }
  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
  HANDLE_CUDA_ERROR(cudaFree(d_gateY));
  HANDLE_CUDA_ERROR(cudaFree(d_gateX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
  std::cout << "Freed memory on GPU\n";

  // Finalize the cuTensorNet library
  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
  std::cout << "Finalized the cuTensorNet library\n";

  return 0;
}