矩阵乘积态采样

以下代码示例说明了如何为给定的量子电路定义张量网络状态,然后计算其矩阵乘积态 (MPS) 分解,最后对 MPS 分解状态进行采样。完整的代码可以在 NVIDIA/cuQuantum 存储库中找到(此处)。

头文件和错误处理

 7#include <cstdlib>
 8#include <cstdio>
 9#include <cassert>
10#include <complex>
11#include <vector>
12#include <iostream>
13
14#include <cuda_runtime.h>
15#include <cutensornet.h>
16
17#define HANDLE_CUDA_ERROR(x) \
18{ const auto err = x; \
19  if( err != cudaSuccess ) \
20  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
21};
22
23#define HANDLE_CUTN_ERROR(x) \
24{ const auto err = x; \
25  if( err != CUTENSORNET_STATUS_SUCCESS ) \
26  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
27};
28
29
30int main()
31{
32  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
33
34  constexpr std::size_t fp64size = sizeof(double);

定义张量网络状态和要生成的所需输出样本数

让我们定义一个对应于 16 量子比特量子电路的张量网络状态,并请求为完整量子比特寄存器生成 100 个输出样本。

38  // Quantum state configuration
39  const int64_t numSamples = 100;
40  const int32_t numQubits = 16;
41  const std::vector<int64_t> qubitDims(numQubits, 2); // qubit size
42  std::cout << "Quantum circuit: " << numQubits << " qubits; " << numSamples << " samples\n";

初始化 cuTensorNet 库句柄

46  // Initialize the cuTensorNet library
47  HANDLE_CUDA_ERROR(cudaSetDevice(0));
48  cutensornetHandle_t cutnHandle;
49  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
50  std::cout << "Initialized cuTensorNet library on GPU 0\n";

在 GPU 上定义量子门

54  // Define necessary quantum gate tensors in Host memory
55  const double invsq2 = 1.0 / std::sqrt(2.0);
56  //  Hadamard gate
57  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
58                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
59  //  CX gate
60  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
61                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
62                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
63                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
64
65  // Copy quantum gates to Device memory
66  void *d_gateH{nullptr}, *d_gateCX{nullptr};
67  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
68  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
69  std::cout << "Allocated GPU memory for quantum gates\n";
70  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
71  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
72  std::cout << "Copied quantum gates to GPU memory\n";

分配 MPS 张量

在这里,我们设置 MPS 张量的形状,并为其存储分配 GPU 内存。

76  // Determine the MPS representation and allocate buffer for the MPS tensors
77  const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
78  std::vector<std::vector<int64_t>> extents;
79  std::vector<int64_t*> extentsPtr(numQubits); 
80  std::vector<void*> d_mpsTensors(numQubits, nullptr);
81  for (int32_t i = 0; i < numQubits; i++) {
82    if (i == 0) { // left boundary MPS tensor
83      extents.push_back({2, maxExtent});
84      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
85    } 
86    else if (i == numQubits-1) { // right boundary MPS tensor
87      extents.push_back({maxExtent, 2});
88      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
89    }
90    else { // middle MPS tensors
91      extents.push_back({maxExtent, 2, maxExtent});
92      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
93    }
94    extentsPtr[i] = extents[i].data();
95  }

在 GPU 上分配暂存缓冲区

 99  // Query the free memory on Device
100  std::size_t freeSize {0}, totalSize {0};
101  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
102  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
103  void *d_scratch {nullptr};
104  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
105  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
106            << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch))  + scratchSize) << ")\n";

创建纯张量网络状态

现在让我们为 16 量子比特量子电路创建一个纯张量网络状态。

110  // Create the initial quantum state
111  cutensornetState_t quantumState;
112  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
113                    CUDA_C_64F, &quantumState));
114  std::cout << "Created the initial quantum state\n";

应用量子门

让我们通过应用相应的量子门来构建 GHZ 量子电路。

118  // Construct the quantum circuit state (apply quantum gates)
119  int64_t id;
120  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
121                    d_gateH, nullptr, 1, 0, 1, &id));
122  for(int32_t i = 1; i < numQubits; ++i) {
123    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
124                      d_gateCX, nullptr, 1, 0, 1, &id));
125  }
126  std::cout << "Applied quantum gates\n";

请求最终量子电路状态的 MPS 分解

在这里,我们表达了使用 MPS 分解来分解最终量子电路状态的意图。提供的 MPS 张量的形状是指在 MPS 重整化过程中的最大尺寸限制。最终 MPS 张量的实际计算形状可能更小。此处尚未进行任何计算。

130  // Specify the final target MPS representation (use default fortran strides)
131  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState, 
132                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));

配置 MPS 分解过程

在表达了执行最终量子电路状态的 MPS 分解的意图之后,我们还可以通过重置不同的选项(例如 SVD 算法)来配置 MPS 分解过程。

136  // Optional, set up the SVD method for truncation.
137  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; 
138  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState, 
139                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
140  std::cout << "Configured the MPS computation\n";

准备 MPS 分解的计算

让我们创建一个工作区描述符并准备 MPS 分解的计算。

144  // Prepare the MPS computation and attach workspace
145  cutensornetWorkspaceDescriptor_t workDesc;
146  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
147  std::cout << "Created the workspace descriptor\n";
148  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
149  std::cout << "Prepared the computation of the quantum circuit state\n";
150  double flops {0.0};
151  HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
152                    CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
153  if(flops > 0.0) {
154    std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
155  }else if(flops < 0.0) {
156    std::cout << "ERROR: Negative Flop count!\n";
157    std::abort();
158  }
159
160  int64_t worksize {0};
161  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
162                                                      workDesc,
163                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
164                                                      CUTENSORNET_MEMSPACE_DEVICE,
165                                                      CUTENSORNET_WORKSPACE_SCRATCH,
166                                                      &worksize));
167  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
168  if(worksize <= scratchSize) {
169    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
170                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
171  }else{
172    std::cout << "ERROR: Insufficient workspace size on Device!\n";
173    std::abort();
174  }
175  std::cout << "Set the workspace buffer for MPS computation\n";

计算 MPS 分解

一旦 MPS 分解过程已配置和准备就绪,让我们计算最终量子电路状态的 MPS 分解。

179  // Execute MPS computation
180  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState, 
181                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
182  std::cout << "Computed MPS factorization\n";

创建张量网络状态采样器

一旦使用 MPS 表示构建和分解了量子电路,让我们为完整量子比特寄存器(所有量子比特)创建张量网络状态采样器。

186  // Create the quantum circuit sampler
187  cutensornetStateSampler_t sampler;
188  HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQubits, nullptr, &sampler));
189  std::cout << "Created the quantum circuit sampler\n";

配置张量网络状态采样器

现在,我们可以通过设置张量网络收缩路径查找器要使用的超样本数量来配置张量网络状态采样器。

193  // Configure the quantum circuit sampler
194  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
195  HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
196                    CUTENSORNET_SAMPLER_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
197  std::cout << "Configured the quantum circuit sampler\n";

准备张量网络状态采样器

让我们准备张量网络状态采样器。

201  // Prepare the quantum circuit sampler
202  HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
203  std::cout << "Prepared the quantum circuit state sampler\n";
204  flops = 0.0;
205  HANDLE_CUTN_ERROR(cutensornetSamplerGetInfo(cutnHandle, sampler,
206                    CUTENSORNET_SAMPLER_INFO_FLOPS, &flops, sizeof(flops)));
207  std::cout << "Total flop count per sample = " << (flops/1e9) << " GFlop\n";
208  if(flops <= 0.0) {
209    std::cout << "ERROR: Invalid Flop count!\n";
210    std::abort();
211  }

设置工作区

现在我们可以设置所需的工作区缓冲区。

215  // Attach the workspace buffer
216  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
217                                                      workDesc,
218                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
219                                                      CUTENSORNET_MEMSPACE_DEVICE,
220                                                      CUTENSORNET_WORKSPACE_SCRATCH,
221                                                      &worksize));
222  std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
223  assert(worksize > 0);
224  if(worksize <= scratchSize) {
225    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
226                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
227  }else{
228    std::cout << "ERROR: Insufficient workspace size on Device!\n";
229    std::abort();
230  }
231  std::cout << "Set the workspace buffer\n";

执行最终量子电路状态的采样

一旦一切都设置好,我们就可以执行量子电路状态的采样并打印输出样本。

235  // Sample the quantum circuit state
236  std::vector<int64_t> samples(numQubits * numSamples); // samples[SampleId][QubitId] reside in Host memory
237  HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
238  std::cout << "Performed quantum circuit state sampling\n";
239  std::cout << "Bit-string samples:\n";
240  for(int64_t i = 0; i < numSamples; ++i) {
241    for(int64_t j = 0; j < numQubits; ++j) std::cout << " " << samples[i * numQubits + j];
242    std::cout << std::endl;
243  }

释放资源

247  // Destroy the workspace descriptor
248  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
249  std::cout << "Destroyed the workspace descriptor\n";
250
251  // Destroy the quantum circuit sampler
252  HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
253  std::cout << "Destroyed the quantum circuit state sampler\n";
254
255  // Destroy the quantum circuit state
256  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
257  std::cout << "Destroyed the quantum circuit state\n";
258
259  for (int32_t i = 0; i < numQubits; i++) {
260    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
261  }
262  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
263  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
264  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
265  std::cout << "Freed memory on GPU\n";
266
267  // Finalize the cuTensorNet library
268  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
269  std::cout << "Finalized the cuTensorNet library\n";
270
271  return 0;
272}