MPS Sampling (QFT Circuit)

The following code example illustrates how to define a tensor network state for a given quantum circuit (QFT), then compute its Matrix Product State (MPS) factorization, and, finally, sample the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).

Headers and error handling

#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <complex>
#include <cmath>
#include <vector>
#include <iostream>

#include <cuda_runtime.h>
#include <cutensornet.h>

#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};

#define HANDLE_CUTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};


int main()
{
  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

  constexpr std::size_t fp64size = sizeof(double);

Define the tensor network state and the desired number of output samples to generate

Let's define a tensor network state corresponding to a quantum circuit with a given number of qubits and request a given number of output samples for the full qubit register.

  // Quantum state configuration
  const int64_t numSamples = 128; // number of samples to generate
  const int32_t numQubits = 16;   // number of qubits
  const std::vector<int64_t> qubitDims(numQubits, 2); // qubit dimensions
  std::cout << "Quantum circuit: " << numQubits << " qubits; " << numSamples << " samples\n";

Initialize the cuTensorNet library handle

  // Initialize the cuTensorNet library
  HANDLE_CUDA_ERROR(cudaSetDevice(0));
  cutensornetHandle_t cutnHandle;
  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates in GPU memory

  // Define necessary quantum gate tensors in Host memory
  const double invsq2 = 1.0 / std::sqrt(2.0);
  const double pi = 3.14159265358979323846;
  using GateData = std::vector<std::complex<double>>;
  //  CR(k) gate generator
  auto cRGate = [&pi] (int32_t k) {
    const double phi = pi * 2.0 / std::exp2(k);
    const GateData cr {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                       {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                       {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0},
                       {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {std::cos(phi), std::sin(phi)}};
    return cr;
  };
  //  Hadamard gate
  const GateData h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
                          {invsq2, 0.0}, {-invsq2, 0.0}};
  //  CR(k) gates (controlled rotations)
  std::vector<GateData> h_gateCR(numQubits);
  for(int32_t k = 0; k < numQubits; ++k) {
    h_gateCR[k] = cRGate(k+1);
  }

  // Copy quantum gates into Device memory
  void *d_gateH {nullptr};
  std::vector<void*> d_gateCR(numQubits, nullptr);
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
  for(int32_t k = 0; k < numQubits; ++k) {
    HANDLE_CUDA_ERROR(cudaMalloc(&(d_gateCR[k]), 16 * (2 * fp64size)));
  }
  std::cout << "Allocated GPU memory for quantum gates\n";
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  for(int32_t k = 0; k < numQubits; ++k) {
    HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCR[k], h_gateCR[k].data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
  }
  std::cout << "Copied quantum gates into GPU memory\n";
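
For reference, the two gate matrices defined above are the Hadamard gate H and the controlled phase rotation CR(k), whose only non-trivial action is the phase applied to the |11> component:

H = \frac{1}{\sqrt{2}} \begin{pmatrix} 1 & 1 \\ 1 & -1 \end{pmatrix}, \qquad
CR(k) = \begin{pmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & e^{i\phi} \end{pmatrix}, \qquad \phi = \frac{2\pi}{2^{k}}.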

Allocate MPS tensors in GPU memory

Here we set the shapes of the MPS tensors and allocate GPU memory for their storage.

  // Define the MPS representation and allocate memory buffers for the MPS tensors
  const int64_t maxExtent = 8; // max bond dimension (level of low-rank MPS approximation)
  std::vector<std::vector<int64_t>> extents;
  std::vector<int64_t*> extentsPtr(numQubits);
  std::vector<void*> d_mpsTensors(numQubits, nullptr);
  for (int32_t i = 0; i < numQubits; ++i) {
    if (i == 0) { // left boundary MPS tensor
      extents.push_back({2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * (2 * fp64size)));
    }
    else if (i == numQubits-1) { // right boundary MPS tensor
      extents.push_back({maxExtent, 2});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * (2 * fp64size)));
    }
    else { // middle MPS tensors
      extents.push_back({maxExtent, 2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * (2 * fp64size)));
    }
    extentsPtr[i] = extents[i].data();
  }
  std::cout << "Allocated GPU memory for MPS tensors\n";
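
As a quick sanity check on the sizes requested above, a worked example for numQubits = 16, maxExtent = 8, and complex double elements (16 bytes each):

2 \times (2 \cdot 8) \;+\; 14 \times (8 \cdot 2 \cdot 8) \;=\; 32 + 1792 \;=\; 1824 \ \text{elements} \;\approx\; 28.5\ \text{KiB of GPU memory in total}.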

Allocate a scratch buffer on the GPU

  // Query free memory on Device and allocate a scratch buffer
  std::size_t freeSize {0}, totalSize {0};
  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
  void *d_scratch {nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
            << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch)) + scratchSize) << ")\n";

Create a pure tensor network state

Now let's create a pure tensor network state for the quantum circuit with the given number of qubits.

  // Create the initial quantum state
  cutensornetState_t quantumState;
  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
                    CUDA_C_64F, &quantumState));
  std::cout << "Created the initial (vacuum) quantum state\n";

Apply quantum gates

Let's construct the QFT quantum circuit (without bit reversal) by applying the corresponding quantum gates.

  // Construct the QFT quantum circuit state (apply quantum gates)
  //  Example of a QFT circuit with 3 qubits (with no bit reversal):
  //  Q0--H--CR2--CR3-------------
  //         |    |
  //  Q1-----o----|----H--CR2-----
  //              |       |
  //  Q2----------o-------o----H--
  int64_t id;
  for(int32_t i = 0; i < numQubits; ++i) {
    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{i}}.data(),
                      d_gateH, nullptr, 1, 0, 1, &id));
    for(int32_t j = (i+1); j < numQubits; ++j) {
      HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{j,i}}.data(),
                        d_gateCR[j-i], nullptr, 1, 0, 1, &id));
    }
  }
  std::cout << "Applied quantum gates\n";
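
For reference, the circuit above implements the quantum Fourier transform (up to the usual bit reversal of the output qubits), which acts on an n-qubit computational basis state as

\mathrm{QFT}\,|j\rangle \;=\; \frac{1}{\sqrt{2^{n}}} \sum_{k=0}^{2^{n}-1} e^{2\pi i\,jk/2^{n}}\,|k\rangle, \qquad n = \texttt{numQubits}.

Since the initial state here is the vacuum |0...0>, the resulting state is the uniform superposition, so the samples produced at the end are expected to be uniformly distributed bit-strings.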

Request MPS factorization of the final quantum circuit state

Here we express our intent to factorize the final quantum circuit state using MPS factorization. The provided shapes (mode extents) of the MPS tensors are upper limits on their sizes during the MPS renormalization procedure; the actually computed shapes (mode extents) of the final MPS tensors may be smaller. Note that no computation is performed here yet.

  // Specify the target MPS representation (use default strides)
  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));
  std::cout << "Finalized the form of the MPS representation\n";

Configure the MPS factorization procedure

After expressing the intent to perform an MPS factorization of the final quantum circuit state, we can also configure the factorization procedure by resetting various options, for example, the SVD algorithm.

  // Set up the SVD method for bonds truncation (optional)
  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ALGO, &algo, sizeof(algo)));
  std::cout << "Configured the MPS computation\n";
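
Other MPS factorization options can be set in the same way. Below is a minimal sketch (not part of the original sample) that additionally requests truncation of small singular values, assuming the CUTENSORNET_STATE_MPS_SVD_CONFIG_ABS_CUTOFF attribute is available in your cuTensorNet version:

  // Sketch (assumption): discard singular values below an absolute cutoff;
  // the attribute value is a double
  const double absCutoff = 1e-8;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ABS_CUTOFF, &absCutoff, sizeof(absCutoff)));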

Prepare the computation of the MPS factorization

Let's create a workspace descriptor and prepare the computation of the MPS factorization of the final quantum circuit state.

  // Prepare the MPS computation and attach a workspace buffer
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
  std::cout << "Created the workspace descriptor\n";
  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
  int64_t worksize {0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer for MPS computation\n";

Compute the MPS factorization

Once the MPS factorization procedure has been configured and prepared, let's compute the MPS factorization of the final quantum circuit state.

  // Compute the MPS factorization of the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
  std::cout << "Computed the MPS factorization of the quantum circuit state\n";
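
Optionally, the realized bond dimensions can now be inspected. The sketch below (not part of the original sample) relies on cutensornetStateCompute having written the computed extents back through the extentsPtr argument, so the extents vectors hold the final MPS tensor shapes:

  // Sketch: print the computed MPS tensor shapes; the extents may be
  // smaller than the requested maxExtent limit
  for(int32_t i = 0; i < numQubits; ++i) {
    std::cout << "MPS tensor " << i << " extents:";
    for(const auto ext: extents[i]) std::cout << " " << ext;
    std::cout << "\n";
  }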

Create the tensor network state sampler

Once the quantum circuit state has been constructed and factorized into the MPS representation, let's create the tensor network state sampler for the full qubit register (all qubits).

  // Create the quantum circuit sampler
  cutensornetStateSampler_t sampler;
  HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQubits, nullptr, &sampler));
  std::cout << "Created the quantum circuit sampler\n";
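
Passing nullptr for the list of modes together with numQubits requests samples over the full qubit register. As a sketch of a variant (not part of the original sample; partialSampler and modesToSample are names introduced here), a sampler restricted to the first half of the qubits could instead be created with an explicit mode list:

  // Sketch: sample only the first numQubits/2 qubits by supplying
  // an explicit list of state modes instead of nullptr
  std::vector<int32_t> modesToSample(numQubits / 2);
  for(int32_t q = 0; q < numQubits / 2; ++q) modesToSample[q] = q;
  cutensornetStateSampler_t partialSampler;
  HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQubits / 2,
                    modesToSample.data(), &partialSampler));
  // ... configure, prepare, and sample it the same way as shown below, then destroy it:
  HANDLE_CUTN_ERROR(cutensornetDestroySampler(partialSampler));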

Configure the tensor network state sampler

Optionally, we can configure the tensor network state sampler by setting the number of hyper-samples to be used by the tensor network contraction path finder.

  // Configure the quantum circuit sampler
  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
  HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
                    CUTENSORNET_SAMPLER_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the tensor network state sampler

Now let's prepare the tensor network state sampler.

  // Prepare the quantum circuit sampler
  HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the quantum circuit state sampler\n";

Set up the workspace

Now we can set up the required workspace buffer.

  // Attach the workspace buffer
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
  assert(worksize > 0);
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer\n";

Perform sampling of the final quantum circuit state

Once everything has been set up, we perform sampling of the quantum circuit state and print the output samples.

  // Sample the quantum circuit state
  std::vector<int64_t> samples(numQubits * numSamples); // samples[SampleId][QubitId] reside in Host memory
  HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
  std::cout << "Performed quantum circuit state sampling\n";
  std::cout << "Bit-string samples:\n";
  for(int64_t i = 0; i < numSamples; ++i) {
    for(int64_t j = 0; j < numQubits; ++j) std::cout << " " << samples[i * numQubits + j];
    std::cout << std::endl;
  }
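
Each printed line is one sample over all 16 qubits. As a post-processing sketch (not part of the original sample; it additionally requires #include <map> and #include <string> at the top of the file), the raw samples could be aggregated into bit-string frequencies:

  // Sketch: count how often each bit-string occurs among the samples
  std::map<std::string, int64_t> frequencies;
  for(int64_t i = 0; i < numSamples; ++i) {
    std::string bitString;
    for(int64_t j = 0; j < numQubits; ++j) bitString += std::to_string(samples[i * numQubits + j]);
    ++frequencies[bitString];
  }
  for(const auto & kv: frequencies) std::cout << kv.first << " : " << kv.second << "\n";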

Free resources

  // Destroy the workspace descriptor
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  std::cout << "Destroyed the workspace descriptor\n";

  // Destroy the quantum circuit sampler
  HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
  std::cout << "Destroyed the quantum circuit state sampler\n";

  // Destroy the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
  std::cout << "Destroyed the quantum circuit state\n";

  // Free GPU buffers
  for (int32_t i = 0; i < numQubits; i++) {
    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
  }
  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
  for(auto ptr: d_gateCR) HANDLE_CUDA_ERROR(cudaFree(ptr));
  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
  std::cout << "Freed memory on GPU\n";

  // Finalize the cuTensorNet library
  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
  std::cout << "Finalized the cuTensorNet library\n";

  return 0;
}