采样矩阵乘积态(使用矩阵乘积算子的电路)

以下代码示例说明了如何通过使用矩阵乘积算子 (MPO) 的电路来定义张量网络状态,然后计算其矩阵乘积态 (MPS) 分解,最后对 MPS 分解状态进行采样。完整代码可以在 NVIDIA/cuQuantum 存储库 (此处) 中找到。

头文件和错误处理

 7#include <cstdlib>
 8#include <cstdio>
 9#include <cmath>
10#include <cassert>
11#include <complex>
12#include <random>
13#include <functional>
14#include <vector>
15#include <iostream>
16
17#include <cuda_runtime.h>
18#include <cutensornet.h>
19
20#define HANDLE_CUDA_ERROR(x) \
21{ const auto err = x; \
22  if( err != cudaSuccess ) \
23  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
24};
25
26#define HANDLE_CUTN_ERROR(x) \
27{ const auto err = x; \
28  if( err != CUTENSORNET_STATUS_SUCCESS ) \
29  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
30};
31
32
33int main()
34{
35  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
36
37  constexpr std::size_t fp64size = sizeof(double);
38  constexpr cudaDataType_t dataType = CUDA_C_64F;

定义张量网络状态和要生成的所需输出样本数

让我们为具有给定量子比特数的量子电路定义张量网络状态的所有参数,并请求为完整量子比特寄存器生成给定数量的输出样本。我们还配置了此示例中使用的 MPS 和 MPO 张量网络表示的最大键维度。对于 MPO,我们还定义了它作用的站点(量子比特)数量以及 MPO 的层数。

42  // Quantum state configuration
43  const int64_t numSamples = 128; // number of samples to generate
44  const int32_t numQudits = 6;    // number of qudits
45  const int64_t quditDim = 2;     // dimension of all qudits
46  const std::vector<int64_t> quditDims(numQudits, quditDim); // qudit dimensions
47  const int64_t mpsBondDim = 8;   // maximum MPS bond dimension
48  const int64_t mpoBondDim = 2;   // maximum MPO bond dimension
49  const int32_t mpoNumLayers = 5; // number of MPO layers
50  constexpr int32_t mpoNumSites = 4;  // number of MPO sites
51  std::cout << "Quantum circuit: " << numQudits << " qudits; " << numSamples << " samples\n";
52
53  /* Action of five alternating four-site MPO gates (operators)
54     on the 6-quqit quantum register (illustration):
55
56  Q----X---------X---------X----
57       |         |         |
58  Q----X---------X---------X----
59       |         |         |
60  Q----X----X----X----X----X----
61       |    |    |    |    |
62  Q----X----X----X----X----X----
63            |         |
64  Q---------X---------X---------
65            |         |
66  Q---------X---------X---------
67
68    |layer|
69  */
70  static_assert(mpoNumSites > 1, "Number of MPO sites must be larger than one!");
71
72  // Random number generator
73  std::default_random_engine generator;
74  std::uniform_real_distribution<double> distribution(-1.0, 1.0);
75  auto rnd = std::bind(distribution, generator);

初始化 cuTensorNet 库句柄

79  // Initialize the cuTensorNet library
80  HANDLE_CUDA_ERROR(cudaSetDevice(0));
81  cutensornetHandle_t cutnHandle;
82  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
83  std::cout << "Initialized cuTensorNet library on GPU 0\n";

定义和分配 MPO 张量

在这里,我们定义了 MPO 张量的形状,用主机上的随机数据填充它们,最后将它们传输到设备内存中。

 87  /* MPO tensor mode numeration (open boundary condition):
 88
 89     2            3                   2
 90     |            |                   |
 91     X--1------0--X--2---- ... ----0--X
 92     |            |                   |
 93     0            1                   1
 94
 95  */
 96  // Define shapes of the MPO tensors
 97  using GateData = std::vector<std::complex<double>>;
 98  using ModeExtents = std::vector<int64_t>;
 99  std::vector<GateData> mpoTensors(mpoNumSites);        // MPO tensors
100  std::vector<ModeExtents> mpoModeExtents(mpoNumSites); // mode extents for MPO tensors
101  int64_t upperBondDim = 1;
102  for(int tensId = 0; tensId < (mpoNumSites / 2); ++tensId) {
103    const auto leftBondDim = std::min(mpoBondDim, upperBondDim);
104    const auto rightBondDim = std::min(mpoBondDim, leftBondDim * (quditDim * quditDim));
105    if(tensId == 0) {
106      mpoModeExtents[tensId] = {quditDim, rightBondDim, quditDim};
107    }else{
108      mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
109    }
110    upperBondDim = rightBondDim;
111  }
112  auto centralBondDim = upperBondDim;
113  if(mpoNumSites % 2 != 0) {
114    const int tensId = mpoNumSites / 2;
115    mpoModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim, quditDim};
116  }
117  upperBondDim = 1;
118  for(int tensId = (mpoNumSites-1); tensId >= (mpoNumSites / 2) + (mpoNumSites % 2); --tensId) {
119    const auto rightBondDim = std::min(mpoBondDim, upperBondDim);
120    const auto leftBondDim = std::min(mpoBondDim, rightBondDim * (quditDim * quditDim));
121    if(tensId == (mpoNumSites-1)) {
122      mpoModeExtents[tensId] = {leftBondDim, quditDim, quditDim};
123    }else{
124      mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
125    }
126    upperBondDim = leftBondDim;
127  }
128  // Fill in the MPO tensors with random data on Host
129  std::vector<const int64_t*> mpoModeExtentsPtr(mpoNumSites, nullptr);
130  for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
131    mpoModeExtentsPtr[tensId] = mpoModeExtents[tensId].data();
132    const auto tensRank = mpoModeExtents[tensId].size();
133    int64_t tensVol = 1;
134    for(int i = 0; i < tensRank; ++i) {
135      tensVol *= mpoModeExtents[tensId][i];
136    }
137    mpoTensors[tensId].resize(tensVol);
138    for(int64_t i = 0; i < tensVol; ++i) {
139      mpoTensors[tensId][i] = std::complex<double>(rnd(), rnd());
140    }
141  }
142  // Allocate and copy MPO tensors into Device memory
143  std::vector<void*> d_mpoTensors(mpoNumSites, nullptr);
144  for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
145    const auto tensRank = mpoModeExtents[tensId].size();
146    int64_t tensVol = 1;
147    for(int i = 0; i < tensRank; ++i) {
148      tensVol *= mpoModeExtents[tensId][i];
149    }
150    HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpoTensors[tensId]), 
151                                 std::size_t(tensVol) * (2 * fp64size)));
152    HANDLE_CUDA_ERROR(cudaMemcpy(d_mpoTensors[tensId], mpoTensors[tensId].data(),
153                                 std::size_t(tensVol) * (2 * fp64size), cudaMemcpyHostToDevice));
154  }
155  std::cout << "Allocated and defined MPO tensors in GPU memory\n";

定义和分配 MPS 张量

在这里,我们定义了 MPS 张量的形状,并为其存储分配了设备内存。

159  // Define the MPS representation and allocate memory buffers for the MPS tensors
160  std::vector<ModeExtents> mpsModeExtents(numQudits);
161  std::vector<int64_t*> mpsModeExtentsPtr(numQudits, nullptr);
162  std::vector<void*> d_mpsTensors(numQudits, nullptr);
163  upperBondDim = 1;
164  for (int tensId = 0; tensId < (numQudits / 2); ++tensId) {
165    const auto leftBondDim = std::min(mpsBondDim, upperBondDim);
166    const auto rightBondDim = std::min(mpsBondDim, leftBondDim * quditDim);
167    if (tensId == 0) { // left boundary MPS tensor  
168      mpsModeExtents[tensId] = {quditDim, rightBondDim};
169      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
170                                   std::size_t(quditDim * rightBondDim) * (2 * fp64size)));
171    } else { // middle MPS tensors
172      mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
173      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
174                                   std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
175    }
176    upperBondDim = rightBondDim;
177    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
178  }
179  centralBondDim = upperBondDim;
180  if(numQudits % 2 != 0) {
181    const int tensId = numQudits / 2;
182    mpsModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim};
183    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
184  }
185  upperBondDim = 1;
186  for (int tensId = (numQudits-1); tensId >= (numQudits / 2) + (numQudits % 2); --tensId) {
187    const auto rightBondDim = std::min(mpsBondDim, upperBondDim);
188    const auto leftBondDim = std::min(mpsBondDim, rightBondDim * quditDim);
189    if (tensId == (numQudits-1)) { // right boundary MPS tensor  
190      mpsModeExtents[tensId] = {leftBondDim, quditDim};
191      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
192                                   std::size_t(leftBondDim * quditDim) * (2 * fp64size)));
193    } else { // middle MPS tensors
194      mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
195      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
196                                   std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
197    }
198    upperBondDim = leftBondDim;
199    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
200  }
201  std::cout << "Allocated MPS tensors in GPU memory\n";

在 GPU 上分配暂存缓冲区

205  // Query free memory on Device and allocate a scratch buffer
206  std::size_t freeSize {0}, totalSize {0};
207  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
208  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
209  void *d_scratch {nullptr};
210  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
211  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
212            << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch))  + scratchSize) << ")\n";

创建纯张量网络状态

现在,让我们为具有给定量子比特数(真空态)的量子电路创建一个纯张量网络状态。

216  // Create the initial quantum state
217  cutensornetState_t quantumState;
218  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQudits, quditDims.data(),
219                    dataType, &quantumState));
220  std::cout << "Created the initial (vacuum) quantum state\n";

构建必要的 MPO 张量网络算符

让我们构建两个作用于量子比特不同子集的 MPO 张量网络算符。

224  // Construct the MPO tensor network operators
225  int64_t componentId;
226  cutensornetNetworkOperator_t tnOperator1;
227  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
228                    dataType, &tnOperator1));
229  HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator1, make_cuDoubleComplex(1.0, 0.0),
230                    mpoNumSites, std::vector<int32_t>({0,1,2,3}).data(),
231                    mpoModeExtentsPtr.data(), /*strides=*/nullptr,
232                    std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
233                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
234  cutensornetNetworkOperator_t tnOperator2;
235  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
236                    dataType, &tnOperator2));
237  HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator2, make_cuDoubleComplex(1.0, 0.0),
238                    mpoNumSites, std::vector<int32_t>({2,3,4,5}).data(),
239                    mpoModeExtentsPtr.data(), /*strides=*/nullptr,
240                    std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
241                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
242  std::cout << "Constructed two MPO tensor network operators\n";

将 MPO 张量网络算符应用于量子电路状态

让我们通过交替应用 MPO 张量网络算符来构建目标量子电路。

246  // Apply the MPO tensor network operators to the quantum state
247  for(int layer = 0; layer < mpoNumLayers; ++layer) {
248    int64_t operatorId;
249    if(layer % 2 == 0) {
250      HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator1,
251                        1, 0, 0, &operatorId));
252    }else{
253      HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator2,
254        1, 0, 0, &operatorId));
255    }
256  }
257  std::cout << "Applied " << mpoNumLayers << " MPO gates to the quantum state\n";

请求最终量子电路状态的 MPS 分解

在这里,我们表达了使用 MPS 分解来分解量子电路最终状态的意图。提供的 MPS 张量的形状(模式范围)表示重整化期间的最大尺寸限制。最终 MPS 张量的计算模式范围小于或等于这些限制。注意:此处尚未进行任何计算。

261  // Specify the target MPS representation (use default strides)
262  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState, 
263                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, mpsModeExtentsPtr.data(), /*strides=*/nullptr ));
264  std::cout << "Finalized the form of the desired MPS representation\n";

配置 MPS 分解过程

在表达了我们执行最终量子电路状态的 MPS 分解的意图后,我们还可以通过设置不同的选项(例如 SVD 算法)来配置 MPS 分解 + 过程。

268  // Set up the SVD method for bonds truncation (optional)
269  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; 
270  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState, 
271                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ALGO, &algo, sizeof(algo)));
272  cutensornetStateMPOApplication_t mpsApplication = CUTENSORNET_STATE_MPO_APPLICATION_EXACT; 
273  // Use exact MPS-MPO application as extent of 8 can faithfully represent a 6 qubit state (optional)
274  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState, 
275                    CUTENSORNET_STATE_CONFIG_MPS_MPO_APPLICATION, &mpsApplication, sizeof(mpsApplication)));
276  std::cout << "Configured the MPS computation\n";

准备 MPS 分解的计算

让我们创建一个工作空间描述符,并准备最终量子电路状态的 MPS 分解的计算。

280  // Prepare the MPS computation and attach a workspace buffer
281  cutensornetWorkspaceDescriptor_t workDesc;
282  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
283  std::cout << "Created the workspace descriptor\n";
284  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
285  int64_t worksize {0};
286  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
287                                                      workDesc,
288                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
289                                                      CUTENSORNET_MEMSPACE_DEVICE,
290                                                      CUTENSORNET_WORKSPACE_SCRATCH,
291                                                      &worksize));
292  std::cout << "Scratch GPU workspace size (bytes) for the MPS computation = " << worksize << std::endl;
293  if(worksize <= scratchSize) {
294    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
295                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
296  }else{
297    std::cout << "ERROR: Insufficient workspace size on Device!\n";
298    std::abort();
299  }
300  std::cout << "Set the workspace buffer and prepared the MPS computation\n";

计算 MPS 分解

一旦 MPS 分解过程配置和准备就绪,让我们计算最终量子电路状态的 MPS 分解。

304  // Compute the MPS factorization of the quantum circuit state
305  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState, 
306                    workDesc, mpsModeExtentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
307  std::cout << "Computed the MPS factorization of the quantum circuit state\n";
308  

创建张量网络状态采样器

一旦量子电路状态构建完成并使用 MPS 表示进行分解,让我们为所有量子比特创建张量网络状态采样器。

311  // Create the quantum circuit sampler
312  cutensornetStateSampler_t sampler;
313  HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQudits, nullptr, &sampler));
314  std::cout << "Created the quantum circuit sampler\n";

配置张量网络状态采样器

可选地,我们可以通过设置张量网络收缩路径查找器要使用的超样本数量来配置张量网络状态采样器。

318  // Configure the quantum circuit sampler
319  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
320  HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
321                    CUTENSORNET_SAMPLER_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

准备张量网络状态采样器

现在让我们准备张量网络状态采样器。

325  // Prepare the quantum circuit sampler
326  HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
327  std::cout << "Prepared the quantum circuit state sampler\n";

设置工作空间

现在我们可以设置所需的工作空间缓冲区。

331  // Attach the workspace buffer
332  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
333                                                      workDesc,
334                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
335                                                      CUTENSORNET_MEMSPACE_DEVICE,
336                                                      CUTENSORNET_WORKSPACE_SCRATCH,
337                                                      &worksize));
338  std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
339  assert(worksize > 0);
340  if(worksize <= scratchSize) {
341    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
342                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
343  }else{
344    std::cout << "ERROR: Insufficient workspace size on Device!\n";
345    std::abort();
346  }
347  std::cout << "Set the workspace buffer\n";

执行最终量子电路状态的采样

一旦一切都设置好,我们执行最终 MPS 分解量子电路状态的采样,并打印输出样本。

351  // Sample the quantum circuit state
352  std::vector<int64_t> samples(numQudits * numSamples); // samples[SampleId][QuditId] reside in Host memory
353  HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
354  std::cout << "Performed quantum circuit state sampling\n";
355  std::cout << "Generated samples:\n";
356  for(int64_t i = 0; i < numSamples; ++i) {
357    for(int64_t j = 0; j < numQudits; ++j) std::cout << " " << samples[i * numQudits + j];
358    std::cout << std::endl;
359  }

释放资源

363  // Destroy the quantum circuit sampler
364  HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
365  std::cout << "Destroyed the quantum circuit state sampler\n";
366
367  // Destroy the workspace descriptor
368  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
369  std::cout << "Destroyed the workspace descriptor\n";
370
371  // Destroy the MPO tensor network operators
372  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator2));
373  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator1));
374  std::cout << "Destroyed the MPO tensor network operators\n";
375
376  // Destroy the quantum circuit state
377  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
378  std::cout << "Destroyed the quantum circuit state\n";
379
380  // Free GPU buffers
381  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
382  for (int32_t i = 0; i < numQudits; ++i) {
383    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
384  }
385  for (int32_t i = 0; i < mpoNumSites; ++i) {
386    HANDLE_CUDA_ERROR(cudaFree(d_mpoTensors[i]));
387  }
388  std::cout << "Freed memory on GPU\n";
389  
390  // Finalize the cuTensorNet library
391  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
392  std::cout << "Finalized the cuTensorNet library\n";
393
394  return 0;
395}