概述

此应用程序跟踪输入视频中的特征点，在每帧上绘制这些特征点，并将它们保存到磁盘。您可以定义将用于处理的后端。

注意: 输出将为灰度，因为该算法目前不支持彩色输入。

说明

命令行参数为

<后端> <输入视频> <金字塔层级> <输出帧>

其中

后端：cpu、cuda 或 pva 之一；它定义了将执行处理的后端。
输入视频：输入视频文件名，它接受 OpenCV 的 cv::VideoCapture 接受的所有视频类型。
金字塔层级：指定算法中使用的金字塔层数。
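输出帧：输出帧所用的基础文件名。例如：output.png 将生成帧 output_0000.png、output_0001.png、output_0002.png，依此类推。注意：示例程序还会在帧编号之前附加所用后端的名称（Python 版本还会附加 Python 主版本号），例如使用 cuda 后端时，C++ 示例生成 output_cuda_0000.png，Python 示例生成 output_python3_cuda_0000.png。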

这是一个示例

C++
./vpi_sample_12_optflow_lk cuda ../assets/dashcam.mp4 5 frame.png
Python
python3 main.py cuda ../assets/dashcam.mp4 5 frame.png

上述命令使用 CUDA 后端处理随附的一个示例视频，金字塔层数为 5。

结果

帧 0009

源代码

为方便起见，下面给出示例的完整代码；同样的代码也安装在示例目录中。

语言 C++ Python

 import sys
 import vpi
 import numpy as np
 from os import path
 from argparse import ArgumentParser
 from contextlib import contextmanager
 import cv2
 
 
 # --------------------------------------
 # Some definitions and utility functions
 
 # Maximum number of keypoints that will be tracked. After the Harris corners
 # of the first frame are sorted by descending score, the feature array size
 # is capped to this value.
 MAX_KEYPOINTS = 100
 
 def update_mask(mask, trackColors, prevFeatures, curFeatures, status = None)
  '''Draw keypoint path from previous frame to current one'''
 
  numTrackedKeypoints = 0
 
  def none_context(a=None): return contextmanager(lambda: (x for x in [a]))()
 
  with curFeatures.rlock_cpu(), \
  (status.rlock_cpu() if status else none_context()), \
  (prevFeatures.rlock_cpu() if prevFeatures else none_context())
 
  for i in range(curFeatures.size)
  # keypoint is being tracked?
  if not status or status.cpu()[i] == 0
  color = tuple(trackColors[i,0].tolist())
 
  # OpenCV 4.5+ wants integers in the tuple arguments below
  cf = tuple(np.round(curFeatures.cpu()[i]).astype(int))
 
  # draw the tracks
  if prevFeatures
  pf = tuple(np.round(prevFeatures.cpu()[i]).astype(int))
  cv2.line(mask, pf, cf, color, 2)
 
  cv2.circle(mask, cf, 5, color, -1)
 
  numTrackedKeypoints += 1
 
  return numTrackedKeypoints
 
 def save_file_to_disk(frame, mask, baseFileName, frameCounter)
  '''Apply mask on frame and save it to disk'''
 
  frame = frame.convert(vpi.Format.BGR8, backend=vpi.Backend.CUDA)
  with frame.rlock_cpu() as frameData
  frame = cv2.add(frameData, mask)
 
  name, ext = path.splitext(baseFileName)
  fname = "{}_{:04d}{}".format(name, frameCounter, ext)
 
  cv2.imwrite(fname, frame, [cv2.IMWRITE_JPEG_QUALITY, 70])
 
 # ----------------------------
 # Parse command line arguments
 
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['cpu', 'cuda', 'pva'],
  help='Backend to be used for processing')
 
 parser.add_argument('input',
  help='Input video to be processed')
 
 parser.add_argument('pyramid_levels', type=int,
  help='Number of levels in the pyramid used with the algorithm')
 
 parser.add_argument('output',
  help='Output file name')
 
 args = parser.parse_args();
 
 if args.backend == 'cuda'
  backend = vpi.Backend.CUDA
 elif args.backend == 'pva'
  backend = vpi.Backend.PVA
 else
  assert args.backend == 'cpu'
  backend = vpi.Backend.CPU
 
 # adjust output file name to take into account backend used and python version
 name, ext = path.splitext(args.output)
 args.output = "{}_python{}_{}{}".format(name, sys.version_info[0], args.backend, ext)
 
 # ----------------
 # Open input video
 
 inVideo = cv2.VideoCapture(args.input)
 
 # Read first input frame
 ok, cvFrame = inVideo.read()
 if not ok
  exit('Cannot read first input frame')
 
 # ---------------------------
 # Perform some pre-processing
 
 # Retrieve features to be tracked from first frame using
 # Harris Corners Detector
 with vpi.Backend.CPU
  frame = vpi.asimage(cvFrame, vpi.Format.BGR8).convert(vpi.Format.U8)
  curFeatures, scores = frame.harriscorners(strength=0.1, sensitivity=0.01)
 
 # Limit the number of features we'll track and calculate their colors on the
 # output image
 with curFeatures.lock_cpu() as featData, scores.rlock_cpu() as scoresData
  # Sort features in descending scores order and keep the first MAX_KEYPOINTS
  ind = np.argsort(scoresData, kind='mergesort')[::-1]
  featData[:] = np.take(featData, ind, axis=0)
  curFeatures.size = min(curFeatures.size, MAX_KEYPOINTS)
 
  # Keypoints' have different hues, calculated from their position in the first frame
  trackColors = np.array([[(int(p[0]) ^ int(p[1])) % 180,255,255] for p in featData], np.uint8).reshape(-1,1,3)
  # Convert colors from HSV to RGB
  trackColors = cv2.cvtColor(trackColors, cv2.COLOR_HSV2BGR).astype(int)
 
 with backend
  optflow = vpi.OpticalFlowPyrLK(frame, curFeatures, args.pyramid_levels)
 
 # Counter for the frames
 idFrame = 0
 
 # Create mask with features' tracks over time
 mask = np.zeros((frame.height, frame.width, 3), np.uint8)
 numTrackedKeypoints = update_mask(mask, trackColors, None, curFeatures)
 
 while True
  # Apply mask to frame and save it to disk
  save_file_to_disk(frame, mask, args.output, idFrame)
 
  print("Frame id={}: {} points tracked.".format(idFrame, numTrackedKeypoints))
 
  prevFeatures = curFeatures
 
  # Read one input frame
  ret, cvFrame = inVideo.read()
  if not ret
  print("Video ended.")
  break
  idFrame += 1
 
  # Convert frame to grayscale
  with vpi.Backend.CUDA
  frame = vpi.asimage(cvFrame, vpi.Format.BGR8).convert(vpi.Format.U8);
 
  # Calculate where keypoints are in current frame
  curFeatures, status = optflow(frame)
 
  # Update the mask with the current keypoints' position
  numTrackedKeypoints = update_mask(mask, trackColors, prevFeatures, curFeatures, status)
 
  # No more keypoints to track?
  if numTrackedKeypoints == 0
  print("No keypoints to track.")
  break # nothing else to do

 #include <opencv2/core/version.hpp>
 #if CV_MAJOR_VERSION >= 3
 # include <opencv2/imgcodecs.hpp>
 # include <opencv2/videoio.hpp>
 #else
 # include <opencv2/highgui/highgui.hpp>
 #endif
 
 #include <opencv2/imgproc/imgproc.hpp>
 #include <vpi/OpenCVInterop.hpp>
 
 #include <vpi/Array.h>
 #include <vpi/Image.h>
 #include <vpi/Pyramid.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/GaussianPyramid.h>
 #include <vpi/algo/HarrisCorners.h>
 #include <vpi/algo/OpticalFlowPyrLK.h>
 
 #include <algorithm>
 #include <cstring> // for memset
 #include <fstream>
 #include <iostream>
 #include <map>
 #include <numeric>
 #include <sstream>
 #include <vector>
 
 // Max number of corners detected by harris corner algo.
 // Also used as the capacity of the keypoint, status and score arrays.
 constexpr int MAX_HARRIS_CORNERS = 8192;
 
 // Max number of keypoints to be tracked.
 // Passed to SortKeypoints, which keeps only this many highest-scoring corners.
 constexpr int MAX_KEYPOINTS = 100;
 
 // Evaluates STMT once and, if the resulting VPIStatus is not VPI_SUCCESS,
 // throws std::runtime_error with the text "<status name>: <last status message>".
 //
 // NOTE: the expansion already ends with a semicolon ("while (0);"), so
 // "CHECK_STATUS(x);" expands to an extra empty statement. Do not use it as the
 // unbraced body of an if that is followed by an else.
 #define CHECK_STATUS(STMT) \
  do \
  { \
  VPIStatus status__ = (STMT); \
  if (status__ != VPI_SUCCESS) \
  { \
  char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
  vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
  std::ostringstream ss; \
  ss << vpiStatusGetName(status__) << ": " << buffer; \
  throw std::runtime_error(ss.str()); \
  } \
  } while (0);
 
 static void SaveFileToDisk(VPIImage img, cv::Mat cvMask, std::string baseFileName, int32_t frameCounter)
 {
  VPIImageData imgData;
  CHECK_STATUS(vpiImageLockData(img, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &imgData));
 
  cv::Mat cvImage;
  try
  {
  cv::Mat tmp;
  CHECK_STATUS(vpiImageDataExportOpenCVMat(imgData, &tmp));
  cvtColor(tmp, cvImage, cv::COLOR_GRAY2BGR);
 
  CHECK_STATUS(vpiImageUnlock(img));
  }
  catch (...)
  {
  CHECK_STATUS(vpiImageUnlock(img));
  throw;
  }
 
  add(cvImage, cvMask, cvImage);
 
  // Create the output file name
  std::string fname = baseFileName;
  int ext = fname.rfind('.');
 
  char buffer[512] = {};
  snprintf(buffer, sizeof(buffer) - 1, "%s_%04d%s", fname.substr(0, ext).c_str(), frameCounter,
  fname.substr(ext).c_str());
 
  // Finally, write frame to disk
  if (!imwrite(buffer, cvImage, {cv::IMWRITE_JPEG_QUALITY, 70}))
  {
  throw std::runtime_error("Can't write to " + std::string(buffer));
  }
 }
 
 // Sort keypoints by decreasing score, and retain only the first 'max'
 static void SortKeypoints(VPIArray keypoints, VPIArray scores, int max)
 {
  VPIArrayData ptsData, scoresData;
  CHECK_STATUS(vpiArrayLockData(keypoints, VPI_LOCK_READ_WRITE, VPI_ARRAY_BUFFER_HOST_AOS, &ptsData));
  CHECK_STATUS(vpiArrayLockData(scores, VPI_LOCK_READ_WRITE, VPI_ARRAY_BUFFER_HOST_AOS, &scoresData));
 
  VPIArrayBufferAOS &aosKeypoints = ptsData.buffer.aos;
  VPIArrayBufferAOS &aosScores = scoresData.buffer.aos;
 
  std::vector<int> indices(*aosKeypoints.sizePointer);
  std::iota(indices.begin(), indices.end(), 0);
 
  stable_sort(indices.begin(), indices.end(), [&aosScores](int a, int b) {
  uint32_t *score = reinterpret_cast<uint32_t *>(aosScores.data);
  return score[a] >= score[b]; // decreasing score order
  });
 
  // keep the only 'max' indexes.
  indices.resize(std::min<size_t>(indices.size(), max));
 
  VPIKeypointF32 *kptData = reinterpret_cast<VPIKeypointF32 *>(aosKeypoints.data);
 
  // reorder the keypoints to keep the first 'max' with highest scores.
  std::vector<VPIKeypointF32> kpt;
  std::transform(indices.begin(), indices.end(), std::back_inserter(kpt),
  [kptData](int idx) { return kptData[idx]; });
  std::copy(kpt.begin(), kpt.end(), kptData);
 
  // update keypoint array size.
  *aosKeypoints.sizePointer = kpt.size();
 
  vpiArrayUnlock(scores);
  vpiArrayUnlock(keypoints);
 }
 
 static int UpdateMask(cv::Mat &cvMask, const std::vector<cv::Scalar> &trackColors, VPIArray prevFeatures,
  VPIArray curFeatures, VPIArray status)
 {
  // Now that optical flow is completed, there are usually two approaches to take:
  // 1. Add new feature points from current frame using a feature detector such as
  // \ref algo_harris_corners "Harris Corner Detector"
  // 2. Keep using the points that are being tracked.
  //
  // The sample app uses the valid feature point and continue to do the tracking.
 
  // Lock the input and output arrays to draw the tracks to the output mask.
  VPIArrayData curFeaturesData, statusData;
  CHECK_STATUS(vpiArrayLockData(curFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &curFeaturesData));
  CHECK_STATUS(vpiArrayLockData(status, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &statusData));
 
  const VPIArrayBufferAOS &aosCurFeatures = curFeaturesData.buffer.aos;
  const VPIArrayBufferAOS &aosStatus = statusData.buffer.aos;
 
  const VPIKeypointF32 *pCurFeatures = (VPIKeypointF32 *)aosCurFeatures.data;
  const uint8_t *pStatus = (uint8_t *)aosStatus.data;
 
  const VPIKeypointF32 *pPrevFeatures;
  if (prevFeatures)
  {
  VPIArrayData prevFeaturesData;
  CHECK_STATUS(vpiArrayLockData(prevFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &prevFeaturesData));
  pPrevFeatures = (VPIKeypointF32 *)prevFeaturesData.buffer.aos.data;
  }
  else
  {
  pPrevFeatures = NULL;
  }
 
  int numTrackedKeypoints = 0;
  int totKeypoints = *curFeaturesData.buffer.aos.sizePointer;
 
  for (int i = 0; i < totKeypoints; i++)
  {
  // keypoint is being tracked?
  if (pStatus[i] == 0)
  {
  // draw the tracks
  cv::Point curPoint{(int)round(pCurFeatures[i].x), (int)round(pCurFeatures[i].y)};
  if (pPrevFeatures != NULL)
  {
  cv::Point2f prevPoint{pPrevFeatures[i].x, pPrevFeatures[i].y};
  line(cvMask, prevPoint, curPoint, trackColors[i], 2);
  }
 
  circle(cvMask, curPoint, 5, trackColors[i], -1);
 
  numTrackedKeypoints++;
  }
  }
 
  // 我们完成了对数组的操作。
  if (prevFeatures)
  {
  CHECK_STATUS(vpiArrayUnlock(prevFeatures));
  }
  CHECK_STATUS(vpiArrayUnlock(curFeatures));
  CHECK_STATUS(vpiArrayUnlock(status));
 
  return numTrackedKeypoints;
 }
 
 int main(int argc, char *argv[])
 {
  // 将被 VPIImage 封装的 OpenCV 图像。
  // 在此处定义，以便在 wrapper 销毁*之后*销毁它
  cv::Mat cvFrame;
 
  // 将要使用的 VPI 对象
  VPIStream stream = NULL;
  VPIImage imgTempFrame = NULL;
  VPIImage imgFrame = NULL;
  VPIPyramid pyrPrevFrame = NULL, pyrCurFrame = NULL;
  VPIArray prevFeatures = NULL, curFeatures = NULL, status = NULL;
  VPIPayload optflow = NULL;
  VPIArray scores = NULL;
  VPIPayload harris = NULL;
 
  int retval = 0;
 
  try
  {
  // ============================
  // 解析命令行参数
 
  if (argc != 5)
  {
  throw std::runtime_error(std::string("Usage: ") + argv[0] +
  " 用法： ");
  }
 
  std::string strBackend = argv[1];
  std::string strInputVideo = argv[2];
  int32_t pyrLevel = std::stoi(argv[3]);
  std::string strOutputFiles = argv[4];
 
  // 现在解析后端
  VPIBackend backend;
 
  if (strBackend == "cpu")
  {
  backend = VPI_BACKEND_CPU;
  }
  else if (strBackend == "cuda")
  {
  backend = VPI_BACKEND_CUDA;
  }
  else if (strBackend == "pva")
  {
  backend = VPI_BACKEND_PVA;
  }
  else
  {
  "后端 '" + strBackend +
  "' 无法识别，它必须是 cpu、cuda 或 pva 之一。");
  }
 
  {
  int ext = strOutputFiles.rfind('.');
  strOutputFiles = strOutputFiles.substr(0, ext) + "_" + strBackend + strOutputFiles.substr(ext);
  }
 
  // ====================
  // 加载输入视频
  cv::VideoCapture invid;
  if (!invid.open(strInputVideo))
  {
  "无法打开 '" + strInputVideo + "'");
  }
 
  // 获取第一帧并将其封装到 VPIImage 中。
  // 稍后将从此帧收集要跟踪的点。
  if (!invid.read(cvFrame))
  {
  "无法从 '" + strInputVideo + "' 中检索第一帧");
  }
 
  // =================================================
  // 分配 VPI 资源并进行一些预处理
 
  // 创建将要进行处理的流。
  CHECK_STATUS(vpiStreamCreate(0, &stream));
 
  CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvFrame, 0, &imgTempFrame));
 
  // 创建输入的灰度图像表示。
  CHECK_STATUS(vpiImageCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, 0, &imgFrame));
 
  // 创建算法使用的图像金字塔
  CHECK_STATUS(
  vpiPyramidCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5, 0, &pyrPrevFrame));
  CHECK_STATUS(vpiPyramidCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5, 0, &pyrCurFrame));
 
  // 创建输入和输出数组
  CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_KEYPOINT_F32, 0, &prevFeatures));
  CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_KEYPOINT_F32, 0, &curFeatures));
  CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_U8, 0, &status));
 
  // 创建光流负载
  CHECK_STATUS(vpiCreateOpticalFlowPyrLK(backend, cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5,
  &optflow));
 
  // 我们将要使用的参数。无需动态更改它们，因此只需在此处定义它们即可。
  // 我们正在使用默认参数。
  VPIOpticalFlowPyrLKParams lkParams;
  CHECK_STATUS(vpiInitOpticalFlowPyrLKParams(backend, &lkParams));
 
  // 创建用于绘制目的的掩码图像
  cv::Mat cvMask = cv::Mat::zeros(cvFrame.size(), CV_8UC3);
 
  // 使用 CPU 上的 Harris 角点检测器从第一帧收集特征点。
  {
  CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_U32, 0, &scores));
 
  VPIHarrisCornerDetectorParams harrisParams;
  CHECK_STATUS(vpiInitHarrisCornerDetectorParams(&harrisParams));
  harrisParams.strengthThresh = 0;
  harrisParams.sensitivity = 0.01;
 
  CHECK_STATUS(vpiCreateHarrisCornerDetector(VPI_BACKEND_CPU, cvFrame.cols, cvFrame.rows, &harris));
 
  // 将输入转换为灰度以符合 Harris 角点检测器的限制
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgTempFrame, imgFrame, NULL));
 
  CHECK_STATUS(vpiSubmitHarrisCornerDetector(stream, VPI_BACKEND_CPU, harris, imgFrame, curFeatures, scores,
  &harrisParams));
 
  CHECK_STATUS(vpiStreamSync(stream));
 
  SortKeypoints(curFeatures, scores, MAX_KEYPOINTS);
  }
 
  // 创建一些随机颜色
  std::vector<cv::Scalar> trackColors;
  {
  std::vector<cv::Vec3b> tmpTrackColors;
 
  VPIArrayData ptsData;
  CHECK_STATUS(vpiArrayLockData(curFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &ptsData));
 
  const VPIArrayBufferAOS &aosKeypoints = ptsData.buffer.aos;
 
  const VPIKeypointF32 *pts = (VPIKeypointF32 *)aosKeypoints.data;
 
  for (int i = 0; i < *aosKeypoints.sizePointer; i++)
  {
  // 轨迹色调取决于其初始位置
  int hue = ((int)pts[i].x ^ (int)pts[i].y) % 180;
 
  tmpTrackColors.push_back(cv::Vec3b(hue, 255, 255));
  }
  CHECK_STATUS(vpiArrayUnlock(curFeatures));
 
  cvtColor(tmpTrackColors, tmpTrackColors, cv::COLOR_HSV2BGR);
 
  for (size_t i = 0; i < tmpTrackColors.size(); i++)
  {
  trackColors.push_back(cv::Scalar(tmpTrackColors[i]));
  }
  }
 
  // 使用第一帧的信息更新掩码。
  int numTrackedKeypoints = UpdateMask(cvMask, trackColors, NULL, curFeatures, status);
 
  // =================================================
  // 主要处理阶段
 
  // 为第一帧生成金字塔。
  CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, backend, imgFrame, pyrCurFrame, VPI_BORDER_CLAMP));
 
  // 帧计数器
  int idxFrame = 0;
 
  while (true)
  {
  // 将帧保存到磁盘
  SaveFileToDisk(imgFrame, cvMask, strOutputFiles, idxFrame);
 
  printf("帧 ID=%d: 跟踪了 %d 个点。\n", idxFrame, numTrackedKeypoints);
 
  // 上一次迭代的当前帧/特征变为本次迭代的上一帧/特征。
  // 前者将包含在此迭代中收集的信息。
  std::swap(prevFeatures, curFeatures);
  std::swap(pyrPrevFrame, pyrCurFrame);
 
  // 获取新帧
  if (!invid.read(cvFrame))
  {
  printf("视频结束。\n");
  break;
  }
 
  ++idxFrame;
 
  // 将帧封装到 VPIImage 中，重用现有的 imgFrame。
  CHECK_STATUS(vpiImageSetWrappedOpenCVMat(imgTempFrame, cvFrame));
 
  // 将其转换为灰度
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgTempFrame, imgFrame, NULL))
 
  // 从中生成金字塔
  CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, backend, imgFrame, pyrCurFrame, VPI_BORDER_CLAMP));
 
  // 根据特征点在上一帧中的位置，估计其在当前帧中的位置
  CHECK_STATUS(vpiSubmitOpticalFlowPyrLK(stream, 0, optflow, pyrPrevFrame, pyrCurFrame, prevFeatures,
  curFeatures, status, &lkParams));
 
  // 等待处理完成。
  CHECK_STATUS(vpiStreamSync(stream));
 
  // 更新输出掩码
  numTrackedKeypoints = UpdateMask(cvMask, trackColors, prevFeatures, curFeatures, status);
 
  // 没有更多关键点被跟踪了吗？
  if (numTrackedKeypoints == 0)
  {
  printf("没有关键点可以跟踪。\n");
  break; // 我们可以结束处理。
  }
  }
  }
  catch (std::exception &e)
  {
  std::cerr << e.what() << std::endl;
  retval = 1;
  }
 
  vpiStreamDestroy(stream);
  vpiPayloadDestroy(harris);
  vpiPayloadDestroy(optflow);
 
  vpiPyramidDestroy(pyrPrevFrame);
  vpiImageDestroy(imgTempFrame);
  vpiImageDestroy(imgFrame);
  vpiArrayDestroy(prevFeatures);
  vpiArrayDestroy(curFeatures);
  vpiArrayDestroy(status);
  vpiArrayDestroy(scores);
 
  return retval;
 }

VPI - 视觉编程接口

3.2 版本

概述

说明

结果

源代码