概述

此应用程序从输入视频源获取帧，在前一帧和当前帧上运行稠密光流算法，然后计算每个 4x4 像素块的运动矢量。输出的运动矢量会映射到 HSV 颜色空间：色调（H）对应运动方向，明度（V）对应运动速度；结果保存为视频文件。

指令

命令行参数为

<backend> <input video> <quality> <gridsize> <numlevels>

其中

backend: 定义将执行处理的后端。仅支持 OFA 后端。ofa 仅在 Jetson AGX Orin 上受支持。
input video: 输入视频文件名，它接受 .mp4、.avi 以及可能取决于 OpenCV 支持的其他格式。
quality: 指定算法将使用的质量。可用选项有：low（最快）、medium（性能和质量平衡）和 high（最慢）。
gridsize: 图像上规则网格的大小，每个单元格将产生一个运动矢量。使用 1 表示稠密网格。
numlevels: 使用的金字塔层数。

以下是 Jetson AGX Orin 的一个示例。

C++
./vpi_sample_13_optflow_dense ofa ../assets/pedestrians.mp4 high 1 5
Python
python3 main.py ofa ../assets/pedestrians.mp4 high 1 5

该应用程序将处理 pedestrians.mp4；C++ 版本生成 denseoptflow_mv_ofa.mp4，Python 版本（使用 python3 运行时）生成 denseoptflow_mv_python3_ofa.mp4。

结果

输入视频	运动矢量视频

源代码

为了方便起见，以下代码也安装在 samples 目录中。

语言 C++ Python

 import sys
 import vpi
 import numpy as np
 from os import path
 from argparse import ArgumentParser
 from contextlib import contextmanager
 import cv2
 
 
 # ----------------------------
 # Some utility functions
 
 def process_motion_vectors(mv)
  with mv.rlock_cpu() as data
  # convert S10.5 format to float
  flow = np.float32(data)/(1<<5)
 
  # Create an image where the motion vector angle is
  # mapped to a color hue, and intensity is proportional
  # to vector's magnitude
  magnitude, angle = cv2.cartToPolar(flow[:,:,0], flow[:,:,1], angleInDegrees=True)
 
  clip = 5.0
  cv2.threshold(magnitude, clip, clip, cv2.THRESH_TRUNC, magnitude)
 
  # build the hsv image
  hsv = np.ndarray([flow.shape[0], flow.shape[1], 3], np.float32)
  hsv[:,:,0] = angle
  hsv[:,:,1] = np.ones((angle.shape[0], angle.shape[1]), np.float32)
  hsv[:,:,2] = magnitude / clip
 
  # Convert HSV to BGR8
  bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
  return np.uint8(bgr*255)
 
 # ----------------------------
 # Parse command line arguments
 
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['ofa'],
  help='Backend to be used for processing')
 
 parser.add_argument('input',
  help='Input video to be processed')
 
 parser.add_argument('quality', choices=['low', 'medium', 'high'],
  help='Quality setting')
 
 parser.add_argument('gridSize', type=int, choices=[1,2,4,8],
  help='Grid size')
 
 parser.add_argument('numLevels', type=int, choices=[1,2,3,4,5],
  help='Number of pyramid levels')
 
 args = parser.parse_args();
 
 assert args.backend == 'ofa'
 if args.backend == 'ofa'
  backend = vpi.Backend.OFA
 
 if args.quality == "low"
  quality = vpi.OptFlowQuality.LOW
 elif args.quality == "medium"
  quality = vpi.OptFlowQuality.MEDIUM
 else
  assert args.quality == "high"
  quality = vpi.OptFlowQuality.HIGH
 
 # -----------------------------
 # Open input and output videos
 
 inVideo = cv2.VideoCapture(args.input)
 
 fourcc = cv2.VideoWriter_fourcc(*'MPEG')
 inSize = (int(inVideo.get(cv2.CAP_PROP_FRAME_WIDTH)), int(inVideo.get(cv2.CAP_PROP_FRAME_HEIGHT)))
 fps = inVideo.get(cv2.CAP_PROP_FPS)
 
 # Calculate the output dimensions based on the input's and the chosen grid size
 outSize = ((inSize[0] + args.gridSize-1)//args.gridSize, (inSize[1]+args.gridSize-1)//args.gridSize)
 
 outVideo = cv2.VideoWriter('denseoptflow_mv_python'+str(sys.version_info[0])+'_'+args.backend+'.mp4',
  fourcc, fps, outSize)
 
 #---------------------------------
 # Main processing loop
 
 prevFrame = None
 
 idFrame = 0
 while True
  # Read one input frame
  ret, cvFrame = inVideo.read()
  if not ret
  break
 
  # Convert it to Y8_ER_BL pyramid format to be used by VPI
  # No single backend can convert from OpenCV's BGR8 to Y8_ER_BL
  # required by the algorithm. We must do in two steps using CUDA and VIC.
  curFrame = vpi.asimage(cvFrame, vpi.Format.BGR8) \
  .convert(vpi.Format.Y8_ER, backend=vpi.Backend.CUDA) \
  .gaussian_pyramid(args.numLevels, backend=vpi.Backend.CUDA) \
  .convert(vpi.Format.Y8_ER_BL, backend=vpi.Backend.VIC)
 
  # Need at least 2 frames to start processing
  if prevFrame is not None
  print("Processing frame {}".format(idFrame))
 
  # Calculate the motion vectors from previous to current frame
  with backend
  motion_vectors = vpi.optflow_dense(prevFrame, curFrame, quality = quality, gridsize = args.gridSize)
 
  # Turn motion vectors into an image
  motion_image = process_motion_vectors(motion_vectors)
 
  # Save it to output video
  outVideo.write(motion_image)
 
  # Prepare next iteration
  prevFrame = curFrame
  idFrame += 1

 #include <opencv2/core/version.hpp>
 #include <opencv2/imgcodecs.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 #include <opencv2/videoio.hpp>
 #include <vpi/OpenCVInterop.hpp>
 
 #include <vpi/Array.h>
 #include <vpi/Image.h>
 #include <vpi/ImageFormat.h>
 #include <vpi/Pyramid.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/GaussianPyramid.h>
 #include <vpi/algo/OpticalFlowDense.h>
 
 #include <iostream>
 #include <sstream>
 
 #define CHECK_STATUS(STMT) \
  do \
  { \
  VPIStatus status = (STMT); \
  if (status != VPI_SUCCESS) \
  { \
  char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
  vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
  std::ostringstream ss; \
  ss << "line " << __LINE__ << ": "; \
  ss << vpiStatusGetName(status) << ": " << buffer; \
  throw std::runtime_error(ss.str()); \
  } \
  } while (0);
 
 static void ProcessMotionVector(VPIImage mvImg, cv::Mat &outputImage)
 {
  // Lock the input image to access it from CPU
  VPIImageData mvData;
  CHECK_STATUS(vpiImageLockData(mvImg, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &mvData));
 
  // Create a cv::Mat that points to the input image data
  cv::Mat mvImage;
  CHECK_STATUS(vpiImageDataExportOpenCVMat(mvData, &mvImage));
 
  // Convert S10.5 format to float
  cv::Mat flow(mvImage.size(), CV_32FC2);
  mvImage.convertTo(flow, CV_32F, 1.0f / (1 << 5));
 
  // Image not needed anymore, we can unlock it.
  CHECK_STATUS(vpiImageUnlock(mvImg));
 
  // Create an image where the motion vector angle is
  # mapped to a color hue, and intensity is proportional
  # to vector's magnitude.
  cv::Mat magnitude, angle;
  {
  cv::Mat flowChannels[2];
  split(flow, flowChannels);
  cv::cartToPolar(flowChannels[0], flowChannels[1], magnitude, angle, true);
  }
 
  float clip = 5;
  cv::threshold(magnitude, magnitude, clip, clip, cv::THRESH_TRUNC);
 
  // build hsv image
  cv::Mat _hsv[3], hsv, bgr;
  _hsv[0] = angle;
  _hsv[1] = cv::Mat::ones(angle.size(), CV_32F);
  _hsv[2] = magnitude / clip; // intensity must vary from 0 to 1
  merge(_hsv, 3, hsv);
 
  cv::cvtColor(hsv, bgr, cv::COLOR_HSV2BGR);
  bgr.convertTo(outputImage, CV_8U, 255.0);
 }
 
 int main(int argc, char *argv[])
 {
  // OpenCV image that will be wrapped by a VPIImage.
  // Define it here so that it's destroyed *after* wrapper is destroyed
  cv::Mat cvPrevFrame, cvCurFrame;
 
  // VPI objects that will be used
  VPIStream stream = NULL;
  VPIImage imgPrevFramePL = NULL;
  VPIImage imgPrevFrameTmp = NULL;
  VPIImage imgCurFramePL = NULL;
  VPIImage imgCurFrameTmp = NULL;
  VPIImage imgMotionVecBL = NULL;
  VPIImage imgMotionVecPL = NULL;
 
  VPIPyramid prevPyrTmp = NULL;
  VPIPyramid prevPyrBL = NULL;
  VPIPyramid curPyrTmp = NULL;
  VPIPyramid curPyrBL = NULL;
 
  VPIPayload payload = NULL;
 
  int retval = 0;
 
  try
  {
  if (argc != 6)
  {
  throw std::runtime_error(std::string("Usage: ") + argv[0] +
  " <ofa> <input_video> <low|medium|high> <gridsize> <numlevels>");
  }
 
  // Parse input parameters
  std::string strBackend = argv[1];
  std::string strInputVideo = argv[2];
  std::string strQuality = argv[3];
  std::string strGridSize = argv[4];
  std::string strNumLevels = argv[5];
 
  VPIOpticalFlowQuality quality;
  if (strQuality == "low")
  {
  quality = VPI_OPTICAL_FLOW_QUALITY_LOW;
  }
  else if (strQuality == "medium")
  {
  quality = VPI_OPTICAL_FLOW_QUALITY_MEDIUM;
  }
  else if (strQuality == "high")
  {
  quality = VPI_OPTICAL_FLOW_QUALITY_HIGH;
  }
  else
  {
  throw std::runtime_error("Unknown quality provided");
  }
 
  VPIBackend backend;
  if (strBackend == "ofa")
  {
  backend = VPI_BACKEND_OFA;
  }
  else
  {
  throw std::runtime_error("Backend '" + strBackend + "' not recognized, it must be ofa.");
  }
 
  char *endptr;
  int gridSize = strtol(strGridSize.c_str(), &endptr, 10);
  if (*endptr != '\0')
  {
  throw std::runtime_error("Syntax error parsing gridsize " + strGridSize);
  }
 
  int numLevels = strtol(strNumLevels.c_str(), &endptr, 10);
  if (*endptr != '\0')
  {
  throw std::runtime_error("Syntax error parsing numlevels " + strNumLevels);
  }
 
  // Load the input video
  cv::VideoCapture invid;
  if (!invid.open(strInputVideo))
  {
  throw std::runtime_error("Can't open '" + strInputVideo + "'");
  }
 
  // Create the stream where processing will happen. We'll use user-provided backend
  // for Optical Flow, and CUDA/VIC for image format conversions.
  CHECK_STATUS(vpiStreamCreate(backend | VPI_BACKEND_CUDA | VPI_BACKEND_VIC, &stream));
 
  // Fetch the first frame
  if (!invid.read(cvPrevFrame))
  {
  throw std::runtime_error("Cannot read frame from input video");
  }
 
  // Create the previous and current frame wrapper using the first frame. This wrapper will
  // be set to point to every new frame in the main loop.
  CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvPrevFrame, 0, &imgPrevFramePL));
  CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvPrevFrame, 0, &imgCurFramePL));
 
  // Define the image formats we'll use throughout this sample.
  VPIImageFormat imgFmt = VPI_IMAGE_FORMAT_Y8_ER;
  VPIImageFormat imgFmtBL = VPI_IMAGE_FORMAT_Y8_ER_BL;
 
  int32_t width = cvPrevFrame.cols;
  int32_t height = cvPrevFrame.rows;
 
  // Create Dense Optical Flow payload to be executed on the given backend
  std::vector<int32_t> pyrGridSize(numLevels, gridSize); // all levels will have the same grid size
  CHECK_STATUS(vpiCreateOpticalFlowDense(backend, width, height, imgFmtBL, &pyrGridSize[0], pyrGridSize.size(),
  quality, &payload));
 
  // The Dense Optical Flow on NVENC or OFA backends expects input to be in block-linear format.
  // Since Convert Image Format algorithm doesn't currently support direct BGR
  // pitch-linear (from OpenCV) to Y8 block-linear conversion, it must be done in two
  // passes, first from BGR/PL to Y8/PL using CUDA, then from Y8/PL to Y8/BL using VIC.
  // The temporary image buffer below will store the intermediate Y8/PL representation.
  CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgPrevFrameTmp));
  CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgCurFrameTmp));
 
  // Now create the final block-linear buffer that'll be used as input to the
  // algorithm.
 
  CHECK_STATUS(vpiPyramidCreate(width, height, imgFmt, pyrGridSize.size(), 0.5, 0, &prevPyrTmp));
  CHECK_STATUS(vpiPyramidCreate(width, height, imgFmt, pyrGridSize.size(), 0.5, 0, &curPyrTmp));
 
  CHECK_STATUS(vpiPyramidCreate(width, height, imgFmtBL, pyrGridSize.size(), 0.5, 0, &prevPyrBL));
  CHECK_STATUS(vpiPyramidCreate(width, height, imgFmtBL, pyrGridSize.size(), 0.5, 0, &curPyrBL));
 
  // Motion vector image width and height, align to be multiple of gridSize
  int32_t mvWidth = (width + gridSize - 1) / gridSize;
  int32_t mvHeight = (height + gridSize - 1) / gridSize;
 
  // 输出视频将是运动向量图像的热图
  int fourcc = cv::VideoWriter::fourcc('M', 'P', 'E', 'G');
  double fps = invid.get(cv::CAP_PROP_FPS);
 
  cv::VideoWriter outVideo("denseoptflow_mv_" + strBackend + ".mp4", fourcc, fps, cv::Size(mvWidth, mvHeight));
  if (!outVideo.isOpened())
  {
  throw std::runtime_error("无法创建输出视频");
  }
 
  // 创建输出运动向量缓冲区
  CHECK_STATUS(vpiImageCreate(mvWidth, mvHeight, VPI_IMAGE_FORMAT_2S16_BL, 0, &imgMotionVecBL));
  CHECK_STATUS(vpiImageCreate(mvWidth, mvHeight, VPI_IMAGE_FORMAT_2S16, 0, &imgMotionVecPL));
 
  // 首先将第一帧转换为 Y8_BL 金字塔格式。当算法被调用时，它将被用作前一帧。
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgPrevFramePL, imgPrevFrameTmp, nullptr));
  CHECK_STATUS(
  vpiSubmitGaussianPyramidGenerator(stream, VPI_BACKEND_CUDA, imgPrevFrameTmp, prevPyrTmp, VPI_BORDER_CLAMP));
  CHECK_STATUS(vpiSubmitConvertImageFormatPyramid(stream, VPI_BACKEND_VIC, prevPyrTmp, prevPyrBL, NULL));
 
  // 创建一个输出图像，用于保存渲染后的运动向量图像。
  cv::Mat mvOutputImage;
 
  // 获取新帧直到视频结束
  int idxFrame = 1;
  while (invid.read(cvCurFrame))
  {
  printf("正在处理帧 %d\n", idxFrame++);
  // 将帧封装到 VPIImage 中，重用现有的 imgCurFramePL。
  CHECK_STATUS(vpiImageSetWrappedOpenCVMat(imgCurFramePL, cvCurFrame));
 
  // 将当前帧转换为 Y8_BL 金字塔格式
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgCurFramePL, imgCurFrameTmp, nullptr));
  CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, VPI_BACKEND_CUDA, imgCurFrameTmp, curPyrTmp,
  VPI_BORDER_CLAMP));
  CHECK_STATUS(vpiSubmitConvertImageFormatPyramid(stream, VPI_BACKEND_VIC, curPyrTmp, curPyrBL, NULL));
 
  CHECK_STATUS(
  vpiSubmitOpticalFlowDensePyramid(stream, backend, payload, prevPyrBL, curPyrBL, imgMotionVecBL));
 
  // 将 BL 格式的输出转换为 PL 格式。
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_VIC, imgMotionVecBL, imgMotionVecPL, NULL));
 
  // 等待处理完成。
  CHECK_STATUS(vpiStreamSync(stream));
 
  // 在输出图像中渲染生成的运动向量
  ProcessMotionVector(imgMotionVecPL, mvOutputImage);
 
  // 保存到输出视频
  outVideo << mvOutputImage;
 
  // 交换前一帧和下一帧
  std::swap(cvPrevFrame, cvCurFrame);
  std::swap(imgPrevFramePL, imgCurFramePL);
  std::swap(prevPyrBL, curPyrBL);
  }
  }
  catch (std::exception &e)
  {
  std::cerr << e.what() << std::endl;
  retval = 1;
  }
 
  // 销毁所有已使用的资源
  vpiStreamDestroy(stream);
  vpiPayloadDestroy(payload);
 
  vpiImageDestroy(imgPrevFramePL);
  vpiImageDestroy(imgPrevFrameTmp);
  vpiImageDestroy(imgCurFramePL);
  vpiImageDestroy(imgCurFrameTmp);
  vpiImageDestroy(imgMotionVecBL);
  vpiImageDestroy(imgMotionVecPL);
 
  vpiPyramidDestroy(prevPyrTmp);
  vpiPyramidDestroy(prevPyrBL);
  vpiPyramidDestroy(curPyrTmp);
  vpiPyramidDestroy(curPyrBL);
 
  return retval;
 }

VPI - Vision Programming Interface

3.2 版本

概述

指令

结果

源代码