概述

FFT 示例应用程序计算输入图像的频谱表示，并将其保存为磁盘上的图像文件。您可以指定用于处理的后端。

指令

命令行参数为

<后端> <输入图像>

其中

后端：cpu 或 cuda；它定义了将执行处理的后端。
输入图像：输入图像文件名；它接受 png、jpeg 以及可能的其他格式。

示例如下：

C++
./vpi_sample_07_fft cuda ../assets/kodim08.png
Python
python3 main.py cuda ../assets/kodim08.png

上述命令使用 CUDA 后端处理随附的示例图像之一。您也可以尝试其他图像，但必须满足算法施加的约束。

结果

输入图像	输出图像，频谱

源代码

为方便起见，以下代码也安装在示例目录中。

语言 C++ Python

 import sys
 import vpi
 import numpy as np
 from PIL import Image
 from argparse import ArgumentParser
 
 # ----------------------------
 # Parse command line arguments
 
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['cpu','cuda'],
  help='Backend to be used for processing')
 
 parser.add_argument('input',
  help='Input image in space domain')
 
 args = parser.parse_args();
 
 if args.backend == 'cpu'
  backend = vpi.Backend.CPU
 else
  assert args.backend == 'cuda'
  backend = vpi.Backend.CUDA
 
 # --------------------------------------------------------------
 # Load input into a vpi.Image and convert it to float grayscale
 with vpi.Backend.CUDA
  try
  input = vpi.asimage(np.asarray(Image.open(args.input))).convert(vpi.Format.F32)
  except IOError
  sys.exit("Input file not found")
  except
  sys.exit("Error with input file")
 
 # --------------------------------------------------------------
 # Transform input into frequency domain
 with backend
  hfreq = input.fft()
 
 # --------------------------------------------------------------
 # Post-process results and save to disk
 
 # Transform [H,W,2] float array into [H,W] complex array
 hfreq = hfreq.cpu().view(dtype=np.complex64).squeeze(2)
 
 # Complete array into a full hermitian matrix
 if input.width%2==0
  wpad = input.width//2-1
  padmode = 'reflect'
 else
  wpad = input.width//2
  padmode='symmetric'
 freq = np.pad(hfreq, ((0,0),(0,wpad)), mode=padmode)
 freq[:,hfreq.shape[1]:] = np.conj(freq[:,hfreq.shape[1]:])
 freq[1:,hfreq.shape[1]:] = freq[1:,hfreq.shape[1]:][::-1]
 
 # Shift 0Hz to image center
 freq = np.fft.fftshift(freq)
 
 # Convert complex frequencies into log-magnitude
 lmag = np.log(1+np.absolute(freq))
 
 # Normalize into [0,255] range
 min = lmag.min()
 max = lmag.max()
 lmag = ((lmag-min)*255/(max-min)).round().astype(np.uint8)
 
 # -------------------
 # Save result to disk
 Image.fromarray(lmag).save('spectrum_python'+str(sys.version_info[0])+'_'+args.backend+'.png')

 #include <opencv2/core/version.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 #if CV_MAJOR_VERSION >= 3
 # include <opencv2/imgcodecs.hpp>
 #else
 # include <opencv2/highgui/highgui.hpp>
 #endif
 
 #include <vpi/OpenCVInterop.hpp>
 
 #include <vpi/Image.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/FFT.h>
 
 #include <cassert>
 #include <cstring> // for memset
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 
 #define CHECK_STATUS(STMT) \
  do \
  { \
  VPIStatus status = (STMT); \
  if (status != VPI_SUCCESS) \
  { \
  char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
  vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
  std::ostringstream ss; \
  ss << vpiStatusGetName(status) << ": " << buffer; \
  throw std::runtime_error(ss.str()); \
  } \
  } while (0);
 
 // Auxiliary functions to process spectrum before saving it to disk.
 // They are defined after main().

 // Takes a 2-channel (re, im) matrix and returns log(1 + magnitude),
 // cropped to an even number of rows and columns.
 cv::Mat LogMagnitude(cv::Mat cpx);
 // Expands a CV_32FC2 half spectrum into a CV_32FC2 matrix of size fullSize,
 // filling the missing columns with conjugated, mirrored entries.
 cv::Mat CompleteFullHermitian(cv::Mat in, cv::Size fullSize);
 // Swaps diagonally opposite quadrants of mag in place and returns mag.
 cv::Mat InplaceFFTShift(cv::Mat mag);
 
 int main(int argc, char *argv[])
 {
  // OpenCV image that will be wrapped by a VPIImage.
  // Define it here so that it's destroyed *after* wrapper is destroyed
  cv::Mat cvImage;
 
  // VPI objects that will be used
  VPIImage image = NULL;
  VPIImage imageF32 = NULL;
  VPIImage spectrum = NULL;
  VPIStream stream = NULL;
  VPIPayload fft = NULL;
 
  int retval = 0;
 
  try
  {
  // =============================
  // Parse command line parameters
 
  if (argc != 3)
  {
  throw std::runtime_error(std::string("Usage: ") + argv[0] + " <cpu|cuda> <input image>");
  }
 
  std::string strBackend = argv[1];
  std::string strInputFileName = argv[2];
 
  // Now parse the backend
  VPIBackend backend;
 
  if (strBackend == "cpu")
  {
  backend = VPI_BACKEND_CPU;
  }
  else if (strBackend == "cuda")
  {
  backend = VPI_BACKEND_CUDA;
  }
  else
  {
  throw std::runtime_error("Backend '" + strBackend + "' not recognized, it must be either cpu or cuda.");
  }
 
  // =====================
  // Load the input image
 
  cvImage = cv::imread(strInputFileName);
  if (cvImage.empty())
  {
  throw std::runtime_error("Can't open '" + strInputFileName + "'");
  }
 
  // =================================
  // Allocate all VPI resources needed
 
  // Create the stream for the given backend.
  CHECK_STATUS(vpiStreamCreate(backend, &stream));
 
  // We now wrap the loaded image into a VPIImage object to be used by VPI.
  // VPI won't make a copy of it, so the original
  // image must be in scope at all times.
  CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvImage, 0, &image));
 
  // Temporary image that holds the float version of input
  CHECK_STATUS(vpiImageCreate(cvImage.cols, cvImage.rows, VPI_IMAGE_FORMAT_F32, 0, &imageF32));
 
  // Now create the output image. Note that for real inputs, the output spectrum is a Hermitian
  // matrix (conjugate-symmetric), so only the non-redundant components are output, basically the
  // left half. We adjust the output width accordingly.
  CHECK_STATUS(vpiImageCreate(cvImage.cols / 2 + 1, cvImage.rows, VPI_IMAGE_FORMAT_2F32, 0, &spectrum));
 
  // Create the FFT payload that does real (space) to complex (frequency) transformation
  CHECK_STATUS(
  vpiCreateFFT(backend, cvImage.cols, cvImage.rows, VPI_IMAGE_FORMAT_F32, VPI_IMAGE_FORMAT_2F32, &fft));
 
  // ================
  // Processing stage
 
  // Convert image to float
  CHECK_STATUS(vpiSubmitConvertImageFormat(stream, backend, image, imageF32, NULL));
 
  // Submit it for processing passing the image to be gradient and the result image
  CHECK_STATUS(vpiSubmitFFT(stream, backend, fft, imageF32, spectrum, 0));
 
  // Wait until the algorithm finishes processing
  CHECK_STATUS(vpiStreamSync(stream));
 
  // =======================================
  // Output processing and saving it to disk
 
  // Lock output image to retrieve its data on cpu memory
  VPIImageData outData;
  CHECK_STATUS(vpiImageLockData(spectrum, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &outData));
 
  assert(outData.bufferType == VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR);
  VPIImageBufferPitchLinear &outPitch = outData.buffer.pitch;
 
  assert(outPitch.format == VPI_IMAGE_FORMAT_2F32);
 
  // Wrap spectrum to be used by OpenCV
  cv::Mat cvSpectrum(outPitch.planes[0].height, outPitch.planes[0].width, CV_32FC2, outPitch.planes[0].data,
  outPitch.planes[0].pitchBytes);
 
  // Process it
  cv::Mat mag = InplaceFFTShift(LogMagnitude(CompleteFullHermitian(cvSpectrum, cvImage.size())));
 
  // Normalize the result to fit in 8-bits
  normalize(mag, mag, 0, 255, cv::NORM_MINMAX);
 
  // Write to disk
  imwrite("spectrum_" + strBackend + ".png", mag);
 
  // Done handling output image, don't forget to unlock it.
  CHECK_STATUS(vpiImageUnlock(spectrum));
  }
  catch (std::exception &e)
  {
  std::cerr << e.what() << std::endl;
  retval = 1;
  }
 
  // ========
  // Clean up
 
  // Make sure stream is synchronized before destroying the objects
  // that might still be in use.
  if (stream != NULL)
  {
  vpiStreamSync(stream);
  }
 
  vpiImageDestroy(image);
  vpiImageDestroy(imageF32);
  vpiImageDestroy(spectrum);
  vpiStreamDestroy(stream);
 
  // Payload is owned by the stream, so it's already destroyed
  // since the stream is now destroyed.
 
  return retval;
 }
 
 // Auxiliary functions --------------------------------
 
 cv::Mat LogMagnitude(cv::Mat cpx)
 {
  // Split spectrum into real and imaginary parts
  cv::Mat reim[2];
  assert(cpx.channels() == 2);
  split(cpx, reim);
 
  // Calculate the magnitude
  cv::Mat mag;
  magnitude(reim[0], reim[1], mag);
 
  // Convert to logarithm scale
  mag += cv::Scalar::all(1);
  log(mag, mag);
  mag = mag(cv::Rect(0, 0, mag.cols & -2, mag.rows & -2));
 
  return mag;
 }
 
 cv::Mat CompleteFullHermitian(cv::Mat in, cv::Size fullSize)
 {
  assert(in.type() == CV_32FC2);
 
  cv::Mat out(fullSize, CV_32FC2);
  for (int i = 0; i < out.rows; ++i)
  {
  for (int j = 0; j < out.cols; ++j)
  {
  cv::Vec2f p;
  if (j < in.cols)
  {
  p = in.at<cv::Vec2f>(i, j);
  }
  else
  {
  p = in.at<cv::Vec2f>((out.rows - i) % out.rows, (out.cols - j) % out.cols);
  p[1] = -p[1];
  }
  out.at<cv::Vec2f>(i, j) = p;
  }
  }
 
  return out;
 }
 
 cv::Mat InplaceFFTShift(cv::Mat mag)
 {
  // Rearrange the quadrants of the fourier spectrum
  // so that the origin is at the image center.
 
  // Create a ROI for each 4 quadrants.
  int cx = mag.cols / 2;
  int cy = mag.rows / 2;
  cv::Mat qTL(mag, cv::Rect(0, 0, cx, cy)); // top-left
  cv::Mat qTR(mag, cv::Rect(cx, 0, cx, cy)); // top-right
  cv::Mat qBL(mag, cv::Rect(0, cy, cx, cy)); // bottom-left
  cv::Mat qBR(mag, cv::Rect(cx, cy, cx, cy)); // bottom-right
 
  // swap top-left with bottom-right quadrants
  cv::Mat tmp;
  qTL.copyTo(tmp);
  qBR.copyTo(qTL);
  tmp.copyTo(qBR);
 
  // swap top-right with bottom-left quadrants
  qTR.copyTo(tmp);
  qBL.copyTo(qTR);
  tmp.copyTo(qBL);
 
  return mag;
 }

VPI - Vision Programming Interface

3.2 版本发布

概述

指令

结果

源代码