2017년 1월 18일 수요일

CUDA 코덱 비디오 인코더 사용 소스코드 - CUDA Codec Video Encoder source code

NvEncoder 샘플소스를 수정한 소스코드, QT 기반으로 작성되었으며 yuv420 포맷으로 입력받은 영상 데이터를 h264로 인코딩한다.
인코딩된 프레임은 m_pEncodeBuffer 버퍼에 저장

< encodethread.h >
 #ifndef ENCODETHREAD_H  
 #define ENCODETHREAD_H  
   
 #include <QThread>  
 #include <opencv2/imgproc/imgproc_c.h>  
 #include "imagequeue.h"  
 #include "./common/inc/NvHWEncoder.h"  
   
   
 #define MAX_ENCODE_QUEUE 32  
 #define FRAME_QUEUE 240  
   
 #define SET_VER(configStruct, type) {configStruct.version = type##_VER;}  
   
 // Fixed-capacity ring queue of non-owning pointers into a caller-provided
 // array of T.  GetAvailable() hands out the next free item; GetPending()
 // retires the oldest handed-out item.  Not thread-safe.
 template<class T>
 class CNvQueue {
   T** m_pBuffer;                // index array of pointers into the caller's items (owned)
   unsigned int m_uSize;         // ring capacity
   unsigned int m_uPendingCount; // items handed out but not yet retired
   unsigned int m_uAvailableIdx; // next slot returned by GetAvailable()
   unsigned int m_uPendingIndex; // next slot returned by GetPending()
 public:
   CNvQueue(): m_pBuffer(NULL), m_uSize(0), m_uPendingCount(0), m_uAvailableIdx(0),
         m_uPendingIndex(0)
   {
   }
 
   ~CNvQueue()
   {
     delete[] m_pBuffer;
   }
 
   // Points the queue at pItems[0..uSize-1].  The items themselves are not
   // copied or owned; the caller keeps ownership.  Safe to call repeatedly:
   // the previous index array is released first (the original version leaked
   // it on re-initialization).
   bool Initialize(T *pItems, unsigned int uSize)
   {
     delete[] m_pBuffer;   // BUGFIX: avoid leaking the old index array
     m_uSize = uSize;
     m_uPendingCount = 0;
     m_uAvailableIdx = 0;
     m_uPendingIndex = 0;
     m_pBuffer = new T *[m_uSize];
     for (unsigned int i = 0; i < m_uSize; i++)
     {
       m_pBuffer[i] = &pItems[i];
     }
     return true;
   }
 
   // Returns the next free item, or NULL when every item is pending.
   T * GetAvailable()
   {
     T *pItem = NULL;
     if (m_uPendingCount == m_uSize)
     {
       return NULL;
     }
     pItem = m_pBuffer[m_uAvailableIdx];
     m_uAvailableIdx = (m_uAvailableIdx+1)%m_uSize;
     m_uPendingCount += 1;
     return pItem;
   }
 
   // Returns (and retires) the oldest pending item, or NULL if none.
   T* GetPending()
   {
     if (m_uPendingCount == 0)
     {
       return NULL;
     }
 
     T *pItem = m_pBuffer[m_uPendingIndex];
     m_uPendingIndex = (m_uPendingIndex+1)%m_uSize;
     m_uPendingCount -= 1;
     return pItem;
   }
 };
   
 // One planar YUV frame handed to EncodeThread::encodeFrame().
 typedef struct _EncodeFrameConfig
 {
   uint8_t *yuv[3];     // plane pointers: [0]=Y, [1]=Cb, [2]=Cr (not owned)
   uint32_t stride[3];  // per-plane strides in bytes
   uint32_t width;      // luma width in pixels
   uint32_t height;     // luma height in pixels
 }EncodeFrameConfig;
   
 // Device/API type backing the NVENC session.  Values match the device-type
 // codes used by the NvEncoder sample; only NV_ENC_CUDA is used in this file.
 typedef enum
 {
   NV_ENC_DX9 = 0,
   NV_ENC_DX11 = 1,
   NV_ENC_CUDA = 2,
   NV_ENC_DX10 = 3,
 } NvEncodeDeviceType;
   
 // Worker thread that pops decoded YUV frames from an ImageQueue, encodes
 // them to H.264 with NVENC (CUDA device), and pushes the resulting
 // bitstream to an RTSP capture session (see run() in encodethread.cpp).
 class EncodeThread : public QThread
 {
   Q_OBJECT
 public:
   // queue: source of frames to encode; not owned by this class.
   explicit EncodeThread(QThread *parent = 0, ImageQueue<EncodeFrame> *queue = NULL);
   virtual ~EncodeThread();
 
   void start();   // sets the run flag, then starts the QThread
   void stop();    // clears the run flag and blocks until run() returns
 
 protected:
   virtual void run();   // QThread entry point: the encode loop
 
 signals:
   void started();    // emitted when run() begins
   void finished();   // emitted just before run() returns
 
 private:
   // Configures m_stEncodeConfig and creates the NVENC session for the
   // given input format and frame size; returns true on success.
   bool openEncoder(NV_ENC_BUFFER_FORMAT format, int width, int height);
   void closeEncoder();
 
   NVENCSTATUS deinitialize(uint32_t devicetype);
   // Encodes one frame (or flushes when bFlush is true); on success the
   // bitstream is copied into m_pEncodeBuffer / m_nEncodeBufferSize.
   NVENCSTATUS encodeFrame(EncodeFrameConfig *pEncodeFrame, bool bFlush, uint32_t width, uint32_t height);
   NVENCSTATUS initCuda(uint32_t deviceID = 0);
   NVENCSTATUS allocateIOBuffers(uint32_t uInputWidth, uint32_t uInputHeight, NV_ENC_BUFFER_FORMAT inputFormat);
   NVENCSTATUS releaseIOBuffers();
   NVENCSTATUS flushEncoder();
 
 private:
   bool  m_bRun;   // loop flag, written by start()/stop(), read by run()
 
 private:
   ImageQueue<EncodeFrame>* m_pQueue;   // input frame queue (not owned)
 
   EncodeConfig    m_stEncodeConfig;    // active NVENC configuration
 
   CNvHWEncoder*    m_pNvHWEncoder;     // NVENC API wrapper (owned)
   uint32_t      m_uEncodeBufferCount;  // number of entries of m_stEncodeBuffer in use
   uint32_t      m_uPicStruct;          // NV_ENC_PIC_STRUCT passed to each encode call
   void*        m_pDevice;              // CUDA context (or D3D device on Windows)
 
   CUcontext              m_cuContext;
   EncodeConfig            m_stEncoderInput;     // only enableAsyncMode is consulted
   EncodeBuffer            m_stEncodeBuffer[MAX_ENCODE_QUEUE];   // input/output buffer ring storage
   CNvQueue<EncodeBuffer>       m_EncodeBufferQueue;             // ring over m_stEncodeBuffer
   EncodeOutputBuffer         m_stEOSOutputBfr;  // end-of-stream output buffer for flushing
 
   uint8_t*  m_pEncodeBuffer;      // host copy of the latest encoded frame (owned)
   int     m_nEncodeBufferSize;    // valid bytes in m_pEncodeBuffer
 
   FILE*    m_pFile;               // optional raw .264 dump file (disabled by default)
 };
   
 #endif // ENCODETHREAD_H  
   


< encodethread.cpp >
 #include "encodethread.h"  
 #include <QDebug>  
   
 #include "DXMediaPlayerCtrl.h"  
 #include "DXUtil.h"  
 #include "MediaBuffer.h"  
 #include "GlobalTimer.h"  
   
 // 2 MiB per bitstream buffer.  Parenthesized so the macro expands safely
 // inside expressions (e.g. "x % BITSTREAM_BUFFER_SIZE" was previously
 // parsed as "(x % 2) * 1024 * 1024").
 #define BITSTREAM_BUFFER_SIZE (2 * 1024 * 1024)
   
 // Constructs the encode thread in an idle state; the NVENC session itself is
 // created lazily by openEncoder() once the first frame's size is known.
 EncodeThread::EncodeThread(QThread *parent, ImageQueue<EncodeFrame> *queue) : QThread(parent)
 {
   // Loop control and input source.
   m_bRun = false;
   m_pQueue = queue;
 
   // Encoder wrapper plus CUDA context placeholder.
   m_pNvHWEncoder = new CNvHWEncoder;
   m_cuContext = nullptr;
   m_uEncodeBufferCount = 0;
 
   // Zero every NVENC-related struct before first use.
   memset(&m_stEncoderInput, 0, sizeof(m_stEncoderInput));
   memset(&m_stEOSOutputBfr, 0, sizeof(m_stEOSOutputBfr));
   memset(&m_stEncodeBuffer, 0, sizeof(m_stEncodeBuffer));
 
   // Host-side staging buffer for one encoded frame (same size as the
   // NVENC bitstream buffers).
   m_nEncodeBufferSize = 0;
   m_pEncodeBuffer = new uint8_t[BITSTREAM_BUFFER_SIZE];
 
   m_pFile = nullptr;   // optional raw dump file, disabled by default
 }
   
 EncodeThread::~EncodeThread()
 {
   // Stop the encode loop (and wait for it) before tearing anything down.
   stop();
   if (m_pNvHWEncoder) {
     delete m_pNvHWEncoder;
     m_pNvHWEncoder = NULL;
   }
 
   if (m_pEncodeBuffer) {
     delete[] m_pEncodeBuffer;
     m_pEncodeBuffer = NULL;
   }
 
   // NOTE(review): these global singletons are destroyed here although this
   // class never visibly creates or uses RTSPServer -- confirm that this
   // thread is the intended owner of their lifetimes.
   RTSPServer::destroy();
   GlobalTimer::destroy();
 }
   
 // Marks the worker loop runnable, then launches the thread (QThread::start()
 // ends up invoking run()).
 void EncodeThread::start()
 {
   m_bRun = true;
   QThread::start();
 }
   
 // Asks run() to leave its loop, then blocks until the thread has finished.
 void EncodeThread::stop()
 {
   m_bRun = false;
   wait();   // QThread::wait(): join the worker
 }
   
 // Encode loop: pops decoded frames, lazily opens the encoder on the first
 // frame, encodes each frame, and pushes the H.264 bitstream to an RTSP
 // capture session.  On exit, flushes the encoder and tears everything down.
 void EncodeThread::run()
 {
   emit started();
 
   bool bInit = false;   // encoder is opened lazily on the first popped frame
   int count = 0;
 
   EncodeFrameConfig stEncodeFrame;
 
   // RTSP streaming side: encoded buffers produced below are handed to this
   // player/capture session, which serves them over RTSP on port 8554.
   CDXMediaPlayerCtrl *player = new CDXMediaPlayerCtrl(NULL, NULL);
   player->openCaptureServerSession("stream1", AV_CODEC_ID_H264, AV_CODEC_ID_NONE);
   player->playCaptureServerSession();
 
   uint16_t port = 8554;
   if (player->startServer(port) < 0)
     qDebug() << "failed to start server, port : " << port;
 
   uint64_t timestamp = 0;
   MediaBuffer *pBuffer = NULL;
 
   while (m_bRun) {
     EncodeFrame *frame = m_pQueue->pop();
     if (frame == NULL) {
       QThread::usleep(1);   // input queue empty: back off briefly
       continue;
     }
 
     count = m_pQueue->count();
     if (count > 0) qDebug() << "decode queue : " << count;
 
     // First frame: choose the NVENC input format from the frame's format
     // field (0 -> NV12, anything else -> YUV444 -- presumably set by the
     // producer of EncodeFrame; confirm against that code) and open the
     // encoder at this frame's dimensions.
     if (!bInit) {
       NV_ENC_BUFFER_FORMAT format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
       if (frame->format == 0) format = NV_ENC_BUFFER_FORMAT_NV12;
       else format = NV_ENC_BUFFER_FORMAT_YUV444;
 
       if (openEncoder(format, frame->width, frame->height)) {
         //m_pFile = fopen("output.264", "wb");
         bInit = true;
       } else {
         closeEncoder();
       }
     }
 
     if (bInit) {
       memset(&stEncodeFrame, 0, sizeof(stEncodeFrame));
 
       stEncodeFrame.yuv[0] = frame->yuv[0];
       stEncodeFrame.yuv[1] = frame->yuv[1];
       stEncodeFrame.yuv[2] = frame->yuv[2];
 
       stEncodeFrame.stride[0] = frame->stride[0];
       stEncodeFrame.stride[1] = frame->stride[1];
       stEncodeFrame.stride[2] = frame->stride[2];
 
       stEncodeFrame.width = frame->width;
       stEncodeFrame.height = frame->height;
 
       // On success the encoded frame is left in m_pEncodeBuffer
       // (m_nEncodeBufferSize bytes); copy it into a MediaBuffer and hand it
       // to the RTSP session.
       if (encodeFrame(&stEncodeFrame, false, frame->width, frame->height) == NV_ENC_SUCCESS) {
         if (m_pFile) fwrite(m_pEncodeBuffer, 1, m_nEncodeBufferSize, m_pFile);
 
         timestamp = GetTimeOfDay();
         pBuffer = MediaBuffer::createBuffer(VideoMedia, m_pEncodeBuffer, m_nEncodeBufferSize, timestamp, timestamp);
         if (player->pushCaptureInput(pBuffer) < 0) {
           qDebug() << "cannot push capture input";
           delete pBuffer;   // rejected: we still own the buffer
         }
       }
     }
 
     delete frame;
   }
 
   // Drain any frames still queued inside the encoder before tearing down.
   if (bInit) encodeFrame(NULL, true, m_stEncodeConfig.width, m_stEncodeConfig.height);
 
   player->close();
   player->stopServer();
   delete player;
 
   closeEncoder();
 
   if (m_pFile) {
     fclose(m_pFile);
     m_pFile = NULL;
   }
 
   emit finished();
 }
   
 // Configures m_stEncodeConfig (constant-QP H.264, 5 Mbit, 15 fps, GOP 15),
 // creates the backing device (CUDA here; D3D paths exist for Windows),
 // initializes the NVENC session, and allocates the I/O buffer ring.
 // Returns true on success; on failure the caller runs closeEncoder().
 bool EncodeThread::openEncoder(NV_ENC_BUFFER_FORMAT format, int width, int height)
 {
   memset(&m_stEncodeConfig, 0, sizeof(EncodeConfig));
 
   m_stEncodeConfig.endFrameIdx = INT_MAX;
   m_stEncodeConfig.bitrate = 5000000;
   m_stEncodeConfig.rcMode = NV_ENC_PARAMS_RC_CONSTQP;
   //m_stEncodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH;
   m_stEncodeConfig.deviceType = NV_ENC_CUDA;
   m_stEncodeConfig.codec = NV_ENC_H264;
   //m_stEncodeConfig.fps = 30;
   m_stEncodeConfig.qp = 28;
   m_stEncodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
   m_stEncodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
   m_stEncodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
   m_stEncodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;
   m_stEncodeConfig.presetGUID = NV_ENC_PRESET_DEFAULT_GUID;   // overwritten below via GetPresetGUID()
   m_stEncodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
   m_stEncodeConfig.inputFormat = format;
 
   m_stEncodeConfig.repeatSPSPPS = 1;   // resend SPS/PPS with IDR frames (helps late-joining stream clients)
   m_stEncodeConfig.width = width;
   m_stEncodeConfig.height = height;
   m_stEncodeConfig.gopLength = 15;
   m_stEncodeConfig.fps = 15;
   //m_stEncodeConfig.encoderPreset = "hq";
 
   // Create the device backing the session (deviceID is 0 from the memset).
   switch (m_stEncodeConfig.deviceType)
   {
 #if defined(NV_WINDOWS)
   case NV_ENC_DX9:
     InitD3D9(m_stEncodeConfig.deviceID);
     break;
 
   case NV_ENC_DX10:
     InitD3D10(m_stEncodeConfig.deviceID);
     break;
 
   case NV_ENC_DX11:
     InitD3D11(m_stEncodeConfig.deviceID);
     break;
 #endif
   case NV_ENC_CUDA:
     initCuda(m_stEncodeConfig.deviceID);
     break;
   }
 
   NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
 
   if (m_stEncodeConfig.deviceType != NV_ENC_CUDA)
     nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_DIRECTX);
   else
     nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_CUDA);
 
   if (nvStatus != NV_ENC_SUCCESS)
     return false;
 
   m_stEncodeConfig.presetGUID = m_pNvHWEncoder->GetPresetGUID(m_stEncodeConfig.encoderPreset, m_stEncodeConfig.codec);
 #if 0
   m_stEncodeConfig.fOutput = fopen("output.264", "wb");
   if (!m_stEncodeConfig.fOutput)
     qDebug() << "failed to open output file";
 #endif
   nvStatus = m_pNvHWEncoder->CreateEncoder(&m_stEncodeConfig);
   if (nvStatus != NV_ENC_SUCCESS)
     return false;
 
   m_stEncodeConfig.maxWidth = m_stEncodeConfig.maxWidth ? m_stEncodeConfig.maxWidth : m_stEncodeConfig.width;
   m_stEncodeConfig.maxHeight = m_stEncodeConfig.maxHeight ? m_stEncodeConfig.maxHeight : m_stEncodeConfig.height;
 
   m_stEncoderInput.enableAsyncMode = m_stEncodeConfig.enableAsyncMode;
 
   // Size the I/O buffer ring: B-frames need numB+4 buffers; otherwise the
   // ring shrinks as the frame size (macroblock count) grows.
   if (m_stEncodeConfig.numB > 0)
   {
     m_uEncodeBufferCount = m_stEncodeConfig.numB + 4; // min buffers is numb + 1 + 3 pipelining
   }
   else
   {
     int numMBs = ((m_stEncodeConfig.maxHeight + 15) >> 4) * ((m_stEncodeConfig.maxWidth + 15) >> 4);
     int NumIOBuffers;
     if (numMBs >= 32768) //4kx2k
       NumIOBuffers = MAX_ENCODE_QUEUE / 8;
     else if (numMBs >= 16384) // 2kx2k
       NumIOBuffers = MAX_ENCODE_QUEUE / 4;
     else if (numMBs >= 8160) // 1920x1080
       NumIOBuffers = MAX_ENCODE_QUEUE / 2;
     else
       NumIOBuffers = MAX_ENCODE_QUEUE;
     m_uEncodeBufferCount = NumIOBuffers;
   }
   m_uPicStruct = m_stEncodeConfig.pictureStruct;
 
   nvStatus = allocateIOBuffers(m_stEncodeConfig.width, m_stEncodeConfig.height, m_stEncodeConfig.inputFormat);
   if (nvStatus != NV_ENC_SUCCESS)
     return false;   // BUGFIX: was "return 1", which reads as success in a bool function
 
   // NOTE(review): preloadedFrameCount is computed but never used in this
   // class -- apparently leftover from the NvEncoder sample.
   unsigned int preloadedFrameCount = FRAME_QUEUE;
   if (m_stEncodeConfig.preloadedFrameCount >= 2)
   {
     preloadedFrameCount = m_stEncodeConfig.preloadedFrameCount;
   }
   (void)preloadedFrameCount;
 
   qDebug() << "encoder " << width << "x" << height << " opened";
 
 #if 0
   GUID guids[10];
   uint32_t count;
   m_pNvHWEncoder->NvEncGetEncodeGUIDs(guids, 10, &count);
 
   NV_ENC_BUFFER_FORMAT fmt[20];
   memset(fmt, 0, sizeof(fmt));
   m_pNvHWEncoder->NvEncGetInputFormats(guids[0], fmt, 10, &count);
 #endif
 
   return true;
 }
   
 // Closes the optional raw output file, then releases buffers, the NVENC
 // session, and the backing device via deinitialize().
 void EncodeThread::closeEncoder()
 {
   if (m_stEncodeConfig.fOutput != NULL)
   {
     fclose(m_stEncodeConfig.fOutput);
     m_stEncodeConfig.fOutput = NULL;
   }
   deinitialize(m_stEncodeConfig.deviceType);
 }
   
 // Repacks planar YUV420 (separate Y/Cb/Cr planes) into NV12: a full-size
 // luma plane followed by an interleaved CbCr plane at half vertical
 // resolution.  A stride of 0 means "tightly packed" (stride == width).
 void convertYUVpitchtoNV12( unsigned char *yuv_luma, unsigned char *yuv_cb, unsigned char *yuv_cr,
               unsigned char *nv12_luma, unsigned char *nv12_chroma,
               int width, int height , int srcStride, int dstStride)
 {
   if (srcStride == 0)
     srcStride = width;
   if (dstStride == 0)
     dstStride = width;
 
   // Luma rows copy straight across.
   for (int row = 0; row < height; ++row)
     memcpy(nv12_luma + row * dstStride, yuv_luma + row * srcStride, width);
 
   // Chroma: one interleaved row per two luma rows; Cb lands in even
   // columns, Cr in odd ones.  Source chroma planes are half-width.
   const int chromaSrcStride = srcStride / 2;
   for (int row = 0; row < height / 2; ++row)
   {
     unsigned char *dst = nv12_chroma + row * dstStride;
     const unsigned char *cbRow = yuv_cb + row * chromaSrcStride;
     const unsigned char *crRow = yuv_cr + row * chromaSrcStride;
     for (int col = 0; col < width; col += 2)
     {
       dst[col]     = cbRow[col >> 1];
       dst[col + 1] = crRow[col >> 1];
     }
   }
 }
   
 // Repacks 10-bit planar YUV420 into P010: 16-bit samples with the 10-bit
 // value shifted into the high bits (<< 6), luma plane then interleaved
 // CbCr plane.  dstStride is in bytes; srcStride is in 16-bit elements.
 void convertYUV10pitchtoP010PL(unsigned short *yuv_luma, unsigned short *yuv_cb, unsigned short *yuv_cr,
   unsigned short *nv12_luma, unsigned short *nv12_chroma, int width, int height, int srcStride, int dstStride)
 {
   const int dstPitch = dstStride / 2;          // byte stride -> element stride
   const int chromaSrcStride = srcStride / 2;   // chroma planes are half-width
 
   for (int row = 0; row < height; ++row)
   {
     for (int col = 0; col < width; ++col)
       nv12_luma[row * dstPitch + col] = yuv_luma[row * srcStride + col] << 6;
   }
 
   for (int row = 0; row < height / 2; ++row)
   {
     for (int col = 0; col < width; col += 2)
     {
       nv12_chroma[row * dstPitch + col]     = yuv_cb[row * chromaSrcStride + (col >> 1)] << 6;
       nv12_chroma[row * dstPitch + col + 1] = yuv_cr[row * chromaSrcStride + (col >> 1)] << 6;
     }
   }
 }
   
 // Copies three full-resolution 8-bit planes (Y, Cb, Cr) row by row from
 // srcStride-pitched source planes into dstStride-pitched surface planes.
 void convertYUVpitchtoYUV444(unsigned char *yuv_luma, unsigned char *yuv_cb, unsigned char *yuv_cr,
   unsigned char *surf_luma, unsigned char *surf_cb, unsigned char *surf_cr, int width, int height, int srcStride, int dstStride)
 {
   for (int row = 0; row < height; ++row)
   {
     const int srcOff = srcStride * row;
     const int dstOff = dstStride * row;
     memcpy(surf_luma + dstOff, yuv_luma + srcOff, width);
     memcpy(surf_cb + dstOff, yuv_cb + srcOff, width);
     memcpy(surf_cr + dstOff, yuv_cr + srcOff, width);
   }
 }
   
 // Copies three full-resolution 10-bit planes into 16-bit surface planes,
 // shifting each sample into the high bits (<< 6).  dstStride is in bytes;
 // srcStride is in 16-bit elements.
 void convertYUV10pitchtoYUV444(unsigned short *yuv_luma, unsigned short *yuv_cb, unsigned short *yuv_cr,
   unsigned short *surf_luma, unsigned short *surf_cb, unsigned short *surf_cr,
   int width, int height, int srcStride, int dstStride)
 {
   const int dstPitch = dstStride / 2;   // byte stride -> element stride
 
   for (int row = 0; row < height; ++row)
   {
     for (int col = 0; col < width; ++col)
     {
       const int s = srcStride * row + col;
       const int d = row * dstPitch + col;
       surf_luma[d] = yuv_luma[s] << 6;
       surf_cb[d]   = yuv_cb[s] << 6;
       surf_cr[d]   = yuv_cr[s] << 6;
     }
   }
 }
   
 // Uploads one planar YUV frame into an NVENC input buffer, encodes it, and
 // copies the resulting bitstream into m_pEncodeBuffer / m_nEncodeBufferSize.
 // With bFlush == true the encoder is drained instead (pEncodeFrame ignored).
 NVENCSTATUS EncodeThread::encodeFrame(EncodeFrameConfig *pEncodeFrame, bool bFlush, uint32_t width, uint32_t height)
 {
   NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
   uint32_t lockedPitch = 0;
   EncodeBuffer *pEncodeBuffer = NULL;
 
   if (bFlush)
   {
     flushEncoder();
     return NV_ENC_SUCCESS;
   }
 
   if (!pEncodeFrame)
   {
     return NV_ENC_ERR_INVALID_PARAM;
   }
 
   // Grab a free ring buffer; if the ring is full, retire the oldest pending
   // buffer first.  NOTE(review): the second GetAvailable() is assumed to
   // succeed after ProcessOutput() -- there is no NULL check before use.
   pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
   if(!pEncodeBuffer)
   {
     m_pNvHWEncoder->ProcessOutput(m_EncodeBufferQueue.GetPending());
     pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
   }
 
   unsigned char *pInputSurface;
 
   nvStatus = m_pNvHWEncoder->NvEncLockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface, (void**)&pInputSurface, &lockedPitch);
   if (nvStatus != NV_ENC_SUCCESS)
     return nvStatus;
 
   // Repack the caller's planar YUV into the locked surface using the
   // conversion matching the surface's buffer format.  Every path passes
   // "width" as the source stride (assumes tightly packed input planes --
   // pEncodeFrame->stride[] is never consulted here).
   if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_NV12_PL)
   {
     unsigned char *pInputSurfaceCh = pInputSurface + (pEncodeBuffer->stInputBfr.dwHeight*lockedPitch);
     convertYUVpitchtoNV12(pEncodeFrame->yuv[0], pEncodeFrame->yuv[1], pEncodeFrame->yuv[2], pInputSurface, pInputSurfaceCh, width, height, width, lockedPitch);
   }
   else if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_YUV444)
   {
     unsigned char *pInputSurfaceCb = pInputSurface + (pEncodeBuffer->stInputBfr.dwHeight * lockedPitch);
     unsigned char *pInputSurfaceCr = pInputSurfaceCb + (pEncodeBuffer->stInputBfr.dwHeight * lockedPitch);
     convertYUVpitchtoYUV444(pEncodeFrame->yuv[0], pEncodeFrame->yuv[1], pEncodeFrame->yuv[2], pInputSurface, pInputSurfaceCb, pInputSurfaceCr, width, height, width, lockedPitch);
   }
   else if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
   {
     unsigned char *pInputSurfaceCh = pInputSurface + (pEncodeBuffer->stInputBfr.dwHeight*lockedPitch);
     convertYUV10pitchtoP010PL((uint16_t *)pEncodeFrame->yuv[0], (uint16_t *)pEncodeFrame->yuv[1], (uint16_t *)pEncodeFrame->yuv[2], (uint16_t *)pInputSurface, (uint16_t *)pInputSurfaceCh, width, height, width, lockedPitch);
   }
   else //if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
   {
     unsigned char *pInputSurfaceCb = pInputSurface + (pEncodeBuffer->stInputBfr.dwHeight * lockedPitch);
     unsigned char *pInputSurfaceCr = pInputSurfaceCb + (pEncodeBuffer->stInputBfr.dwHeight * lockedPitch);
     convertYUV10pitchtoYUV444((uint16_t *)pEncodeFrame->yuv[0], (uint16_t *)pEncodeFrame->yuv[1], (uint16_t *)pEncodeFrame->yuv[2], (uint16_t *)pInputSurface, (uint16_t *)pInputSurfaceCb, (uint16_t *)pInputSurfaceCr, width, height, width, lockedPitch);
   }
   nvStatus = m_pNvHWEncoder->NvEncUnlockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface);
   if (nvStatus != NV_ENC_SUCCESS)
     return nvStatus;
 
   nvStatus = m_pNvHWEncoder->NvEncEncodeFrame(pEncodeBuffer, NULL, width, height, (NV_ENC_PIC_STRUCT)m_uPicStruct);
   if (nvStatus == NV_ENC_SUCCESS) {
     NV_ENC_LOCK_BITSTREAM lockBitstreamData;
 
     // Synchronously lock the output bitstream (doNotWait = false) and copy
     // it to host memory so m_pEncodeBuffer stays valid after unlocking.
     memset(&lockBitstreamData, 0, sizeof(lockBitstreamData));
     SET_VER(lockBitstreamData, NV_ENC_LOCK_BITSTREAM);
     lockBitstreamData.outputBitstream = pEncodeBuffer->stOutputBfr.hBitstreamBuffer;
     lockBitstreamData.doNotWait = false;
 
     if (m_pNvHWEncoder->NvEncLockBitstream(&lockBitstreamData) == NV_ENC_SUCCESS) {
       memcpy(m_pEncodeBuffer, lockBitstreamData.bitstreamBufferPtr, lockBitstreamData.bitstreamSizeInBytes);
       m_nEncodeBufferSize = lockBitstreamData.bitstreamSizeInBytes;
       m_pNvHWEncoder->NvEncUnlockBitstream(pEncodeBuffer->stOutputBfr.hBitstreamBuffer);
     }
   }
 
   return nvStatus;
 }
   
 // Initializes the CUDA driver API, validates deviceID, checks the device's
 // compute capability (NVENC requires >= 3.0), and creates a CUDA context
 // stored in m_pDevice.  The context is immediately popped so NVENC can push
 // it when needed.
 NVENCSTATUS EncodeThread::initCuda(uint32_t deviceID)
 {
   CUresult cuResult;
   CUdevice device;
   CUcontext cuContextCurr;
   int deviceCount = 0;
   int SMminor = 0, SMmajor = 0;
 
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
   typedef HMODULE CUDADRIVER;
 #else
   typedef void *CUDADRIVER;
 #endif
   CUDADRIVER hHandleDriver = 0;
   // Three-argument cuInit is the NvEncoder sample's dynamically-linked
   // wrapper (cudaModuleMgr/dynlink), not the plain driver-API cuInit(flags).
   cuResult = cuInit(0, __CUDA_API_VERSION, hHandleDriver);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuInit error:0x%x\n", cuResult);
     assert(0);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   cuResult = cuDeviceGetCount(&deviceCount);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuDeviceGetCount error:0x%x\n", cuResult);
     assert(0);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   // If dev is negative value, we clamp to 0
   if ((int)deviceID < 0)
     deviceID = 0;
 
   if (deviceID >(unsigned int)deviceCount - 1)
   {
     PRINTERR("Invalid Device Id = %d\n", deviceID);
     return NV_ENC_ERR_INVALID_ENCODERDEVICE;
   }
 
   cuResult = cuDeviceGet(&device, deviceID);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuDeviceGet error:0x%x\n", cuResult);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   cuResult = cuDeviceComputeCapability(&SMmajor, &SMminor, deviceID);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuDeviceComputeCapability error:0x%x\n", cuResult);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   // NVENC needs compute capability 3.0+ (Kepler or newer).
   if (((SMmajor << 4) + SMminor) < 0x30)
   {
     PRINTERR("GPU %d does not have NVENC capabilities exiting\n", deviceID);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   cuResult = cuCtxCreate((CUcontext*)(&m_pDevice), 0, device);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuCtxCreate error:0x%x\n", cuResult);
     assert(0);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
 
   // Pop the new context off this thread; NVENC will make it current itself.
   cuResult = cuCtxPopCurrent(&cuContextCurr);
   if (cuResult != CUDA_SUCCESS)
   {
     PRINTERR("cuCtxPopCurrent error:0x%x\n", cuResult);
     assert(0);
     return NV_ENC_ERR_NO_ENCODE_DEVICE;
   }
   return NV_ENC_SUCCESS;
 }
   
 // Creates m_uEncodeBufferCount input surfaces and output bitstream buffers,
 // registers them in the ring queue, and prepares the end-of-stream output
 // buffer used when flushing.  Async events are only used in async mode.
 NVENCSTATUS EncodeThread::allocateIOBuffers(uint32_t uInputWidth, uint32_t uInputHeight, NV_ENC_BUFFER_FORMAT inputFormat)
 {
   NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
 
   m_EncodeBufferQueue.Initialize(m_stEncodeBuffer, m_uEncodeBufferCount);
   for (uint32_t i = 0; i < m_uEncodeBufferCount; i++)
   {
     nvStatus = m_pNvHWEncoder->NvEncCreateInputBuffer(uInputWidth, uInputHeight, &m_stEncodeBuffer[i].stInputBfr.hInputSurface, inputFormat);
     if (nvStatus != NV_ENC_SUCCESS)
       return nvStatus;
 
     m_stEncodeBuffer[i].stInputBfr.bufferFmt = inputFormat;
     m_stEncodeBuffer[i].stInputBfr.dwWidth = uInputWidth;
     m_stEncodeBuffer[i].stInputBfr.dwHeight = uInputHeight;
     nvStatus = m_pNvHWEncoder->NvEncCreateBitstreamBuffer(BITSTREAM_BUFFER_SIZE, &m_stEncodeBuffer[i].stOutputBfr.hBitstreamBuffer);
     if (nvStatus != NV_ENC_SUCCESS)
       return nvStatus;
      m_stEncodeBuffer[i].stOutputBfr.dwBitstreamBufferSize = BITSTREAM_BUFFER_SIZE;
     if (m_stEncoderInput.enableAsyncMode)
     {
       nvStatus = m_pNvHWEncoder->NvEncRegisterAsyncEvent(&m_stEncodeBuffer[i].stOutputBfr.hOutputEvent);
       if (nvStatus != NV_ENC_SUCCESS)
         return nvStatus;
       m_stEncodeBuffer[i].stOutputBfr.bWaitOnEvent = true;
     }
     else
       m_stEncodeBuffer[i].stOutputBfr.hOutputEvent = NULL;
   }
 
   // Dedicated buffer that carries the EOS marker during flushEncoder().
   m_stEOSOutputBfr.bEOSFlag = TRUE;
 
   if (m_stEncoderInput.enableAsyncMode)
   {
     nvStatus = m_pNvHWEncoder->NvEncRegisterAsyncEvent(&m_stEOSOutputBfr.hOutputEvent);
     if (nvStatus != NV_ENC_SUCCESS)
       return nvStatus;
   }
   else
     m_stEOSOutputBfr.hOutputEvent = NULL;
 
   return NV_ENC_SUCCESS;
 }
   
 // Destroys every input surface and bitstream buffer created by
 // allocateIOBuffers(), plus any async events (async mode only) and the
 // end-of-stream output buffer's event.
 NVENCSTATUS EncodeThread::releaseIOBuffers()
 {
   for (uint32_t i = 0; i < m_uEncodeBufferCount; i++)
   {
     m_pNvHWEncoder->NvEncDestroyInputBuffer(m_stEncodeBuffer[i].stInputBfr.hInputSurface);
     m_stEncodeBuffer[i].stInputBfr.hInputSurface = NULL;
     m_pNvHWEncoder->NvEncDestroyBitstreamBuffer(m_stEncodeBuffer[i].stOutputBfr.hBitstreamBuffer);
     m_stEncodeBuffer[i].stOutputBfr.hBitstreamBuffer = NULL;
     if (m_stEncoderInput.enableAsyncMode)
     {
       m_pNvHWEncoder->NvEncUnregisterAsyncEvent(m_stEncodeBuffer[i].stOutputBfr.hOutputEvent);
       nvCloseFile(m_stEncodeBuffer[i].stOutputBfr.hOutputEvent);
       m_stEncodeBuffer[i].stOutputBfr.hOutputEvent = NULL;
     }
   }
 
   if (m_stEOSOutputBfr.hOutputEvent)
   {
     if (m_stEncoderInput.enableAsyncMode)
     {
       m_pNvHWEncoder->NvEncUnregisterAsyncEvent(m_stEOSOutputBfr.hOutputEvent);
       nvCloseFile(m_stEOSOutputBfr.hOutputEvent);
       m_stEOSOutputBfr.hOutputEvent = NULL;
     }
   }
 
   return NV_ENC_SUCCESS;
 }
   
 // Sends the end-of-stream picture to the encoder, then drains and processes
 // every buffer still pending in the ring.  In async mode (Windows) it also
 // waits up to 500 ms for the EOS completion event.
 NVENCSTATUS EncodeThread::flushEncoder()
 {
   NVENCSTATUS nvStatus = m_pNvHWEncoder->NvEncFlushEncoderQueue(m_stEOSOutputBfr.hOutputEvent);
   if (nvStatus != NV_ENC_SUCCESS)
   {
     assert(0);
     return nvStatus;
   }
 
   // Retire every outstanding encode in submission order.
   EncodeBuffer *pEncodeBufer = m_EncodeBufferQueue.GetPending();
   while (pEncodeBufer)
   {
     m_pNvHWEncoder->ProcessOutput(pEncodeBufer);
     pEncodeBufer = m_EncodeBufferQueue.GetPending();
   }
 
 #if defined(NV_WINDOWS)
   if (m_stEncoderInput.enableAsyncMode)
   {
 
     if (WaitForSingleObject(m_stEOSOutputBfr.hOutputEvent, 500) != WAIT_OBJECT_0)
     {
       assert(0);
       nvStatus = NV_ENC_ERR_GENERIC;
     }
   }
 #endif
 
   return nvStatus;
 }
   
 // Releases the I/O buffers, destroys the NVENC session, and tears down the
 // backing device (CUDA context here; D3D device release paths on Windows).
 NVENCSTATUS EncodeThread::deinitialize(uint32_t devicetype)
 {
   NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
 
   releaseIOBuffers();
 
   nvStatus = m_pNvHWEncoder->NvEncDestroyEncoder();
 
   if (m_pDevice)
   {
     switch (devicetype)
     {
 #if defined(NV_WINDOWS)
     case NV_ENC_DX9:
       ((IDirect3DDevice9*)(m_pDevice))->Release();
       break;
 
     case NV_ENC_DX10:
       ((ID3D10Device*)(m_pDevice))->Release();
       break;
 
     case NV_ENC_DX11:
       ((ID3D11Device*)(m_pDevice))->Release();
       break;
 #endif
 
     case NV_ENC_CUDA:
       CUresult cuResult = CUDA_SUCCESS;
       cuResult = cuCtxDestroy((CUcontext)m_pDevice);
       if (cuResult != CUDA_SUCCESS)
         PRINTERR("cuCtxDestroy error:0x%x\n", cuResult);
     }
 
     m_pDevice = NULL;
   }
 
 #if defined (NV_WINDOWS)
   // NOTE(review): m_pD3D is not declared in the EncodeThread header shown in
   // this file -- this block only compiles with NV_WINDOWS defined if the
   // member exists elsewhere; verify before building for Windows.
   if (m_pD3D)
   {
     m_pD3D->Release();
     m_pD3D = NULL;
   }
 #endif
 
   return nvStatus;
 }
   

댓글 4개:

  1. 글 잘보고 있습니다. 소스코드 외에 ffmpeg.exe의 옵션으로도 cuda가 사용 가능한가요?
    빌드할 때 cuda와 nvenc가 따로 있다는 것을 여기서 알았는데요 두가지 방식이 어떤식으로 차이가 있는지 궁금합니다.

    답글삭제
    답글
    1. ffmpeg.exe 옵션에 nvenc 사용옵션이 있습니다. 그리고 cuda는 일반용어이고 nvenc는 nvidia가 개발한 cuda를 이용한 비디오 인코더를 의미합니다.

      삭제
  2. 안녕하세요?
    저는 nvidia transcoder를 이용하여 현재 연구를 하고 있는 대학원생인데 혹시 deltaQp array를 이용하여 ROI 영역의 qp를 바꾸려고 하는데 혹시 질문을 해도 될까요?

    oper5320@naver.com

    답글삭제