Skip to content

Commit

Permalink
add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
yukirora committed Aug 10, 2023
1 parent e2957ac commit 09f0e93
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ std::map<std::string, cudaVideoCodec_enum> codecMap = {
{"vp8", cudaVideoCodec_VP8}, {"vp9", cudaVideoCodec_VP9}, {"av1", cudaVideoCodec_AV1}};

/**
* @brief Function to decode media file using OptimizedNvDecoder interface
* @brief Function to decode video file using OptimizedNvDecoder interface
* @param pDec - Handle to OptimizedNvDecoder
* @param demuxer - Pointer to an FFmpegDemuxer instance
* @param pnFrame - Variable to record the number of frames decoded
Expand All @@ -41,13 +41,13 @@ void DecProc(OptimizedNvDecoder *pDec, const char *szInFilePath, int *pnFrame, s
std::unique_ptr<FFmpegDemuxer> demuxer(new FFmpegDemuxer(szInFilePath));
int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
uint8_t *pVideo = NULL, *pFrame = NULL;

do {
// Demux video from file using FFmpegDemuxer
demuxer->Demux(&pVideo, &nVideoBytes);
// Decode the video frame from demuxed packet
nFrameReturned = pDec->Decode(pVideo, nVideoBytes);
if (!nFrame && nFrameReturned)
LOG(INFO) << pDec->GetVideoInfo();

nFrame += nFrameReturned;
} while (nVideoBytes);
*pnFrame = nFrame;
Expand All @@ -56,6 +56,9 @@ void DecProc(OptimizedNvDecoder *pDec, const char *szInFilePath, int *pnFrame, s
}
}

/**
* @brief Function to show help message and exit
*/
void ShowHelpAndExit(const char *szBadOption = NULL) {
std::ostringstream oss;
bool bThrowError = false;
Expand All @@ -64,19 +67,15 @@ void ShowHelpAndExit(const char *szBadOption = NULL) {
oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
}
oss << "Options:" << std::endl
<< "-i Input file path" << std::endl
<< "-i Input single video file path" << std::endl
<< "-o Output file path" << std::endl
<< "-gpu Ordinal of GPU to use" << std::endl
<< "-thread Number of decoding thread" << std::endl
<< "-total Number of total video to test" << std::endl
<< "-single (No value) Use single context (this may result in suboptimal performance; default is multiple "
"contexts)"
<< std::endl
<< "-host (No value) Copy frame to host memory (this may result in suboptimal performance; default is "
"device memory)"
<< std::endl
<< "-multi_input Multiple Input file list path" << std::endl
<< "-codec The codecc of video to test" << std::endl;
<< "-single (No value) Use single context (default is multi-context, one context per thread)" << std::endl
<< "-host (No value) Copy frame to host memory (default is device memory)" << std::endl
<< "-multi_input The file path which lists the path of multiple video in each line" << std::endl
<< "-codec The codec of video to test" << std::endl;
if (bThrowError) {
throw std::invalid_argument(oss.str());
} else {
Expand All @@ -85,6 +84,9 @@ void ShowHelpAndExit(const char *szBadOption = NULL) {
}
}

/**
* @brief Function to parse commandline arguments
*/
void ParseCommandLine(int argc, char *argv[], char *szInputFileName, int &iGpu, int &nThread, int &nTotalVideo,
bool &bSingle, bool &bHost, std::string &inputFilesListPath, std::string &outputFile,
cudaVideoCodec &codec) {
Expand Down Expand Up @@ -161,6 +163,9 @@ void ParseCommandLine(int argc, char *argv[], char *szInputFileName, int &iGpu,
}
}

/**
* @brief Function to create cuda context and initialize decoder
*/
OptimizedNvDecoder *InitOptimizedNvDecoder(int i, const CUdevice &cuDevice, CUcontext &cuContext, bool bSingle,
bool bHost, cudaVideoCodec codec, CUVIDDECODECAPS decodecaps) {
if (!bSingle) {
Expand All @@ -171,10 +176,12 @@ OptimizedNvDecoder *InitOptimizedNvDecoder(int i, const CUdevice &cuDevice, CUco
return sessionObject;
}

/**
* @brief Function to convert time_point to human-readable format
*/
std::string GetTime(const std::chrono::_V2::system_clock::time_point &now) {
// Convert the time_point to a time_t
auto now_time_t = std::chrono::system_clock::to_time_t(now);

// Convert the time_t to a human-readable format
std::tm *now_tm = std::localtime(&now_time_t);
char time_cstr[100];
Expand All @@ -183,9 +190,12 @@ std::string GetTime(const std::chrono::_V2::system_clock::time_point &now) {
return time_str;
}

double DecodeVideo(size_t i, const std::vector<OptimizedNvDecoder *> &vDec, const char *szInFilePath, int *pnFrame,
/**
* @brief Function to decode a video in a thread and measure the latency
*/
double DecodeVideo(size_t thread, const std::vector<OptimizedNvDecoder *> &vDec, const char *szInFilePath, int *pnFrame,
std::exception_ptr &ex) {
OptimizedNvDecoder *pDec = vDec[i];
OptimizedNvDecoder *pDec = vDec[thread];
auto start = std::chrono::high_resolution_clock::now();
DecProc(pDec, szInFilePath, pnFrame, ex);
auto end = std::chrono::high_resolution_clock::now();
Expand All @@ -195,6 +205,9 @@ double DecodeVideo(size_t i, const std::vector<OptimizedNvDecoder *> &vDec, cons
return elapsedTime / 1000.0f;
}

/**
* @brief Function to read the video paths from a file
*/
std::vector<std::string> ReadMultipleVideoFiles(std::string filepath) {
std::ifstream file(filepath);
if (!file) {
Expand All @@ -210,6 +223,9 @@ std::vector<std::string> ReadMultipleVideoFiles(std::string filepath) {
return tokens;
}

/**
* @brief Function to get the decoder capability
*/
void GetDefaultDecoderCaps(CUVIDDECODECAPS &decodecaps, cudaVideoCodec codec) {
memset(&decodecaps, 0, sizeof(decodecaps));
decodecaps.eCodecType = codec;
Expand All @@ -218,6 +234,10 @@ void GetDefaultDecoderCaps(CUVIDDECODECAPS &decodecaps, cudaVideoCodec codec) {
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
}

/**
 * @brief Function to initialize the CUDA device and context, query the decoder capability, and create a decoder
 * for each thread
*/
void InitializeContext(std::vector<OptimizedNvDecoder *> &vDec, int iGpu, int nThread, bool bSingle, bool bHost,
cudaVideoCodec codec) {
ck(cuInit(0));
Expand Down Expand Up @@ -250,6 +270,9 @@ void InitializeContext(std::vector<OptimizedNvDecoder *> &vDec, int iGpu, int nT
}
}

/**
* @brief Function to write the latency and FPS data of each video to a file
*/
void WriteRawData(const std::vector<double> &data, std::vector<int> &frames, std::string filename) {
// Open the output file stream
std::ofstream outputFile(filename);
Expand All @@ -265,6 +288,9 @@ void WriteRawData(const std::vector<double> &data, std::vector<int> &frames, std
outputFile.close();
}

/**
* @brief Function to calculate the statistical latency metrics
*/
std::tuple<double, double, double, double, double, double, double, double>
CalLatencyMetrics(const std::vector<double> &originData) {
std::vector<double> data = originData;
Expand All @@ -280,6 +306,11 @@ CalLatencyMetrics(const std::vector<double> &originData) {
return std::make_tuple(sum, mean, min, max, p50, p90, p95, p99);
}

/**
 * @brief Function to generate the total file list for the requested total number of videos.
 * If the input list contains fewer videos than the requested total, the list is repeated to fill it;
 * if it contains more, the list is truncated.
*/
std::vector<std::string> GenerateTotalFileList(std::string inputFilesListPath, int nTotalVideo,
const char *szInFilePath) {
std::vector<std::string> files;
Expand Down Expand Up @@ -309,6 +340,10 @@ std::vector<std::string> GenerateTotalFileList(std::string inputFilesListPath, i
return files;
}

/**
 * @brief Function to run the decoding tasks in parallel with a thread pool, decode all the videos, and record the
 * total latency and the total number of frames
*/
float run(std::vector<OptimizedNvDecoder *> &vDec, int nThread, std::vector<std::string> &files,
std::vector<int> &vnFrame, std::vector<std::exception_ptr> &vExceptionPtrs, int *nTotalFrames,
std::vector<double> &vnLatency) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "NvDecoder/NvDecoder.h"

// This class is derived from the NvDecoder class and is used to reduce the cuvidGetDecoderCaps query overhead
class OptimizedNvDecoder : public NvDecoder {

public:
Expand All @@ -11,6 +12,8 @@ class OptimizedNvDecoder : public NvDecoder {
* @brief This function is used to initialize the decoder session.
* Application must call this function to initialize the decoder, before
* starting to decode any frames.
 * The only difference from the original function is the addition of a new member, m_decodecaps.
 * The rest is identical to the original function; refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
*/
OptimizedNvDecoder(CUcontext &cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, CUVIDDECODECAPS decodecaps,
bool bLowLatency = false, bool bDeviceFramePitched = false, const Rect *pCropRect = NULL,
Expand All @@ -25,7 +28,9 @@ class OptimizedNvDecoder : public NvDecoder {
return ((OptimizedNvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}
/**
* @brief Define the new handler when decoding of sequence starts
* @brief Define the new handler when decoding of sequence starts.
 * The only change is to re-query the decoder caps when the video codec or format changes.
 * The rest is identical to the original function; refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,21 @@
#include <thread>
#include <vector>

// ThreadPool is a simple thread pool implementation that supports enqueueing the task with the index of thread to use
// and custom arguments like task(thread_index, *args).
class ThreadPool {
public:
/**
 * @brief Construct a new ThreadPool object with the given number of threads.
 */
ThreadPool(size_t numThreads) {
    size_t workerIdx = 0;
    while (workerIdx < numThreads) {
        // Each worker thread receives its own index so enqueued tasks can be
        // dispatched as task(thread_index, args...).
        threads.emplace_back(&ThreadPool::worker, this, workerIdx);
        ++workerIdx;
    }
}

/**
* @brief Destroy the ThreadPool object and join all threads.
*/
~ThreadPool() {
{
std::unique_lock<std::mutex> lock(mutex);
Expand All @@ -28,7 +35,10 @@ class ThreadPool {
thread.join();
}
}

/**
* @brief TaskWrapper is a wrapper of the task with the index of thread to use and custom arguments like
* task(thread_index, *args).
*/
template <typename R, typename F, typename... Args> struct TaskWrapper {
std::shared_ptr<std::packaged_task<R(size_t)>> task;

Expand All @@ -39,7 +49,9 @@ class ThreadPool {

// Invoke the wrapped packaged_task, forwarding the index of the worker thread executing it.
void operator()(size_t threadIdx) { (*task)(threadIdx); }
};

/**
 * @brief Enqueue the task with custom arguments and return a future that yields the task's result when finished.
*/
template <typename F, typename... Args>
auto enqueue(F &&f, Args &&...args) -> std::future<typename std::result_of<F(size_t, Args...)>::type> {
using ReturnType = typename std::result_of<F(size_t, Args...)>::type;
Expand All @@ -57,6 +69,9 @@ class ThreadPool {
}

private:
/**
* @brief The worker function that dequeues the task and executes it for each thread index.
*/
void worker(size_t threadIdx) {
while (true) {
std::function<void(size_t)> task;
Expand Down

0 comments on commit 09f0e93

Please sign in to comment.