doubango/plugins/pluginCUDA/plugin_cuda_utils.cxx

155 lines
4.6 KiB
C++
Executable File

/* Copyright (C) 2013 Mamadou DIOP
* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
*
* This file is part of Open Source Doubango Framework.
*
* DOUBANGO is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* DOUBANGO is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with DOUBANGO.
*/
#include "plugin_cuda_utils.h"
#include "tsk_debug.h"
#include <NVEncoderAPI.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
// Set to true by Startup() once its initialization sequence has been attempted.
bool CudaUtils::g_bStarted = false;
// Set to true by IsH264Supported() right after its Startup() check; once set, later calls return g_bH264Supported directly.
bool CudaUtils::g_bH264Checked = false;
// Result of the H.264 capability probe performed by IsH264Supported().
bool CudaUtils::g_bH264Supported = false;
// Cores-per-SM value last stored by ConvertSMVer2Cores(); 0 until that function has run.
int CudaUtils::g_nCores = 0;
// Initializes COM (multithreaded apartment) and the CUDA driver API.
// The initialization sequence is attempted only once per process; every later
// call returns the result of that first attempt.
// Returns S_OK when cuInit() succeeded, E_FAIL when cuInit() failed, or the
// CoInitializeEx() failure code when COM could not be initialized.
HRESULT CudaUtils::Startup()
{
    // Outcome of the single initialization attempt. It is kept so that calls
    // made after a failed attempt report the failure instead of S_OK.
    static HRESULT s_hrStartup = S_OK;

    if(g_bStarted) {
        return s_hrStartup;
    }

    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    // 0x80010106 when called from managed code (e.g. Boghe) - More info: http://support.microsoft.com/kb/824480
    // It is not treated as an error here and CUDA initialization proceeds.
    if(SUCCEEDED(hr) || hr == 0x80010106) {
        CUresult cuResult = cuInit(0);
        if(cuResult != CUDA_SUCCESS) {
            TSK_DEBUG_ERROR("cuInit() failed with error code = %08x", cuResult);
            hr = E_FAIL;
        }
        else {
            hr = S_OK;
        }
    }
    else {
        TSK_DEBUG_ERROR("CoInitializeEx() failed with error code = %08x", hr);
    }

    s_hrStartup = hr;
    g_bStarted = true;
    return hr;
}
// Counterpart of Startup(). It currently performs no work and always
// reports success.
HRESULT CudaUtils::Shutdown()
{
    // cuDeinit();  -- call left disabled, as in the original code
    const HRESULT hrResult = S_OK;
    return hrResult;
}
// Tells whether the NVIDIA hardware encoder can be used for H.264.
// Once the probe has been recorded as done (g_bH264Checked), the stored
// answer (g_bH264Supported) is returned without probing again.
// Returns true only when every step below succeeds, including the checks
// for both the Baseline and the Main profiles.
bool CudaUtils::IsH264Supported()
{
    if(g_bH264Checked) {
        return g_bH264Supported;
    }

    // Both locals are declared and initialized before the first CHECK_HR().
    // CHECK_HR is defined outside this file; presumably it jumps to 'bail'
    // on a failed HRESULT. Declaring pEncoder after the first CHECK_HR()
    // would let that jump skip its initialization and leave the cleanup
    // code below testing an indeterminate handle.
    HRESULT hr = S_OK;
    NVEncoder pEncoder = NULL;

    CHECK_HR(hr = Startup());
    g_bH264Checked = true;

    CHECK_HR(hr = NVGetHWEncodeCaps());
    CHECK_HR(hr = NVCreateEncoder(&pEncoder));
    // Both Base and Main profiles *must* be supported
    CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_BASELINE));
    CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_MAIN));
    g_bH264Supported = true;

bail:
    // The encoder is only needed for the probe: release it on every path.
    if(pEncoder) {
        NVDestroyEncoder(pEncoder);
        pEncoder = NULL;
    }
    return g_bH264Supported;
}
int CudaUtils::ConvertSMVer2Cores(int nMajor, int nMinor)
{
if(g_nCores != 0) {
return g_nCores;
}
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{ 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
{ 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
{ 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
{ 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((nMajor << 4) + nMinor)) {
g_nCores = nGpuArchCoresPerSM[index].Cores;
break;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
TSK_DEBUG_INFO("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", nMajor, nMinor, nGpuArchCoresPerSM[7].Cores);
g_nCores = nGpuArchCoresPerSM[7].Cores;
return g_nCores;
}
int CudaUtils::GetMaxGflopsDeviceId()
{
int device_count = 0;
cudaGetDeviceCount( &device_count );
cudaDeviceProp device_properties;
int max_gflops_device = 0;
int max_gflops = 0;
int current_device = 0;
cudaGetDeviceProperties( &device_properties, current_device );
max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
++current_device;
while( current_device < device_count ) {
cudaGetDeviceProperties( &device_properties, current_device );
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if( gflops > max_gflops ) {
max_gflops = gflops;
max_gflops_device = current_device;
}
++current_device;
}
return max_gflops_device;
}