/* Copyright (C) 2013 Mamadou DIOP
* Copyright (C) 2013 Doubango Telecom <http://www.doubango.org>
*
* This file is part of Open Source Doubango Framework.
*
* DOUBANGO is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* DOUBANGO is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with DOUBANGO.
*/
|
|
#include "plugin_cuda_utils.h"
|
|
|
|
#include "tsk_debug.h"
|
|
|
|
#include <NVEncoderAPI.h>
|
|
#include <cuda.h>
|
|
#include <cuda_runtime_api.h>
|
|
|
|
// Process-wide state backing the CudaUtils static helpers.

// Set by Startup(); once true, Startup() returns S_OK without initializing again.
bool CudaUtils::g_bStarted = false;
// Set by IsH264Supported() once the probe has gone past Startup(); later calls
// return the cached g_bH264Supported value.
bool CudaUtils::g_bH264Checked = false;
// Cached answer of the H.264 capability probe performed by IsH264Supported().
bool CudaUtils::g_bH264Supported = false;
// Cores-per-SM value cached by ConvertSMVer2Cores(); 0 means "not computed yet".
int CudaUtils::g_nCores = 0;
|
|
|
|
/**
 * Initializes COM on the calling thread and then the CUDA driver API.
 *
 * The function is a no-op (returns S_OK) once a previous call has succeeded.
 * A failed attempt is NOT remembered as "started", so the next call retries
 * instead of silently reporting success.
 *
 * @return S_OK on success (or when already started), the CoInitializeEx()
 *         error if COM could not be initialized, E_FAIL if cuInit() failed.
 */
HRESULT CudaUtils::Startup()
{
    if(g_bStarted) {
        return S_OK;
    }

    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    // Any success code (including S_FALSE) takes a COM reference on this thread.
    const bool bComRefTaken = SUCCEEDED(hr);

    // 0x80010106 (RPC_E_CHANGED_MODE) when called from managed code (e.g. Boghe) - More info: http://support.microsoft.com/kb/824480
    // COM is already usable on this thread in that case, so it is not treated as an error.
    if(!bComRefTaken && hr != static_cast<HRESULT>(0x80010106)) {
        TSK_DEBUG_ERROR("CoInitializeEx() failed with error code = %08x", (unsigned)hr);
        return hr;
    }

    const CUresult cuResult = cuInit(0);
    if(cuResult != CUDA_SUCCESS) {
        TSK_DEBUG_ERROR("cuInit() failed with error code = %08x", cuResult);
        // Balance the reference taken above so that retries do not accumulate COM references.
        if(bComRefTaken) {
            CoUninitialize();
        }
        return E_FAIL;
    }

    // Only a fully successful initialization is cached.
    g_bStarted = true;
    return S_OK;
}
|
|
|
|
/**
 * Counterpart of Startup(). Currently performs no work: the CUDA
 * de-initialization call is left commented out.
 *
 * @return Always S_OK.
 */
HRESULT CudaUtils::Shutdown()
{
    // cuDeinit();
    return S_OK;
}
|
|
|
|
bool CudaUtils::IsH264Supported()
|
|
{
|
|
if(g_bH264Checked) {
|
|
return g_bH264Supported;
|
|
}
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
CHECK_HR(hr = Startup());
|
|
|
|
g_bH264Checked = true;
|
|
|
|
NVEncoder pEncoder = NULL;
|
|
|
|
CHECK_HR(hr = NVGetHWEncodeCaps());
|
|
CHECK_HR(hr = NVCreateEncoder(&pEncoder));
|
|
// Both Base and Main profiles *must* be supported
|
|
CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_BASELINE));
|
|
CHECK_HR(hr = NVIsSupportedCodecProfile(pEncoder, NV_CODEC_TYPE_H264, NVVE_H264_PROFILE_MAIN));
|
|
|
|
g_bH264Supported = true;
|
|
|
|
bail:
|
|
if(pEncoder) {
|
|
NVDestroyEncoder(pEncoder);
|
|
pEncoder = NULL;
|
|
}
|
|
|
|
return g_bH264Supported;
|
|
}
|
|
|
|
int CudaUtils::ConvertSMVer2Cores(int nMajor, int nMinor)
|
|
{
|
|
if(g_nCores != 0) {
|
|
return g_nCores;
|
|
}
|
|
|
|
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
|
|
typedef struct {
|
|
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
|
|
int Cores;
|
|
} sSMtoCores;
|
|
|
|
sSMtoCores nGpuArchCoresPerSM[] = {
|
|
{ 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
|
|
{ 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
|
|
{ 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
|
|
{ 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
|
|
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
|
|
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
|
|
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
|
|
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
|
|
};
|
|
|
|
int index = 0;
|
|
|
|
while (nGpuArchCoresPerSM[index].SM != -1) {
|
|
if (nGpuArchCoresPerSM[index].SM == ((nMajor << 4) + nMinor)) {
|
|
g_nCores = nGpuArchCoresPerSM[index].Cores;
|
|
break;
|
|
}
|
|
|
|
index++;
|
|
}
|
|
|
|
// If we don't find the values, we default use the previous one to run properly
|
|
TSK_DEBUG_INFO("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", nMajor, nMinor, nGpuArchCoresPerSM[7].Cores);
|
|
g_nCores = nGpuArchCoresPerSM[7].Cores;
|
|
|
|
return g_nCores;
|
|
}
|
|
|
|
int CudaUtils::GetMaxGflopsDeviceId()
|
|
{
|
|
int device_count = 0;
|
|
cudaGetDeviceCount( &device_count );
|
|
|
|
cudaDeviceProp device_properties;
|
|
int max_gflops_device = 0;
|
|
int max_gflops = 0;
|
|
|
|
int current_device = 0;
|
|
cudaGetDeviceProperties( &device_properties, current_device );
|
|
max_gflops = device_properties.multiProcessorCount * device_properties.clockRate;
|
|
++current_device;
|
|
|
|
while( current_device < device_count ) {
|
|
cudaGetDeviceProperties( &device_properties, current_device );
|
|
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
|
|
if( gflops > max_gflops ) {
|
|
max_gflops = gflops;
|
|
max_gflops_device = current_device;
|
|
}
|
|
++current_device;
|
|
}
|
|
|
|
return max_gflops_device;
|
|
} |