MentorEM: Memory alignment and access optimization, built-in RGB32->I420 converter, general optimizations.
This commit is contained in:
parent
0aef5aa336
commit
619407e16d
|
@ -31,6 +31,25 @@
|
|||
#include "tsk_string.h"
|
||||
#include "tsk_debug.h"
|
||||
|
||||
/*
 * Availability of the built-in RGB32 -> I420 converter (MSVC builds only).
 *  - DDRAW_HAVE_RGB32_TO_I420        : the converter is compiled in.
 *  - DDRAW_HAVE_RGB32_TO_I420_INTRIN : SSE intrinsics flavour; needs <intrin.h>, an x86/x64 target and not Windows CE.
 *  - DDRAW_HAVE_RGB32_TO_I420_ASM    : inline-assembly flavour; 32-bit x86 only.
 * The x86/x64 gates keep the SSE-only code out of MSVC ARM builds.
 */
#if defined(_MSC_VER)
#	define DDRAW_HAVE_RGB32_TO_I420 1
#	if !TDAV_UNDER_WINDOWS_CE && (defined(_M_IX86) || defined(_M_X64))
#		define DDRAW_HAVE_RGB32_TO_I420_INTRIN 1
#		include <intrin.h>
#	endif /* TDAV_UNDER_WINDOWS_CE */
#	if defined(_M_IX86) /*|| _MSC_VER <= 1500*/ // https://msdn.microsoft.com/en-us/library/4ks26t93.aspx: Inline assembly is not supported on the ARM and x64 processors (1500 = VS2008)
#		define DDRAW_HAVE_RGB32_TO_I420_ASM 1
#	endif
#endif /* _MSC_VER */
|
||||
|
||||
/* Byte alignment used for the buffers handed to the SIMD routines. SSE = 16, AVX = 32. Should be 16. */
#ifndef DDRAW_MEM_ALIGNMENT
#	define DDRAW_MEM_ALIGNMENT 16
#endif /* DDRAW_MEM_ALIGNMENT */

/* True when the value (pointer or integer) "p" has none of the low bits selected by "a - 1" set; "a" must be a power of two. */
#ifndef DDRAW_IS_ALIGNED
#	define DDRAW_IS_ALIGNED(p, a) ((((uintptr_t)(p)) & ((a) - 1)) == 0)
#endif /* DDRAW_IS_ALIGNED */

/* Build-time switch, off unless overridden before this point. */
#ifndef DDRAW_HIGH_PRIO_MEMCPY
#	define DDRAW_HIGH_PRIO_MEMCPY 0
#endif /* DDRAW_HIGH_PRIO_MEMCPY */
|
||||
|
@ -39,6 +58,10 @@
|
|||
# define DDRAW_CPU_MONITOR 0
|
||||
#endif /* DDRAW_CPU_MONITOR */
|
||||
|
||||
/* When 0 the locked surface is always copied before use instead of being read in place. */
#ifndef DDRAW_MEM_SURFACE_DIRECT_ACCESS
#	define DDRAW_MEM_SURFACE_DIRECT_ACCESS 0 // direct access to "ddsd.lpSurface" is very slow even if the memory is correctly aligned: to be investigated
#endif /* DDRAW_MEM_SURFACE_DIRECT_ACCESS */

/* Default time-out for the CPU monitor; only defined when the monitor is enabled. */
#if DDRAW_CPU_MONITOR
#	ifndef DDRAW_CPU_MONITOR_TIME_OUT
#		define DDRAW_CPU_MONITOR_TIME_OUT 1000
#	endif
#endif /* DDRAW_CPU_MONITOR */
|
||||
|
@ -86,10 +109,15 @@ typedef struct tdav_producer_screencast_ddraw_s
|
|||
|
||||
tsk_thread_handle_t* tid[1];
|
||||
|
||||
void* p_buff_neg; // must use VirtualAlloc()
|
||||
tsk_size_t n_buff_neg;
|
||||
void* p_buff_rgb_aligned;
|
||||
tsk_size_t n_buff_rgb;
|
||||
tsk_size_t n_buff_rgb_bitscount;
|
||||
|
||||
void* p_buff_yuv_aligned;
|
||||
tsk_size_t n_buff_yuv;
|
||||
|
||||
BOOL b_have_rgb32_conv; // support for RGB32 -> I420 and primary screen format is RGB32
|
||||
|
||||
tsk_bool_t b_started;
|
||||
tsk_bool_t b_paused;
|
||||
tsk_bool_t b_muted;
|
||||
|
@ -98,12 +126,43 @@ typedef struct tdav_producer_screencast_ddraw_s
|
|||
}
|
||||
tdav_producer_screencast_ddraw_t;
|
||||
|
||||
static BOOL _tdav_producer_screencast_have_ssse3();
|
||||
static tmedia_chroma_t _tdav_producer_screencast_get_chroma(const DDPIXELFORMAT* pixelFormat);
|
||||
static void* TSK_STDCALL _tdav_producer_screencast_record_thread(void *arg);
|
||||
static int _tdav_producer_screencast_timer_cb(const void* arg, tsk_timer_id_t timer_id);
|
||||
static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_self);
|
||||
static HRESULT _tdav_producer_screencast_create_module(LPDDrawModule lpModule);
|
||||
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
/*
 * Constant operands of the SSSE3 RGB32 -> I420 converters. All tables are aligned on
 * DDRAW_MEM_ALIGNMENT because they are fetched with aligned 128-bit loads.
 *
 * The three coefficient tables hold one signed weight per source byte, repeated for the
 * four 4-byte samples of a 128-bit register; they are the signed operand of the
 * multiply-add (pmaddubsw) step. The 4th byte of every sample has weight 0.
 * NOTE(review): the weights (13/65/33 for bytes 0/1/2) look like BT.601 luma weights for
 * a B,G,R,X byte order - confirm against the surface format.
 */
// Luma weights; the weighted sum is shifted right by 7 and offset by kY16.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kYCoeffs[16] = {
	13, 65, 33, 0,   13, 65, 33, 0,   13, 65, 33, 0,   13, 65, 33, 0,
};
// U weights; the weighted sum is shifted right by 8 and offset by kUV128.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kUCoeffs[16] = {
	112, -74, -38, 0,   112, -74, -38, 0,   112, -74, -38, 0,   112, -74, -38, 0,
};
// V weights; the weighted sum is shifted right by 8 and offset by kUV128.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int8_t kVCoeffs[16] = {
	-18, -94, 112, 0,   -18, -94, 112, 0,   -18, -94, 112, 0,   -18, -94, 112, 0,
};
// Byte-shuffle mask: keeps samples 0 and 2 of a 4-sample register and duplicates that pair in the upper half.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const int32_t kRGBAShuffleDuplicate[4] = { 0x03020100, 0x0b0a0908, 0x03020100, 0x0b0a0908 }; // RGBA(X) || RGBA(X + 2) || RGBA(X) || RGBA(X + 2) = 2U || 2V
// Offset added to every 16-bit luma value.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const uint16_t kY16[8] = {
	16, 16, 16, 16,   16, 16, 16, 16
};
// Offset added to every 16-bit chroma value.
static __declspec(align(DDRAW_MEM_ALIGNMENT)) const uint16_t kUV128[8] = {
	128, 128, 128, 128,   128, 128, 128, 128
};
#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM */
|
||||
|
||||
// public function used to check that we can use DDRAW plugin before loading it
|
||||
tsk_bool_t tdav_producer_screencast_ddraw_plugin_is_supported()
|
||||
{
|
||||
|
@ -149,6 +208,382 @@ bail:
|
|||
return __supported;
|
||||
}
|
||||
|
||||
/*
 * Tells whether the CPU reports SSSE3 support (CPUID leaf 1, ECX bit 9).
 *
 * The probe runs on the first call only; the result is kept in function-level statics
 * (no synchronization is performed around them). When neither the inline-assembly nor
 * the intrinsic flavour is compiled in, the function reports FALSE.
 *
 * @return TRUE if SSSE3 is reported by the CPU, FALSE otherwise.
 */
static BOOL _tdav_producer_screencast_have_ssse3()
{
	static BOOL __checked = FALSE; // static guard to avoid checking more than once
	static BOOL __supported = FALSE;

	if (__checked) {
		return __supported;
	}
	__checked = TRUE;

#ifndef BIT
#	define BIT(n) (1 << (n))
#endif /*BIT*/
#if DDRAW_HAVE_RGB32_TO_I420_ASM
	// Runs CPUID with EAX=func / ECX=func2 and copies the four result registers into a, b, c, d.
#define cpuid(func, func2, a, b, c, d)\
	__asm mov eax, func\
	__asm mov ecx, func2\
	__asm cpuid\
	__asm mov a, eax\
	__asm mov b, ebx\
	__asm mov c, ecx\
	__asm mov d, edx

#define HAS_MMX     0x01
#define HAS_SSE     0x02
#define HAS_SSE2    0x04
#define HAS_SSE3    0x08
#define HAS_SSSE3   0x10
#define HAS_SSE4_1  0x20
#define HAS_AVX     0x40
#define HAS_AVX2    0x80

	unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
	// Leaf 0: EAX holds the highest supported standard leaf; leaf 1 is required below.
	cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
	if (reg_eax < 1) {
		DDRAW_DEBUG_ERROR("reg_eax < 1");
		return FALSE;
	}
	// Leaf 1: feature flags, SSSE3 is ECX bit 9.
	cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
	__supported = (reg_ecx & BIT(9)) ? TRUE : FALSE;
#elif DDRAW_HAVE_RGB32_TO_I420_INTRIN
	int cpu_info[4] = { 0 };
	// Leaf 0: cpu_info[0] holds the highest supported standard leaf; leaf 1 is required below.
	__cpuid(cpu_info, 0);
	if (cpu_info[0] > 0) {
		// Leaf 1: feature flags, SSSE3 is ECX (cpu_info[2]) bit 9.
		__cpuid(cpu_info, 0x00000001);
		__supported = (cpu_info[2] & BIT(9)) ? TRUE : FALSE;
	}
#endif /* DDRAW_HAVE_RGB32_TO_I420_ASM */

	DDRAW_DEBUG_INFO("SSSE3 supported = %s", __supported ? "YES" : "NO");

	return __supported;
}
|
||||
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_INTRIN
|
||||
|
||||
#define DDRAW_COPY16_INTRIN(dst, src) \
|
||||
_mm_store_si128((__m128i*)dst, _mm_load_si128((__m128i*)src))
|
||||
#define DDRAW_COPY64_INTRIN(dst, src) \
|
||||
_mm_store_si128((__m128i*)dst, _mm_load_si128((__m128i*)src)); \
|
||||
_mm_store_si128((__m128i*)&dst[16], _mm_load_si128((__m128i*)&src[16])); \
|
||||
_mm_store_si128((__m128i*)&dst[32], _mm_load_si128((__m128i*)&src[32])); \
|
||||
_mm_store_si128((__m128i*)&dst[48], _mm_load_si128((__m128i*)&src[48]))
|
||||
#define DDRAW_COPY128_INTRIN(dst, src) \
|
||||
DDRAW_COPY64_INTRIN(dst, src); \
|
||||
_mm_store_si128((__m128i*)&dst[64], _mm_load_si128((__m128i*)&src[64])); \
|
||||
_mm_store_si128((__m128i*)&dst[80], _mm_load_si128((__m128i*)&src[80])); \
|
||||
_mm_store_si128((__m128i*)&dst[96], _mm_load_si128((__m128i*)&src[96])); \
|
||||
_mm_store_si128((__m128i*)&dst[112], _mm_load_si128((__m128i*)&src[112]))
|
||||
|
||||
static void _tdav_producer_screencast_rgb32_to_yuv420_intrin_ssse3(uint8_t *yuvPtr, const uint8_t *rgbPtr, int width, int height)
|
||||
{
|
||||
// rgbPtr contains (samplesCount * 16) bytes
|
||||
// yPtr contains samplesCount bytes
|
||||
const int samplesCount = (width * height); // "width" and "height" are in samples
|
||||
const uint8_t *rgbPtr_;
|
||||
uint8_t* yPtr_ = yuvPtr, *uPtr_ = (yPtr_ + samplesCount), *vPtr_ = uPtr_ + (samplesCount >> 2);
|
||||
__m128i mmRgb0, mmRgb1, mmRgb2, mmRgb3, mmY0, mmY1, mmY;
|
||||
__m128i mmRgbU0, mmRgbU1, mmRgbV0, mmRgbV1;
|
||||
|
||||
// Convert 16 RGBA samples to 16 Y samples
|
||||
rgbPtr_ = rgbPtr;
|
||||
/* const */__m128i yCoeffs = _mm_load_si128((__m128i*)kYCoeffs);
|
||||
/* const */__m128i y16 = _mm_load_si128((__m128i*)kY16);
|
||||
for(int i = 0; i < samplesCount; i += 16)
|
||||
{
|
||||
// load 16 RGBA samples
|
||||
_mm_store_si128(&mmRgb0, _mm_load_si128((__m128i*)rgbPtr_)); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb1, _mm_load_si128((__m128i*)&rgbPtr_[16])); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb2, _mm_load_si128((__m128i*)&rgbPtr_[32])); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb3, _mm_load_si128((__m128i*)&rgbPtr_[48])); // 4 RGBA samples
|
||||
|
||||
_mm_store_si128(&mmRgb0, _mm_maddubs_epi16(mmRgb0/*unsigned*/, yCoeffs/*signed*/)); // mmRgb0 = ((yCoeffs[j] * mmRgb0[j]) + (yCoeffs[j + 1] * mmRgb0[j + 1]))
|
||||
_mm_store_si128(&mmRgb1, _mm_maddubs_epi16(mmRgb1/*unsigned*/, yCoeffs/*signed*/));
|
||||
_mm_store_si128(&mmRgb2, _mm_maddubs_epi16(mmRgb2/*unsigned*/, yCoeffs/*signed*/));
|
||||
_mm_store_si128(&mmRgb3, _mm_maddubs_epi16(mmRgb3/*unsigned*/, yCoeffs/*signed*/));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_hadd_epi16(mmRgb0, mmRgb1)); // horizontal add
|
||||
_mm_store_si128(&mmY1, _mm_hadd_epi16(mmRgb2, mmRgb3));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_srai_epi16(mmY0, 7)); // >> 7
|
||||
_mm_store_si128(&mmY1, _mm_srai_epi16(mmY1, 7));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_add_epi16(mmY0, y16)); // + 16
|
||||
_mm_store_si128(&mmY1, _mm_add_epi16(mmY1, y16));
|
||||
|
||||
_mm_store_si128(&mmY, _mm_packus_epi16(mmY0, mmY1)); // Saturate(I16 -> U8)
|
||||
|
||||
_mm_store_si128((__m128i*)yPtr_, mmY);
|
||||
|
||||
rgbPtr_ += 64; // 16samples * 4bytes
|
||||
yPtr_ += 16; // 16samples * 1byte
|
||||
}
|
||||
|
||||
// U+V planes
|
||||
/* const */__m128i uCoeffs = _mm_load_si128((__m128i*)kUCoeffs);
|
||||
/* const */__m128i vCoeffs = _mm_load_si128((__m128i*)kVCoeffs);
|
||||
/* const */__m128i rgbaShuffleDuplicate = _mm_load_si128((__m128i*)kRGBAShuffleDuplicate);
|
||||
/* const */__m128i uv128 = _mm_load_si128((__m128i*)kUV128);
|
||||
rgbPtr_ = rgbPtr;
|
||||
for(int i = 0; i < samplesCount; )
|
||||
{
|
||||
// load 16 RGBA samples
|
||||
_mm_store_si128(&mmRgb0, _mm_load_si128((__m128i*)rgbPtr_)); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb1, _mm_load_si128((__m128i*)&rgbPtr_[16])); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb2, _mm_load_si128((__m128i*)&rgbPtr_[32])); // 4 RGBA samples
|
||||
_mm_store_si128(&mmRgb3, _mm_load_si128((__m128i*)&rgbPtr_[48])); // 4 RGBA samples
|
||||
|
||||
_mm_store_si128(&mmRgb0, _mm_shuffle_epi8(mmRgb0, rgbaShuffleDuplicate));
|
||||
_mm_store_si128(&mmRgb1, _mm_shuffle_epi8(mmRgb1, rgbaShuffleDuplicate));
|
||||
_mm_store_si128(&mmRgb2, _mm_shuffle_epi8(mmRgb2, rgbaShuffleDuplicate));
|
||||
_mm_store_si128(&mmRgb3, _mm_shuffle_epi8(mmRgb3, rgbaShuffleDuplicate));
|
||||
|
||||
_mm_store_si128(&mmRgbU0, _mm_unpacklo_epi64(mmRgb0, mmRgb1));
|
||||
_mm_store_si128(&mmRgbV0, _mm_unpackhi_epi64(mmRgb0, mmRgb1)); // same as mmRgbU0: Use _mm_store_si128??
|
||||
_mm_store_si128(&mmRgbU1, _mm_unpacklo_epi64(mmRgb2, mmRgb3));
|
||||
_mm_store_si128(&mmRgbV1, _mm_unpackhi_epi64(mmRgb2, mmRgb3)); // same as mmRgbU0: Use _mm_store_si128??
|
||||
|
||||
_mm_store_si128(&mmRgbU0, _mm_maddubs_epi16(mmRgbU0/*unsigned*/, uCoeffs/*signed*/));
|
||||
_mm_store_si128(&mmRgbV0, _mm_maddubs_epi16(mmRgbV0/*unsigned*/, vCoeffs/*signed*/));
|
||||
_mm_store_si128(&mmRgbU1, _mm_maddubs_epi16(mmRgbU1/*unsigned*/, uCoeffs/*signed*/));
|
||||
_mm_store_si128(&mmRgbV1, _mm_maddubs_epi16(mmRgbV1/*unsigned*/, vCoeffs/*signed*/));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_hadd_epi16(mmRgbU0, mmRgbU1)); // horizontal add
|
||||
_mm_store_si128(&mmY1, _mm_hadd_epi16(mmRgbV0, mmRgbV1));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_srai_epi16(mmY0, 8)); // >> 8
|
||||
_mm_store_si128(&mmY1, _mm_srai_epi16(mmY1, 8));
|
||||
|
||||
_mm_store_si128(&mmY0, _mm_add_epi16(mmY0, uv128)); // + 128
|
||||
_mm_store_si128(&mmY1, _mm_add_epi16(mmY1, uv128));
|
||||
|
||||
// Y contains 8 samples for U then 8 samples for V
|
||||
_mm_store_si128(&mmY, _mm_packus_epi16(mmY0, mmY1)); // Saturate(I16 -> U8)
|
||||
_mm_storel_pi((__m64*)uPtr_, _mm_load_ps((float*)&mmY));
|
||||
_mm_storeh_pi((__m64*)vPtr_, _mm_load_ps((float*)&mmY));
|
||||
|
||||
uPtr_ += 8; // 8samples * 1byte
|
||||
vPtr_ += 8; // 8samples * 1byte
|
||||
|
||||
// move to next 16 samples
|
||||
i += 16;
|
||||
rgbPtr_ += 64; // 16samples * 4bytes
|
||||
|
||||
if (/*i % width == 0*/ !(i & (width - 1)))
|
||||
{
|
||||
// skip next line
|
||||
i += width;
|
||||
rgbPtr_ += (width * 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN */
|
||||
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_ASM
|
||||
|
||||
// __asm keyword must be duplicated in macro: https://msdn.microsoft.com/en-us/library/aa293825(v=vs.60).aspx
|
||||
/*
 * Fixed-size block copies (16, 64 and 128 bytes) written in MSVC inline assembly.
 * "dst" and "src" name pointer variables; both buffers must be 16-byte aligned (movdqa).
 * All loads are issued before the stores. EAX, ECX and XMM0..XMM7 (as many as needed) are used as scratch.
 */
#define DDRAW_COPY16_ASM(dst, src) \
	__asm { \
		__asm mov eax, dword ptr [src] \
		__asm mov ecx, dword ptr [dst] \
		__asm movdqa xmm0, xmmword ptr [eax] \
		__asm movdqa xmmword ptr [ecx], xmm0 \
	}
#define DDRAW_COPY64_ASM(dst, src) \
	__asm { \
		__asm mov eax, dword ptr [src] \
		__asm mov ecx, dword ptr [dst] \
		__asm movdqa xmm0, xmmword ptr [eax] \
		__asm movdqa xmm1, xmmword ptr [eax + 16] \
		__asm movdqa xmm2, xmmword ptr [eax + 32] \
		__asm movdqa xmm3, xmmword ptr [eax + 48] \
		__asm movdqa xmmword ptr [ecx], xmm0 \
		__asm movdqa xmmword ptr [ecx + 16], xmm1 \
		__asm movdqa xmmword ptr [ecx + 32], xmm2 \
		__asm movdqa xmmword ptr [ecx + 48], xmm3 \
	}
#define DDRAW_COPY128_ASM(dst, src) \
	__asm { \
		__asm mov eax, dword ptr [src] \
		__asm mov ecx, dword ptr [dst] \
		__asm movdqa xmm0, xmmword ptr [eax] \
		__asm movdqa xmm1, xmmword ptr [eax + 16] \
		__asm movdqa xmm2, xmmword ptr [eax + 32] \
		__asm movdqa xmm3, xmmword ptr [eax + 48] \
		__asm movdqa xmm4, xmmword ptr [eax + 64] \
		__asm movdqa xmm5, xmmword ptr [eax + 80] \
		__asm movdqa xmm6, xmmword ptr [eax + 96] \
		__asm movdqa xmm7, xmmword ptr [eax + 112] \
		__asm movdqa xmmword ptr [ecx], xmm0 \
		__asm movdqa xmmword ptr [ecx + 16], xmm1 \
		__asm movdqa xmmword ptr [ecx + 32], xmm2 \
		__asm movdqa xmmword ptr [ecx + 48], xmm3 \
		__asm movdqa xmmword ptr [ecx + 64], xmm4 \
		__asm movdqa xmmword ptr [ecx + 80], xmm5 \
		__asm movdqa xmmword ptr [ecx + 96], xmm6 \
		__asm movdqa xmmword ptr [ecx + 112], xmm7 \
	}
|
||||
|
||||
/*
 * Converts a packed 32-bit RGB frame to planar YUV 4:2:0 (Y plane, then U, then V) with SSSE3 inline assembly.
 *
 * The function is "naked": there is no compiler prologue/epilogue, arguments are read
 * from [esp + 12 + N] after the three register pushes, and it returns with a plain "ret".
 *
 * yuvPtr : destination; Y at yuvPtr, U at yuvPtr + width*height, V at U + (width*height)/4.
 *          Must be 16-byte aligned (movdqa stores for Y).
 * rgbPtr : source, width*height*4 bytes with no row padding. Must be 16-byte aligned (movdqa loads).
 * width  : row length in samples; must be a positive multiple of 16 (16 samples per iteration,
 *          the loops end on an exact zero count).
 * height : number of rows, must be positive. Chroma is produced for complete row pairs only, so
 *          with an odd height the last row contributes luma but no chroma (keeps the U/V writes
 *          inside their planes).
 *
 * Chroma is sub-sampled by selection: only the even columns of the even rows are used.
 */
__declspec(naked) __declspec(align(DDRAW_MEM_ALIGNMENT))
static void _tdav_producer_screencast_rgb32_to_yuv420_asm_ssse3(uint8_t *yuvPtr, const uint8_t *rgbPtr, int width, int height)
{
	__asm {
		push esi
		push edi
		push ebx

		//==================================//
		//=========== Y Samples ============//
		//==================================//
		mov edx, [esp + 12 + 4] // yuvPtr
		mov eax, [esp + 12 + 8] // rgbPtr
		mov ecx, [esp + 12 + 12] // width
		imul ecx, [esp + 12 + 16] // (width * height) = samplesCount

		movdqa xmm7, kYCoeffs // yCoeffs
		movdqa xmm6, kY16 // y16
		/* loopY start */
	loopY:
		// load 16 RGBA samples
		movdqa xmm0, [eax] // mmRgb0
		movdqa xmm1, [eax + 16] // mmRgb1
		movdqa xmm2, [eax + 32] // mmRgb2
		movdqa xmm3, [eax + 48] // mmRgb3
		lea eax, [eax + 64] // rgbPtr_ += 64
		// (yCoeffs[0] * mmRgbX[0]) + (yCoeffs[1] * mmRgbX[1])
		pmaddubsw xmm0, xmm7
		pmaddubsw xmm1, xmm7
		pmaddubsw xmm2, xmm7
		pmaddubsw xmm3, xmm7
		// horizontal add
		phaddw xmm0, xmm1
		phaddw xmm2, xmm3
		// >> 7
		psraw xmm0, 7
		psraw xmm2, 7
		// + 16
		paddw xmm0, xmm6
		paddw xmm2, xmm6
		// Saturate(I16 -> U8) - Packs
		packuswb xmm0, xmm2
		// Copy to yuvPtr
		movdqa [edx], xmm0
		lea edx, [edx + 16] // yPtr_ += 16
		sub ecx, 16 // samplesCount -= 16
		jnz loopY // goto loop if (samplesCount != 0)

		//==================================//
		//=========== UV Samples ===========//
		//==================================//
		mov esi, [esp + 12 + 4] // yuvPtr
		mov eax, [esp + 12 + 8] // rgbPtr
		mov ebx, [esp + 12 + 12] // ebx = width
		mov ecx, ebx
		imul ecx, [esp + 12 + 16] // ecx = (width * height) = samplesCount
		add esi, ecx // [[esi = uPtr_]]
		shr ecx, 2 // ecx = samplesCount / 4
		mov edi, esi // edi = uPtr_
		add edi, ecx // [[edi = uPtr_ + (samplesCount / 4) = vPtr_]]
		mov ecx, [esp + 12 + 16] // ecx = height
		shr ecx, 1 // ecx = number of complete row pairs
		jz loopUV_End // fewer than two rows: no chroma to produce

		movdqa xmm7, kUCoeffs // uCoeffs
		movdqa xmm6, kVCoeffs // vCoeffs
		movdqa xmm5, kRGBAShuffleDuplicate // rgbaShuffleDuplicate
		movdqa xmm4, kUV128 // uv128

		// Rows and columns are counted explicitly: "width" is only guaranteed to be a multiple of 16,
		// so end-of-row cannot be detected by masking the sample index with (width - 1).
	loopUV_Row:
		mov edx, ebx // edx = samples left in the current (even) row
		/* loopUV start */
	loopUV:
		// load 16 RGBA samples
		movdqa xmm0, [eax] // mmRgb0
		movdqa xmm1, [eax + 16] // mmRgb1
		movdqa xmm2, [eax + 32] // mmRgb2
		movdqa xmm3, [eax + 48] // mmRgb3
		lea eax, [eax + 64] // rgbPtr_ += 64

		// keep samples 0 and 2 of each register, duplicated in both halves
		pshufb xmm0, xmm5
		pshufb xmm1, xmm5
		pshufb xmm2, xmm5
		pshufb xmm3, xmm5

		punpcklqdq xmm0, xmm1 // mmRgbU0
		punpcklqdq xmm2, xmm3 // mmRgbU1
		movdqa xmm1, xmm0 // mmRgbV0
		movdqa xmm3, xmm2 // mmRgbV1

		pmaddubsw xmm0, xmm7 // mmRgbU0
		pmaddubsw xmm1, xmm6 // mmRgbV0
		pmaddubsw xmm2, xmm7 // mmRgbU1
		pmaddubsw xmm3, xmm6 // mmRgbV1

		// horizontal add
		phaddw xmm0, xmm2 // mmY0 (8 x U)
		phaddw xmm1, xmm3 // mmY1 (8 x V)

		// >> 8
		psraw xmm0, 8
		psraw xmm1, 8

		// + 128
		paddw xmm0, xmm4
		paddw xmm1, xmm4

		// Saturate(I16 -> U8): low 8 bytes = U, high 8 bytes = V
		packuswb xmm0, xmm1
		movlps [esi], xmm0
		movhps [edi], xmm0

		lea esi, [esi + 8] // uPtr_ += 8
		lea edi, [edi + 8] // vPtr_ += 8

		sub edx, 16 // 16 samples consumed
		jnz loopUV // more samples in this row

		// end of the even row: skip the odd row that follows
		lea eax, [eax + 4 * ebx] // rgbPtr_ += (width * 4)
		sub ecx, 1 // one row pair done
		jnz loopUV_Row

	loopUV_End:
		pop ebx
		pop edi
		pop esi
		ret
	}
}
|
||||
#endif /* DDRAW_HAVE_RGB32_TO_I420_ASM */
|
||||
|
||||
/* ============ Media Producer Interface ================= */
|
||||
static int _tdav_producer_screencast_ddraw_set(tmedia_producer_t *p_self, const tmedia_param_t* pc_param)
|
||||
{
|
||||
|
@ -183,7 +618,7 @@ static int _tdav_producer_screencast_ddraw_prepare(tmedia_producer_t* p_self, co
|
|||
tdav_producer_screencast_ddraw_t* p_ddraw = (tdav_producer_screencast_ddraw_t*)p_self;
|
||||
int ret = 0;
|
||||
HRESULT hr = DD_OK;
|
||||
tsk_size_t n_buff_neg_new;
|
||||
tsk_size_t n_buff_rgb_new;
|
||||
#if 0
|
||||
DDPIXELFORMAT DDPixelFormat;
|
||||
#endif
|
||||
|
@ -247,14 +682,38 @@ static int _tdav_producer_screencast_ddraw_prepare(tmedia_producer_t* p_self, co
|
|||
DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
|
||||
}
|
||||
#endif
|
||||
n_buff_neg_new = (ddsd.dwWidth * ddsd.dwHeight * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3));
|
||||
if (p_ddraw->n_buff_neg < n_buff_neg_new) {
|
||||
if (p_ddraw->p_buff_neg) VirtualFree(p_ddraw->p_buff_neg, 0, MEM_RELEASE);
|
||||
if (!(p_ddraw->p_buff_neg = VirtualAlloc(NULL, n_buff_neg_new, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE))) {
|
||||
p_ddraw->n_buff_neg = 0;
|
||||
// allocate RGB buffer
|
||||
n_buff_rgb_new = (ddsd.dwWidth * ddsd.dwHeight * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3));
|
||||
if (p_ddraw->n_buff_rgb < n_buff_rgb_new) {
|
||||
p_ddraw->p_buff_rgb_aligned = tsk_realloc_aligned(p_ddraw->p_buff_rgb_aligned, n_buff_rgb_new, DDRAW_MEM_ALIGNMENT);
|
||||
if (!p_ddraw->p_buff_rgb_aligned) {
|
||||
p_ddraw->n_buff_rgb = 0;
|
||||
DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
|
||||
}
|
||||
p_ddraw->n_buff_rgb = n_buff_rgb_new;
|
||||
}
|
||||
|
||||
// Check if we can use built-in chroma conversion
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
|
||||
p_ddraw->b_have_rgb32_conv =
|
||||
_tdav_producer_screencast_have_ssse3() // SSSE3 supported
|
||||
&& DDRAW_IS_ALIGNED(TMEDIA_PRODUCER(p_ddraw)->video.width, DDRAW_MEM_ALIGNMENT) // width multiple of 16
|
||||
/* && DDRAW_IS_ALIGNED(TMEDIA_PRODUCER(p_ddraw)->video.height, DDRAW_MEM_ALIGNMENT) // height multiple of 16 */
|
||||
&& TMEDIA_PRODUCER(p_ddraw)->video.chroma == tmedia_chroma_rgb32; // Primary screen RGB32
|
||||
if (p_ddraw->b_have_rgb32_conv) {
|
||||
TMEDIA_PRODUCER(p_ddraw)->video.chroma = tmedia_chroma_yuv420p;
|
||||
}
|
||||
#endif
|
||||
DDRAW_DEBUG_INFO("RGB32 -> I420 conversion supported: %s", p_ddraw->b_have_rgb32_conv ? "YES" : "NO");
|
||||
|
||||
// allocate YUV buffer
|
||||
if (p_ddraw->b_have_rgb32_conv) {
|
||||
p_ddraw->n_buff_yuv = (TMEDIA_PRODUCER(p_ddraw)->video.width * TMEDIA_PRODUCER(p_ddraw)->video.height * 3) >> 1;
|
||||
p_ddraw->p_buff_yuv_aligned = tsk_realloc_aligned(p_ddraw->p_buff_yuv_aligned, p_ddraw->n_buff_yuv, DDRAW_MEM_ALIGNMENT);
|
||||
if (!p_ddraw->p_buff_yuv_aligned) {
|
||||
p_ddraw->n_buff_yuv = 0;
|
||||
DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
|
||||
}
|
||||
p_ddraw->n_buff_neg = n_buff_neg_new;
|
||||
}
|
||||
|
||||
// BitmapInfo for preview
|
||||
|
@ -387,8 +846,10 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
DWORD nSizeWithoutPadding, nRowLengthInBytes, lockFlags;
|
||||
tmedia_producer_t* p_base = TMEDIA_PRODUCER(p_self);
|
||||
LPVOID lpBuffToSend;
|
||||
BOOL bDirectMemSurfAccess = DDRAW_MEM_SURFACE_DIRECT_ACCESS;
|
||||
//--uint64_t timeStart, timeEnd;
|
||||
tsk_bool_t b_using_locked_buffer;
|
||||
|
||||
//--timeStart = tsk_time_now();
|
||||
|
||||
if (!p_self) {
|
||||
DDRAW_CHECK_HR(hr = E_INVALIDARG);
|
||||
|
@ -418,7 +879,7 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Lock(NULL, &ddsd, lockFlags, NULL));
|
||||
// make sure surface size and number of bits per pixel haven't changed
|
||||
if (TMEDIA_PRODUCER(p_self)->video.width != ddsd.dwWidth || TMEDIA_PRODUCER(p_self)->video.height != ddsd.dwHeight || p_self->n_buff_rgb_bitscount != ddsd.ddpfPixelFormat.dwRGBBitCount) {
|
||||
tsk_size_t n_buff_neg_new;
|
||||
tsk_size_t n_buff_rgb_new;
|
||||
tmedia_chroma_t chroma_new;
|
||||
DDRAW_DEBUG_WARN("surface has changed: width %d<>%d or height %d<>%d or rgb_bits_count %d<>%d",
|
||||
p_base->video.width, ddsd.dwWidth,
|
||||
|
@ -427,20 +888,43 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
if ((chroma_new = _tdav_producer_screencast_get_chroma(&ddsd.ddpfPixelFormat)) == tmedia_chroma_none) {
|
||||
DDRAW_CHECK_HR(hr = DDERR_INVALIDCAPS);
|
||||
}
|
||||
n_buff_neg_new = (ddsd.dwWidth * ddsd.dwHeight * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3));
|
||||
if (p_self->n_buff_neg < n_buff_neg_new) {
|
||||
if (p_self->p_buff_neg) VirtualFree(p_self->p_buff_neg, 0, MEM_RELEASE);
|
||||
if (!(p_self->p_buff_neg = VirtualAlloc(NULL, n_buff_neg_new, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE))) {
|
||||
p_self->n_buff_neg = 0;
|
||||
// allocate RGB buffer
|
||||
n_buff_rgb_new = (ddsd.dwWidth * ddsd.dwHeight * (ddsd.ddpfPixelFormat.dwRGBBitCount >> 3));
|
||||
if (p_self->n_buff_rgb < n_buff_rgb_new) {
|
||||
p_self->p_buff_rgb_aligned = tsk_realloc_aligned(p_self->p_buff_rgb_aligned, n_buff_rgb_new, DDRAW_MEM_ALIGNMENT);
|
||||
if (!p_self->p_buff_rgb_aligned) {
|
||||
p_self->n_buff_rgb = 0;
|
||||
p_self->p_surf_primary->Unlock(NULL); // unlock before going to bail
|
||||
DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
|
||||
}
|
||||
p_self->n_buff_neg = n_buff_neg_new;
|
||||
p_self->n_buff_rgb = n_buff_rgb_new;
|
||||
}
|
||||
p_base->video.width = ddsd.dwWidth;
|
||||
p_base->video.height = ddsd.dwHeight;
|
||||
p_base->video.chroma = chroma_new;
|
||||
p_self->n_buff_rgb_bitscount = ddsd.ddpfPixelFormat.dwRGBBitCount;
|
||||
// Check if we can use built-in chroma conversion
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
|
||||
p_self->b_have_rgb32_conv =
|
||||
_tdav_producer_screencast_have_ssse3() // SSSE3 supported
|
||||
&& DDRAW_IS_ALIGNED(p_base->video.width, DDRAW_MEM_ALIGNMENT) // width multiple of 16
|
||||
/* && DDRAW_IS_ALIGNED(p_base->video.height, DDRAW_MEM_ALIGNMENT) // height multiple of 16 */
|
||||
&& p_base->video.chroma == tmedia_chroma_rgb32; // Primary screen RGB32
|
||||
if (p_self->b_have_rgb32_conv) {
|
||||
p_base->video.chroma = tmedia_chroma_yuv420p;
|
||||
}
|
||||
#endif
|
||||
DDRAW_DEBUG_INFO("RGB32 -> I420 conversion supported: %s", p_self->b_have_rgb32_conv ? "YES" : "NO");
|
||||
// allocate YUV buffer
|
||||
if (p_self->b_have_rgb32_conv) {
|
||||
p_self->n_buff_yuv = (p_base->video.width * p_base->video.height * 3) >> 1;
|
||||
p_self->p_buff_yuv_aligned = tsk_realloc_aligned(p_self->p_buff_yuv_aligned, p_self->n_buff_yuv, DDRAW_MEM_ALIGNMENT);
|
||||
if (!p_self->p_buff_yuv_aligned) {
|
||||
p_self->n_buff_yuv = 0;
|
||||
p_self->p_surf_primary->Unlock(NULL); // unlock before going to bail
|
||||
DDRAW_CHECK_HR(hr = DDERR_OUTOFMEMORY);
|
||||
}
|
||||
}
|
||||
// preview
|
||||
#if DDRAW_PREVIEW
|
||||
p_self->bi_preview.bmiHeader.biWidth = ddsd.dwWidth;
|
||||
|
@ -453,17 +937,59 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
nSizeWithoutPadding = ddsd.dwHeight * nRowLengthInBytes;
|
||||
|
||||
// init lpBuffToSend
|
||||
if (ddsd.lPitch == nRowLengthInBytes) {
|
||||
if (DDRAW_MEM_SURFACE_DIRECT_ACCESS && ddsd.lPitch == nRowLengthInBytes && (!p_self->b_have_rgb32_conv || DDRAW_IS_ALIGNED(ddsd.lpSurface, DDRAW_MEM_ALIGNMENT))) {
|
||||
// no padding
|
||||
lpBuffToSend = ddsd.lpSurface;
|
||||
b_using_locked_buffer = tsk_true;
|
||||
bDirectMemSurfAccess = TRUE;
|
||||
}
|
||||
else {
|
||||
// with padding or copy requested
|
||||
UINT8 *pSurfBuff = (UINT8 *)ddsd.lpSurface, *pNegBuff = (UINT8 *)p_self->p_buff_neg;
|
||||
UINT8 *pSurfBuff = (UINT8 *)ddsd.lpSurface, *pNegBuff = (UINT8 *)p_self->p_buff_rgb_aligned;
|
||||
DWORD y;
|
||||
b_using_locked_buffer = tsk_false;
|
||||
bDirectMemSurfAccess = FALSE;
|
||||
//--timeStart = tsk_time_now();
|
||||
if (ddsd.lPitch == nRowLengthInBytes) {
|
||||
// copy without padding padding
|
||||
const UINT8* src = pSurfBuff;
|
||||
UINT8* dst = (UINT8*)p_self->p_buff_rgb_aligned;
|
||||
if (DDRAW_IS_ALIGNED(src, 16) && (nSizeWithoutPadding & 15) == 0) {
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM
|
||||
if ((nSizeWithoutPadding & 127) == 0) {
|
||||
for (DWORD i = 0; i < nSizeWithoutPadding; i += 128, src += 128, dst += 128) {
|
||||
#if defined(DDRAW_COPY128_ASM)
|
||||
DDRAW_COPY128_ASM(dst, src);
|
||||
#else
|
||||
DDRAW_COPY128_INTRIN(dst, src);
|
||||
#endif /* DDRAW_COPY128_ASM */
|
||||
}
|
||||
}
|
||||
else if((nSizeWithoutPadding & 63) == 0) {
|
||||
for (DWORD i = 0; i < nSizeWithoutPadding; i += 64, src += 64, dst += 64) {
|
||||
#if defined(DDRAW_COPY64_ASM)
|
||||
DDRAW_COPY64_ASM(dst, src);
|
||||
#else
|
||||
DDRAW_COPY64_INTRIN(dst, src);
|
||||
#endif /* DDRAW_COPY64_ASM */
|
||||
}
|
||||
}
|
||||
else { // (nSizeWithoutPadding & 15) == 0
|
||||
for (DWORD i = 0; i < nSizeWithoutPadding; i += 16, src += 16, dst += 16) {
|
||||
#if defined(DDRAW_COPY16_ASM)
|
||||
DDRAW_COPY16_ASM(dst, src);
|
||||
#else
|
||||
DDRAW_COPY16_INTRIN(dst, src);
|
||||
#endif /* DDRAW_COPY16_ASM */
|
||||
}
|
||||
}
|
||||
#else // neither ASM nor INTRINSIC support
|
||||
CopyMemory(dst, src, nSizeWithoutPadding);
|
||||
#endif /* DDRAW_HAVE_RGB32_TO_I420_INTRIN || DDRAW_HAVE_RGB32_TO_I420_ASM */
|
||||
}
|
||||
else { // not 16bytes aligned
|
||||
CopyMemory(dst, src, nSizeWithoutPadding);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// copy with padding padding
|
||||
for (y = 0; y < ddsd.dwHeight; ++y) {
|
||||
CopyMemory(pNegBuff, pSurfBuff, nRowLengthInBytes);
|
||||
|
@ -471,11 +997,13 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
pNegBuff += nRowLengthInBytes;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// copy without padding padding
|
||||
CopyMemory(p_self->p_buff_neg, pSurfBuff, nSizeWithoutPadding);
|
||||
}
|
||||
lpBuffToSend = p_self->p_buff_neg;
|
||||
lpBuffToSend = p_self->p_buff_rgb_aligned;
|
||||
//--timeEnd = tsk_time_now();
|
||||
//--DDRAW_DEBUG_INFO("Mem copy: start=%llu, end=%llu, duration=%llu", timeStart, timeEnd, (timeEnd - timeStart));
|
||||
}
|
||||
if (!bDirectMemSurfAccess) {
|
||||
// surface buffer no longer needed, unlock
|
||||
DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Unlock(NULL));
|
||||
}
|
||||
// display preview
|
||||
#if DDRAW_PREVIEW
|
||||
|
@ -500,17 +1028,28 @@ static int _tdav_producer_screencast_grab(tdav_producer_screencast_ddraw_t* p_se
|
|||
}
|
||||
}
|
||||
#endif /* DDRAW_PREVIEW */
|
||||
if (!b_using_locked_buffer) {
|
||||
// Unlock the buffer before the encode callback
|
||||
DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Unlock(NULL));
|
||||
}
|
||||
|
||||
//--timeStart = tsk_time_now();
|
||||
p_base->enc_cb.callback(p_base->enc_cb.callback_data, lpBuffToSend, nSizeWithoutPadding);
|
||||
if (p_self->b_have_rgb32_conv) {
|
||||
// Convert from RGB32 to I420
|
||||
#if DDRAW_HAVE_RGB32_TO_I420_ASM
|
||||
_tdav_producer_screencast_rgb32_to_yuv420_asm_ssse3((uint8_t*)p_self->p_buff_yuv_aligned, (const uint8_t*)lpBuffToSend, (int)p_base->video.width, (int)p_base->video.height);
|
||||
#elif DDRAW_HAVE_RGB32_TO_I420_INTRIN
|
||||
_tdav_producer_screencast_rgb32_to_yuv420_intrin_ssse3((uint8_t*)p_self->p_buff_yuv_aligned, (const uint8_t*)lpBuffToSend, (int)p_base->video.width, (int)p_base->video.height);
|
||||
#else
|
||||
DDRAW_CHECK_HR(hr = E_NOTIMPL); // never called
|
||||
#endif
|
||||
p_base->enc_cb.callback(p_base->enc_cb.callback_data, p_self->p_buff_yuv_aligned, p_self->n_buff_yuv);
|
||||
}
|
||||
else {
|
||||
// Send RGB32 buffer to the encode callback and let conversion be done by libyuv
|
||||
p_base->enc_cb.callback(p_base->enc_cb.callback_data, lpBuffToSend, nSizeWithoutPadding);
|
||||
}
|
||||
//--timeEnd = tsk_time_now();
|
||||
//--DDRAW_DEBUG_INFO("Encode callback: start=%llu, end=%llu, duration=%llu", timeStart, timeEnd, (timeEnd - timeStart));
|
||||
|
||||
if (b_using_locked_buffer) {
|
||||
// Unlock the buffer after the encode callback
|
||||
if (bDirectMemSurfAccess) {
|
||||
// surface buffer was used in preview and encode callback, unlock now
|
||||
DDRAW_CHECK_HR(hr = p_self->p_surf_primary->Unlock(NULL));
|
||||
}
|
||||
|
||||
|
@ -519,6 +1058,10 @@ bail:
|
|||
/*hr = */p_self->p_surf_primary->Restore();
|
||||
hr = S_OK;
|
||||
}
|
||||
|
||||
//--timeEnd = tsk_time_now();
|
||||
//--DDRAW_DEBUG_INFO("Grab and encode duration=%llu", (timeEnd - timeStart));
|
||||
|
||||
return SUCCEEDED(hr) ? 0 : -1;
|
||||
}
|
||||
|
||||
|
@ -711,10 +1254,8 @@ static tsk_object_t* _tdav_producer_screencast_ddraw_dtor(tsk_object_t * self)
|
|||
tsk_timer_manager_destroy(&p_ddraw->p_timer_mgr);
|
||||
}
|
||||
#endif /* DDRAW_CPU_MONITOR */
|
||||
if (p_ddraw->p_buff_neg) {
|
||||
VirtualFree(p_ddraw->p_buff_neg, 0, MEM_RELEASE);
|
||||
p_ddraw->p_buff_neg = NULL;
|
||||
}
|
||||
TSK_FREE_ALIGNED(p_ddraw->p_buff_rgb_aligned);
|
||||
TSK_FREE_ALIGNED(p_ddraw->p_buff_yuv_aligned);
|
||||
DDRAW_SAFE_RELEASE(&p_ddraw->p_surf_primary);
|
||||
DDrawModuleSafeFree(p_ddraw->ddrawModule);
|
||||
tsk_safeobj_deinit(p_ddraw);
|
||||
|
|
|
@ -1,126 +1,173 @@
|
|||
/*
|
||||
* Copyright (C) 2010-2011 Mamadou Diop.
|
||||
*
|
||||
* Contact: Mamadou Diop <diopmamadou(at)doubango[dot]org>
|
||||
*
|
||||
* This file is part of Open Source Doubango Framework.
|
||||
*
|
||||
* DOUBANGO is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DOUBANGO is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DOUBANGO.
|
||||
*
|
||||
*/
|
||||
|
||||
/**@file tsk_memory.c
|
||||
* @brief Useful memory management functions to handle memory.
|
||||
* As I'm a lazy man, some definitions come from <a href="http://www.cplusplus.com">this website</a>
|
||||
*
|
||||
* @author Mamadou Diop <diopmamadou(at)doubango[dot]org>
|
||||
*
|
||||
|
||||
*/
|
||||
#include "tsk_memory.h"
|
||||
#include "tsk_debug.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/**@defgroup tsk_memory_group Utility functions for memory management.
|
||||
*/
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Allocates a block of size bytes of memory, returning a pointer to the beginning of the block.
|
||||
* The content of the newly allocated block of memory is not initialized, remaining with indeterminate values.
|
||||
* @param size Size of the memory block, in bytes.
|
||||
* @retval On success, a pointer to the memory block allocated by the function.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_malloc(tsk_size_t size)
{
	/* Thin wrapper around malloc() that logs when the allocation fails. */
	void *block = malloc(size);

	if(block == tsk_null){
		TSK_DEBUG_ERROR("Memory allocation failed");
	}
	return block;
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Reallocate memory block.
|
||||
* In case that ptr is NULL, the function allocates a new zero-initialized block of size bytes (via calloc, unlike @a tsk_malloc) and returns a pointer to the beginning of it.
|
||||
* The function may move the memory block to a new location, in which case the new location is returned. The content of the memory block is preserved up to the lesser of the
|
||||
* new and old sizes, even if the block is moved. If the new size is larger, the value of the newly allocated portion is indeterminate.
|
||||
* In case that the size is 0, the memory previously allocated in ptr is deallocated as if a call to free was made, and a NULL pointer is returned.
|
||||
* @param ptr Pointer to a memory block previously allocated with malloc, calloc or realloc to be reallocated.
|
||||
* If this is NULL, a new block is allocated and a pointer to it is returned by the function.
|
||||
* @param size New size for the memory block, in bytes.
|
||||
* If it is 0 and ptr points to an existing block of memory, the memory block pointed by ptr is deallocated and a NULL pointer is returned.
|
||||
* @retval A pointer to the reallocated memory block, which may be either the same as the ptr argument or a new location.
|
||||
* The type of this pointer is void*, which can be cast to the desired type of data pointer in order to be dereferenceable.
|
||||
* If the function failed to allocate the requested block of memory, a NULL pointer is returned.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_realloc (void* ptr, tsk_size_t size)
{
	/*
	 * Resizes "ptr" to "size" bytes.
	 *  - size != 0, ptr != NULL: forwards to realloc(); on failure NULL is returned.
	 *  - size != 0, ptr == NULL: allocates a zero-initialized block with calloc().
	 *  - size == 0: releases "ptr" (if any) and returns NULL, as the API documentation promises.
	 */
	void *ret = tsk_null;

	if(size) {
		if(ptr){
			if(!(ret = realloc(ptr, size))){
				TSK_DEBUG_ERROR("Memory reallocation failed");
			}
		}
		else{
			if(!(ret = calloc(size, 1))){
				TSK_DEBUG_ERROR("Memory allocation (%u) failed", (unsigned)size);
			}
		}
	}
	else{
		/* Zero size means "deallocate": previously the block was leaked here. free(NULL) is a no-op. */
		free(ptr);
	}

	return ret;
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Deallocate space in memory.
|
||||
* @param ptr Pointer to a memory block previously allocated with @a tsk_malloc, @a tsk_calloc or @a tsk_realloc to be deallocated.
|
||||
* If a null pointer is passed as argument, no action occurs.
|
||||
*/
|
||||
void tsk_free(void** ptr)
|
||||
{
|
||||
if(ptr && *ptr){
|
||||
free(*ptr);
|
||||
*ptr = tsk_null;
|
||||
}
|
||||
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Allocates a block of memory for an array of num elements, each of them size bytes long, and initializes all its bits to zero.
|
||||
* The effective result is the allocation of a zero-initialized memory block of (num * size) bytes.
|
||||
* @param num Number of elements to be allocated
|
||||
* @param size Size of elements
|
||||
* @retval A pointer to the memory block allocated by the function. The type of this pointer is always void*, which can be cast to the desired type of data pointer in order to be dereferenceable.
|
||||
* If the function failed to allocate the requested block of memory, a NULL pointer is returned.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_calloc(tsk_size_t num, tsk_size_t size)
{
	void* block;

	/* A zero element count or a zero element size yields no allocation at all. */
	if(!num || !size){
		return tsk_null;
	}

	block = calloc(num, size);
	if(block == tsk_null){
		TSK_DEBUG_ERROR("Memory allocation failed. num=%u and size=%u", (unsigned)num, (unsigned)size);
	}
	return block;
}
|
||||
/*
|
||||
* Copyright (C) 2010-2011 Mamadou Diop.
|
||||
*
|
||||
* Contact: Mamadou Diop <diopmamadou(at)doubango[dot]org>
|
||||
*
|
||||
* This file is part of Open Source Doubango Framework.
|
||||
*
|
||||
* DOUBANGO is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DOUBANGO is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DOUBANGO.
|
||||
*
|
||||
*/
|
||||
|
||||
/**@file tsk_memory.c
|
||||
* @brief Useful memory management functions to handle memory.
|
||||
* As I'm a lazy man, some definitions come from <a href="http://www.cplusplus.com">this website</a>
|
||||
*
|
||||
* @author Mamadou Diop <diopmamadou(at)doubango[dot]org>
|
||||
*
|
||||
|
||||
*/
|
||||
#include "tsk_memory.h"
|
||||
#include "tsk_debug.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/**@defgroup tsk_memory_group Utility functions for memory management.
|
||||
*/
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Allocates a block of size bytes of memory, returning a pointer to the beginning of the block.
|
||||
* The content of the newly allocated block of memory is not initialized, remaining with indeterminate values.
|
||||
* @param size Size of the memory block, in bytes.
|
||||
* @retval On success, a pointer to the memory block allocated by the function.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_malloc(tsk_size_t size)
{
	/* Thin wrapper around malloc() that logs when the allocation fails. */
	void *block = malloc(size);

	if(block == tsk_null){
		TSK_DEBUG_ERROR("Memory allocation failed");
	}
	return block;
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Reallocate memory block.
|
||||
* In case that ptr is NULL, the function allocates a new zero-initialized block of size bytes (via calloc, unlike @a tsk_malloc) and returns a pointer to the beginning of it.
|
||||
* The function may move the memory block to a new location, in which case the new location is returned. The content of the memory block is preserved up to the lesser of the
|
||||
* new and old sizes, even if the block is moved. If the new size is larger, the value of the newly allocated portion is indeterminate.
|
||||
* In case that the size is 0, the memory previously allocated in ptr is deallocated as if a call to free was made, and a NULL pointer is returned.
|
||||
* @param ptr Pointer to a memory block previously allocated with malloc, calloc or realloc to be reallocated.
|
||||
* If this is NULL, a new block is allocated and a pointer to it is returned by the function.
|
||||
* @param size New size for the memory block, in bytes.
|
||||
* If it is 0 and ptr points to an existing block of memory, the memory block pointed by ptr is deallocated and a NULL pointer is returned.
|
||||
* @retval A pointer to the reallocated memory block, which may be either the same as the ptr argument or a new location.
|
||||
* The type of this pointer is void*, which can be cast to the desired type of data pointer in order to be dereferenceable.
|
||||
* If the function failed to allocate the requested block of memory, a NULL pointer is returned.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_realloc (void* ptr, tsk_size_t size)
{
	/*
	 * Resizes "ptr" to "size" bytes.
	 *  - size != 0, ptr != NULL: forwards to realloc(); on failure NULL is returned.
	 *  - size != 0, ptr == NULL: allocates a zero-initialized block with calloc().
	 *  - size == 0: releases "ptr" (if any) and returns NULL, as the API documentation promises.
	 */
	void *ret = tsk_null;

	if(size) {
		if(ptr){
			if(!(ret = realloc(ptr, size))){
				TSK_DEBUG_ERROR("Memory reallocation failed");
			}
		}
		else{
			if(!(ret = calloc(size, 1))){
				TSK_DEBUG_ERROR("Memory allocation (%u) failed", (unsigned)size);
			}
		}
	}
	else{
		/* Zero size means "deallocate": previously the block was leaked here. free(NULL) is a no-op. */
		free(ptr);
	}

	return ret;
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Deallocate space in memory.
|
||||
* @param ptr Pointer to a memory block previously allocated with @a tsk_malloc, @a tsk_calloc or @a tsk_realloc to be deallocated.
|
||||
* If a null pointer is passed as argument, no action occurs.
|
||||
*/
|
||||
void tsk_free(void** ptr)
|
||||
{
|
||||
if(ptr && *ptr){
|
||||
free(*ptr);
|
||||
*ptr = tsk_null;
|
||||
}
|
||||
}
|
||||
|
||||
/**@ingroup tsk_memory_group
|
||||
* Allocates a block of memory for an array of num elements, each of them size bytes long, and initializes all its bits to zero.
|
||||
* The effective result is the allocation of a zero-initialized memory block of (num * size) bytes.
|
||||
* @param num Number of elements to be allocated
|
||||
* @param size Size of elements
|
||||
* @retval A pointer to the memory block allocated by the function. The type of this pointer is always void*, which can be cast to the desired type of data pointer in order to be dereferenceable.
|
||||
* If the function failed to allocate the requested block of memory, a NULL pointer is returned.
|
||||
* It is up to you to free the returned pointer.
|
||||
*/
|
||||
void* tsk_calloc(tsk_size_t num, tsk_size_t size)
{
	void* block;

	/* A zero element count or a zero element size yields no allocation at all. */
	if(!num || !size){
		return tsk_null;
	}

	block = calloc(num, size);
	if(block == tsk_null){
		TSK_DEBUG_ERROR("Memory allocation failed. num=%u and size=%u", (unsigned)num, (unsigned)size);
	}
	return block;
}
|
||||
|
||||
/*
 * Allocates "size" bytes whose address is a multiple of "alignment".
 * On desktop Windows the request is forwarded to _aligned_malloc().
 * Elsewhere the block is over-allocated with malloc(), the returned pointer is
 * moved forward by "pad" bytes (1..alignment) and "pad" is stored in the byte
 * just before the returned address so that tsk_free_aligned() can recover the
 * original malloc() pointer.
 * The returned pointer must be released with tsk_free_aligned().
 * Returns NULL on failure or (fallback path) when the arguments are invalid.
 */
void* tsk_malloc_aligned(tsk_size_t size, tsk_size_t alignment)
{
#if TSK_UNDER_WINDOWS && !TSK_UNDER_WINDOWS_CE && !TSK_UNDER_WINDOWS_RT
	return _aligned_malloc(size, alignment);
#else
	void* ret;
	tsk_size_t pad;

	/* The pad is kept in a single byte and computed with a bit mask: the alignment
	 * has to be a non-zero power of two not larger than 128 (pad <= alignment <= 255). */
	if (!alignment || (alignment & (alignment - 1)) || alignment > 128) {
		TSK_DEBUG_ERROR("Invalid alignment (%u)", (unsigned)alignment);
		return tsk_null;
	}
	/* Guard "size + alignment" against wrap-around (assumes tsk_size_t is unsigned). */
	if (size > ((tsk_size_t)-1) - alignment) {
		TSK_DEBUG_ERROR("Memory allocation (%u) failed", (unsigned)size);
		return tsk_null;
	}

	ret = malloc(size + alignment);
	if (ret) {
		/* Distance to the next aligned address; a full "alignment" when already aligned,
		 * so there is always at least one byte available to hold the pad. */
		pad = alignment - (tsk_size_t)((uintptr_t)ret & (alignment - 1));
		ret = ((uint8_t*)ret) + pad; // pad
		((uint8_t*)ret)[-1] = (uint8_t)pad; // store the pad for later use
	}
	return ret;
#endif
}
|
||||
|
||||
/*
 * Resizes an aligned block previously obtained from tsk_malloc_aligned(),
 * tsk_calloc_aligned() or tsk_realloc_aligned().
 * On desktop Windows the request is forwarded to _aligned_realloc().
 * Elsewhere the old block is released and a brand new one is allocated:
 * NOTE: on that fallback path the previous content is NOT copied (the old size
 * is unknown here) and the old block is already gone if the new allocation fails.
 */
void* tsk_realloc_aligned(void * ptr, tsk_size_t size, tsk_size_t alignment)
{
#if TSK_UNDER_WINDOWS && !TSK_UNDER_WINDOWS_CE && !TSK_UNDER_WINDOWS_RT
	return _aligned_realloc(ptr, size, alignment);
#else
	/* tsk_free_aligned() expects the ADDRESS of the pointer (void**). Passing "ptr"
	 * itself made it interpret the first bytes of the buffer as a pointer and free that. */
	tsk_free_aligned(&ptr);
	return tsk_malloc_aligned(size, alignment);
#endif
}
|
||||
|
||||
void tsk_free_aligned(void** ptr)
|
||||
{
|
||||
if (ptr && *ptr) {
|
||||
void* ptr_ = *ptr;
|
||||
#if TSK_UNDER_WINDOWS && !TSK_UNDER_WINDOWS_CE && !TSK_UNDER_WINDOWS_RT
|
||||
_aligned_free(ptr_);
|
||||
#else
|
||||
free((((uint8_t*)ptr_) - ((uint8_t*)ptr_)[-1]));
|
||||
#endif
|
||||
*ptr = tsk_null;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Allocates a zero-initialized, aligned block of (num * size) bytes using
 * tsk_malloc_aligned(). Returns NULL when the allocation fails or when
 * (num * size) cannot be represented in tsk_size_t.
 * The returned pointer must be released with tsk_free_aligned().
 */
void* tsk_calloc_aligned(tsk_size_t num, tsk_size_t size, tsk_size_t alignment)
{
	tsk_size_t total;
	void* ptr;

	/* Reject products that would wrap around: otherwise a too-small block would be
	 * allocated (assumes tsk_size_t is an unsigned type). */
	if (num && size > ((tsk_size_t)-1) / num) {
		TSK_DEBUG_ERROR("Memory allocation failed. num=%u and size=%u", (unsigned)num, (unsigned)size);
		return tsk_null;
	}

	total = size * num;
	ptr = tsk_malloc_aligned(total, alignment);
	if (ptr) {
		memset(ptr, 0, total);
	}
	return ptr;
}
|
||||
|
|
|
@ -45,8 +45,10 @@
|
|||
|
||||
TSK_BEGIN_DECLS
|
||||
|
||||
#define TSK_SAFE_FREE(ptr) (void)tsk_free((void**)(&ptr));
|
||||
#define TSK_SAFE_FREE(ptr) (void)tsk_free((void**)(&ptr))
|
||||
#define TSK_SAFE_FREE_ALIGNED(ptr) (void)tsk_free_aligned((void**)(&ptr))
|
||||
#define TSK_FREE(ptr) TSK_SAFE_FREE((ptr))
|
||||
#define TSK_FREE_ALIGNED(ptr) TSK_SAFE_FREE_ALIGNED((ptr))
|
||||
#define TSK_SAFE_FREE_ARRAY(pptr, count){ \
|
||||
int __i; \
|
||||
for(__i = 0; __i < (count); ++__i) \
|
||||
|
@ -61,6 +63,11 @@ TINYSAK_API void* tsk_realloc (void * ptr, tsk_size_t size);
|
|||
TINYSAK_API void tsk_free(void** ptr);
|
||||
TINYSAK_API void* tsk_calloc(tsk_size_t num, tsk_size_t size);
|
||||
|
||||
TINYSAK_API void* tsk_malloc_aligned(tsk_size_t size, tsk_size_t alignment);
|
||||
TINYSAK_API void* tsk_realloc_aligned(void * ptr, tsk_size_t size, tsk_size_t alignment);
|
||||
TINYSAK_API void tsk_free_aligned(void** ptr);
|
||||
TINYSAK_API void* tsk_calloc_aligned(tsk_size_t num, tsk_size_t size, tsk_size_t alignment);
|
||||
|
||||
TSK_END_DECLS
|
||||
|
||||
#endif /* _TINYSAK_MEMORY_H_ */
|
||||
|
|
Loading…
Reference in New Issue