/* * display.cl * * Display OpenCL kernel. Formats the raw FFT output into GL objects we * use for display. * * Copyright (C) 2013-2014 Sylvain Munaut * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * NOTE (to make it clear): For the purpose of this license, any software * making use of this kernel (or derivative thereof) is considered to be * a derivative work (i.e. "a work based on the Program"). */ /* Enable or not use of NV SM11 histogram algo (set automatically) */ /* #define USE_NV_SM11_ATOMICS */ /* Enable or not the use of cl_khr_local_int32_base_atomics to * implement atomic add (set automatically) */ /* #define USE_EXT_ATOMICS */ #ifdef USE_EXT_ATOMICS #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable #endif #define CLAMP //#define MAX_HOLD_LIVE //#define MAX_HOLD_HISTO //#define MAX_HOLD_NORMAL #define MAX_HOLD_DECAY #ifdef USE_NV_SM11_ATOMICS #define LOG2_WARP_SIZE 5U #define UINT_BITS 32U #define WARP_SIZE (1U << LOG2_WARP_SIZE) #define TAG_MASK ( (1U << (UINT_BITS - LOG2_WARP_SIZE)) - 1U ) /* See NVidia OpenCL Histogram256 example for how/why this works */ inline void nv_sm11_atomic_inc(volatile __local uint *p, uint tag) { uint count; do { count = *p & TAG_MASK; count = tag | (count + 1); *p = count; } while (*p != count); } #endif /* USE_NV_SM11_ATOMICS */ __attribute__((reqd_work_group_size(16, 16, 1))) __kernel void display( /* FFT Input */ __global const float2 *fft, /* [ 0] Input FFT (complex) */ const uint fft_log2_len, /* [ 1] log2(FFT length) */ const uint fft_batch, /* [ 2] # spectrums in the input */ /* Waterfall */ __write_only image2d_t wf_tex, /* [ 3] Texture handle */ const uint wf_offset, /* [ 4] Y Offset in the texture */ /* Histogram */ __read_only image2d_t histo_tex_r, /* [ 5] Texture read handle */ __write_only image2d_t histo_tex_w, /* [ 6] Texture write handle */ const float histo_t0r, /* [ 7] Rise time constant */ const float histo_t0d, /* [ 8] Decay time constant */ const float histo_scale, /* [ 9] Val->Bin: scaling */ const float histo_ofs, /* [10] Val->Bin: offset */ /* Live spectrum */ __global float2 *spectrum_vbo, /* [11] Vertex Buffer Object */ const float live_alpha) /* [12] Averaging time constant */ { int gidx; float max_pwr = - 1000.0f; /* Local memory */ __local float live_buf[16 * 16]; /* get_local_size(0) * get_local_size(1) */ __local float max_buf[16 * 16]; /* get_local_size(0) * get_local_size(1) */ __local uint histo_buf[16 * 128]; /* Local shortcuts */ const float live_one_minus_alpha = 1.0f - live_alpha; /* Transposition & Atomic emulation */ #ifdef USE_NV_SM11_ATOMICS __local float pwr_buf[16 * 16]; /* pwr transpose buffer */ uint tib = (get_local_id(0) + get_local_id(1)) & 15; uint ti0 = tib | (get_local_id(0) << 4); uint ti1 = tib | (get_local_id(1) << 4); const uint tag = get_local_id(0) << (UINT_BITS - LOG2_WARP_SIZE); #endif /* Clear buffers */ live_buf[get_local_id(1) * get_local_size(0) + get_local_id(0)] = 0.0f; __local uint *h = &histo_buf[get_local_id(1) * get_local_size(0) + get_local_id(0)]; h[ 0] = 0; h[ 256] = 0; h[ 512] = 0; h[ 768] = 0; h[1024] = 0; h[1280] = 0; h[1536] = 0; h[1792] = 0; /* Wait for all clears to be done by everyone */ barrier(CLK_LOCAL_MEM_FENCE); /* Main loop */ for (gidx=0; gidx 127) #ifdef CLAMP bin = (bin < 0) ? 0 : 127; #else continue; #endif /* Atomic Bin increment */ #if defined(USE_NV_SM11_ATOMICS) nv_sm11_atomic_inc(&histo_buf[(bin << 4) + get_local_id(1)], tag); #elif defined(USE_EXT_ATOMICS) atom_inc(&histo_buf[(bin << 4) + get_local_id(0)]); #else atomic_inc(&histo_buf[(bin << 4) + get_local_id(0)]); #endif } max_buf[get_local_id(1) * get_local_size(0) + get_local_id(0)] = max_pwr; /* Wait for everyone before the final merges */ barrier(CLK_LOCAL_MEM_FENCE); /* Live Spectrum merging */ __global float2 *live_vbo = &spectrum_vbo[0]; if (get_global_id(1) == 0) { int i,n; float sum; float2 vertex; /* Compute sum */ sum = 0.0f; for (i=0; i> 1; i = get_global_id(0) ^ n; /* Compute vertex position */ vertex = live_vbo[i]; if (!isfinite(vertex.y)) /* Safety if previous val is weird */ vertex.y = sum / get_local_size(1); vertex.x = ((float)i / (float)n) - 1.0f; vertex.y = vertex.y * native_powr(live_one_minus_alpha, (float)fft_batch) + sum * live_alpha; live_vbo[i] = vertex; } /* Histogram merging */ for (gidx=0; gidx<128; gidx+=get_local_size(1)) { const sampler_t direct_sample = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE; /* Histogram coordinates */ int2 coord; coord.x = get_global_id(0); coord.y = gidx + get_local_id(1); /* Fetch previous histogram value */ float4 hv = read_imagef(histo_tex_r, direct_sample, coord); /* Fetch hit count */ uint hc = histo_buf[coord.y * get_local_size(0) + get_local_id(0)] #ifdef USE_NV_SM11_ATOMICS & TAG_MASK #endif ; /* Fast exit if possible ... */ if ((hv.x <= 0.01f) && (hc == 0)) continue; /* Apply the rise / decay */ float a = (float)hc / (float)fft_batch; float b = a * native_recip(histo_t0r); float c = b + native_recip(histo_t0d); float d = b * native_recip(c); float e = native_powr(1.0f - c, (float)fft_batch); hv.x = (hv.x - d) * e + d; /* Clamp value (we don't clear the texture so we get crap) */ hv.x = clamp(hv.x, 0.0f, 1.0f); /* Write new histogram value */ write_imagef(histo_tex_w, coord, hv); } /* Max hold */ __global float2 *max_vbo = &spectrum_vbo[1 << fft_log2_len]; if (get_global_id(1) == 0) { int i, j, n; float2 vertex; #ifdef MAX_HOLD_HISTO vertex.y = - histo_ofs; for (gidx=0; gidx<128; gidx++) { const sampler_t direct_sample = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE; /* Histogram coordinates */ int2 coord; coord.x = get_global_id(0); coord.y = gidx; /* Fetch histogram value */ float4 hv = read_imagef(histo_tex_r, direct_sample, coord); /* Set vertex position */ if (hv.x > 0.1f) vertex.y = ((float)gidx / histo_scale)- histo_ofs; } #endif /* Position in spectrum */ n = get_global_size(0) >> 1; i = get_global_id(0) ^ n; max_pwr = max_vbo[i].y; if (!isfinite(max_pwr)) max_pwr = - MAXFLOAT; /* Will be replaced by max() below */ vertex.x = ((float)i / (float)n) - 1.0f; #ifdef MAX_HOLD_LIVE vertex.y = max(live_vbo[i].y, max_pwr); #endif #ifdef MAX_HOLD_NORMAL for (i=0; i