1/* snes_ntsc 0.2.2. http://www.slack.net/~ant/ */
2
3/* Common implementation of NTSC filters */
4
5#include <assert.h>
6#include <math.h>
7
8/* Copyright (C) 2006 Shay Green. This module is free software; you
9can redistribute it and/or modify it under the terms of the GNU Lesser
10General Public License as published by the Free Software Foundation; either
11version 2.1 of the License, or (at your option) any later version. This
12module is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15details. You should have received a copy of the GNU Lesser General Public
16License along with this module; if not, write to the Free Software Foundation,
17Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18
/* Non-zero selects the variants of CORRECT_ERROR and DISTRIBUTE_ERROR that
skip error correction and only add rgb_bias back */
#define DISABLE_CORRECTION 0

/* single-precision pi; any earlier PI macro is discarded first */
#undef PI
#define PI 3.14159265358979323846f

/* Tunables. Each default below takes effect only when the including file has
not already defined the name. */
#if !defined (LUMA_CUTOFF)
	#define LUMA_CUTOFF 0.20
#endif
#if !defined (gamma_size)
	#define gamma_size 1
#endif
#if !defined (rgb_bits)
	#define rgb_bits 8
#endif
#if !defined (artifacts_max)
	#define artifacts_max (artifacts_mid * 1.5f)
#endif
#if !defined (fringing_max)
	#define fringing_max (fringing_mid * 2)
#endif
#if !defined (STD_HUE_CONDITION)
	#define STD_HUE_CONDITION( setup ) 1
#endif

/* Derived values. std_decoder_hue, artifacts_mid and fringing_mid must be
supplied by the including file. */
#define ext_decoder_hue     (std_decoder_hue + 15)
#define rgb_unit            (1 << rgb_bits)
#define rgb_offset          (rgb_unit * 2 + 0.5f)
46
47enum { burst_size = snes_ntsc_entry_size / burst_count };
48enum { kernel_half = 16 };
49enum { kernel_size = kernel_half * 2 + 1 };
50
51typedef struct init_t
52{
53 float to_rgb [burst_count * 6];
54 float to_float [gamma_size];
55 float contrast;
56 float brightness;
57 float artifacts;
58 float fringing;
59 float kernel [rescale_out * kernel_size * 2];
60} init_t;
61
/* Rotates the vector (i, q) in place by the angle whose sine is sin_b and
cosine is cos_b. i and q must be modifiable float lvalues. Every argument is
evaluated more than once, so pass expressions without side effects. Expands to
a single statement; terminate the invocation with a semicolon. */
#define ROTATE_IQ( i, q, sin_b, cos_b ) do {\
	float const rotate_iq_new_i_ = (i) * (cos_b) - (q) * (sin_b);\
	(q) = (i) * (sin_b) + (q) * (cos_b);\
	(i) = rotate_iq_new_i_;\
} while ( 0 )
68
69static void init_filters( init_t* impl, snes_ntsc_setup_t const* setup )
70{
71#if rescale_out > 1
72 float kernels [kernel_size * 2];
73#else
74 float* const kernels = impl->kernel;
75#endif
76
77 /* generate luma (y) filter using sinc kernel */
78 {
79 /* sinc with rolloff (dsf) */
80 float const rolloff = 1 + (float) setup->sharpness * (float) 0.032;
81 float const maxh = 32;
82 float const pow_a_n = (float) pow( rolloff, maxh );
83 float sum;
84 int i;
85 /* quadratic mapping to reduce negative (blurring) range */
86 float to_angle = (float) setup->resolution + 1;
87 to_angle = PI / maxh * (float) LUMA_CUTOFF * (to_angle * to_angle + 1);
88
89 kernels [kernel_size * 3 / 2] = maxh; /* default center value */
90 for ( i = 0; i < kernel_half * 2 + 1; i++ )
91 {
92 int x = i - kernel_half;
93 float angle = x * to_angle;
94 /* instability occurs at center point with rolloff very close to 1.0 */
95 if ( x || pow_a_n > (float) 1.056 || pow_a_n < (float) 0.981 )
96 {
97 float rolloff_cos_a = rolloff * (float) cos( angle );
98 float num = 1 - rolloff_cos_a -
99 pow_a_n * (float) cos( maxh * angle ) +
100 pow_a_n * rolloff * (float) cos( (maxh - 1) * angle );
101 float den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
102 float dsf = num / den;
103 kernels [kernel_size * 3 / 2 - kernel_half + i] = dsf - (float) 0.5;
104 }
105 }
106
107 /* apply blackman window and find sum */
108 sum = 0;
109 for ( i = 0; i < kernel_half * 2 + 1; i++ )
110 {
111 float x = PI * 2 / (kernel_half * 2) * i;
112 float blackman = 0.42f - 0.5f * (float) cos( x ) + 0.08f * (float) cos( x * 2 );
113 sum += (kernels [kernel_size * 3 / 2 - kernel_half + i] *= blackman);
114 }
115
116 /* normalize kernel */
117 sum = 1.0f / sum;
118 for ( i = 0; i < kernel_half * 2 + 1; i++ )
119 {
120 int x = kernel_size * 3 / 2 - kernel_half + i;
121 kernels [x] *= sum;
122 /* assert( kernels [x] == kernels [x] ); catch numerical instability */
123 }
124 }
125
126 /* generate chroma (iq) filter using gaussian kernel */
127 {
128 float const cutoff_factor = -0.03125f;
129 float cutoff = (float) setup->bleed;
130 int i;
131
132 if ( cutoff < 0 )
133 {
134 /* keep extreme value accessible only near upper end of scale (1.0) */
135 cutoff *= cutoff;
136 cutoff *= cutoff;
137 cutoff *= cutoff;
138 cutoff *= -30.0f / 0.65f;
139 }
140 cutoff = cutoff_factor - 0.65f * cutoff_factor * cutoff;
141
142 for ( i = -kernel_half; i <= kernel_half; i++ )
143 kernels [kernel_size / 2 + i] = (float) exp( i * i * cutoff );
144
145 /* normalize even and odd phases separately */
146 for ( i = 0; i < 2; i++ )
147 {
148 float sum = 0;
149 int x;
150 for ( x = i; x < kernel_size; x += 2 )
151 sum += kernels [x];
152
153 sum = 1.0f / sum;
154 for ( x = i; x < kernel_size; x += 2 )
155 {
156 kernels [x] *= sum;
157 /* assert( kernels [x] == kernels [x] ); catch numerical instability */
158 }
159 }
160 }
161
162 /*
163 printf( "luma:\n" );
164 for ( i = kernel_size; i < kernel_size * 2; i++ )
165 printf( "%f\n", kernels [i] );
166 printf( "chroma:\n" );
167 for ( i = 0; i < kernel_size; i++ )
168 printf( "%f\n", kernels [i] );
169 */
170
171 /* generate linear rescale kernels */
172 #if rescale_out > 1
173 {
174 float weight = 1.0f;
175 float* out = impl->kernel;
176 int n = rescale_out;
177 do
178 {
179 float remain = 0;
180 int i;
181 weight -= 1.0f / rescale_in;
182 for ( i = 0; i < kernel_size * 2; i++ )
183 {
184 float cur = kernels [i];
185 float m = cur * weight;
186 *out++ = m + remain;
187 remain = cur - m;
188 }
189 }
190 while ( --n );
191 }
192 #endif
193}
194
/* Decoder matrix init() falls back to when setup->decoder_matrix is null:
three (i, q) coefficient pairs, for red, green and blue in that order */
static float const default_decoder [6] = {
	 0.956f,  0.621f,
	-0.272f, -0.647f,
	-1.105f,  1.702f
};
197
198static void init( init_t* impl, snes_ntsc_setup_t const* setup )
199{
200 impl->brightness = (float) setup->brightness * (0.5f * rgb_unit) + rgb_offset;
201 impl->contrast = (float) setup->contrast * (0.5f * rgb_unit) + rgb_unit;
202 #ifdef default_palette_contrast
203 if ( !setup->palette )
204 impl->contrast *= default_palette_contrast;
205 #endif
206
207 impl->artifacts = (float) setup->artifacts;
208 if ( impl->artifacts > 0 )
209 impl->artifacts *= artifacts_max - artifacts_mid;
210 impl->artifacts = impl->artifacts * artifacts_mid + artifacts_mid;
211
212 impl->fringing = (float) setup->fringing;
213 if ( impl->fringing > 0 )
214 impl->fringing *= fringing_max - fringing_mid;
215 impl->fringing = impl->fringing * fringing_mid + fringing_mid;
216
217 init_filters( impl, setup );
218
219 /* generate gamma table */
220 if ( gamma_size > 1 )
221 {
222 float const to_float = 1.0f / (gamma_size - (gamma_size > 1));
223 float const gamma = 1.1333f - (float) setup->gamma * 0.5f;
224 /* match common PC's 2.2 gamma to TV's 2.65 gamma */
225 int i;
226 for ( i = 0; i < gamma_size; i++ )
227 impl->to_float [i] =
228 (float) pow( i * to_float, gamma ) * impl->contrast + impl->brightness;
229 }
230
231 /* setup decoder matricies */
232 {
233 float hue = (float) setup->hue * PI + PI / 180 * ext_decoder_hue;
234 float sat = (float) setup->saturation + 1;
235 float const* decoder = setup->decoder_matrix;
236 if ( !decoder )
237 {
238 decoder = default_decoder;
239 if ( STD_HUE_CONDITION( setup ) )
240 hue += PI / 180 * (std_decoder_hue - ext_decoder_hue);
241 }
242
243 {
244 float s = (float) sin( hue ) * sat;
245 float c = (float) cos( hue ) * sat;
246 float* out = impl->to_rgb;
247 int n;
248
249 n = burst_count;
250 do
251 {
252 float const* in = decoder;
253 int n = 3;
254 do
255 {
256 float i = *in++;
257 float q = *in++;
258 *out++ = i * c - q * s;
259 *out++ = i * s + q * c;
260 }
261 while ( --n );
262 if ( burst_count <= 1 )
263 break;
264 ROTATE_IQ( s, c, 0.866025f, -0.5f ); /* +120 degrees */
265 }
266 while ( --n );
267 }
268 }
269}
270
271/* kernel generation */
272
/* Converts (r, g, b) to YIQ. Assigns luma to y and the i component to i, and
yields the q component as the value of the expression. y and i must be
modifiable lvalues; r, g and b are each evaluated three times. */
#define RGB_TO_YIQ( r, g, b, y, i ) (\
	((y) = (r) * 0.299f + (g) * 0.587f + (b) * 0.114f),\
	((i) = (r) * 0.596f - (g) * 0.275f - (b) * 0.321f),\
	((r) * 0.212f - (g) * 0.523f + (b) * 0.311f)\
)

/* Converts (y, i, q) to RGB using the six coefficients at to_rgb. Assigns the
first two channels to r and g and yields the third as the value of the
expression; each result is cast to type. All value arguments are parenthesized
so that expressions such as "a + b" or "matrix + 6" expand correctly; they are
evaluated more than once. */
#define YIQ_TO_RGB( y, i, q, to_rgb, type, r, g ) (\
	(r) = (type) ((y) + (to_rgb) [0] * (i) + (to_rgb) [1] * (q)),\
	(g) = (type) ((y) + (to_rgb) [2] * (i) + (to_rgb) [3] * (q)),\
	(type) ((y) + (to_rgb) [4] * (i) + (to_rgb) [5] * (q))\
)

/* Packs three channels into one word at bit offsets 21, 11 and 1.
NOTE(review): left-shifting a negative signed value is undefined in ISO C;
confirm the value range callers pass here. */
#define PACK_RGB( r, g, b ) ((r) << 21 | (g) << 11 | (b) << 1)
286
287enum { rgb_kernel_size = burst_size / alignment_count };
288enum { rgb_bias = rgb_unit * 2 * snes_ntsc_rgb_builder };
289
290typedef struct pixel_info_t
291{
292 int offset;
293 float negate;
294 float kernel [4];
295} pixel_info_t;
296
/* PIXEL_OFFSET( ntsc, scaled ) expands to TWO comma-separated initializers,
matching the first two members of pixel_info_t: the starting kernel offset,
then a sign that is 1.0f or -1.0f depending on bit 1 of (ntsc + 100). */

/* sign part, shared by both variants of PIXEL_OFFSET */
#define PIXEL_NEGATE_( ntsc ) (1.0f - (((ntsc) + 100) & 2))

#if rescale_in > 1
	/* offset for an already-normalized (ntsc, scaled) pair; arguments are
	used unparenthesized, so only PIXEL_OFFSET should invoke this */
	#define PIXEL_OFFSET_( ntsc, scaled ) \
		(kernel_size / 2 + ntsc + (scaled != 0) + (rescale_out - scaled) % rescale_out + \
				(kernel_size * 2 * scaled))

	#define PIXEL_OFFSET( ntsc, scaled ) \
		PIXEL_OFFSET_( ((ntsc) - (scaled) / rescale_out * rescale_in),\
				(((scaled) + rescale_out * 10) % rescale_out) ),\
		PIXEL_NEGATE_( ntsc )
#else
	#define PIXEL_OFFSET( ntsc, scaled ) \
		(kernel_size / 2 + (ntsc) - (scaled)),\
		PIXEL_NEGATE_( ntsc )
#endif
311
312extern pixel_info_t const snes_ntsc_pixels [alignment_count];
313
314/* Generate pixel at all burst phases and column alignments */
315static void gen_kernel( init_t* impl, float y, float i, float q, snes_ntsc_rgb_t* out )
316{
317 /* generate for each scanline burst phase */
318 float const* to_rgb = impl->to_rgb;
319 int burst_remain = burst_count;
320 y -= rgb_offset;
321 do
322 {
323 /* Encode yiq into *two* composite signals (to allow control over artifacting).
324 Convolve these with kernels which: filter respective components, apply
325 sharpening, and rescale horizontally. Convert resulting yiq to rgb and pack
326 into integer. Based on algorithm by NewRisingSun. */
327 pixel_info_t const* pixel = snes_ntsc_pixels;
328 int alignment_remain = alignment_count;
329 do
330 {
331 /* negate is -1 when composite starts at odd multiple of 2 */
332 float const yy = y * impl->fringing * pixel->negate;
333 float const ic0 = (i + yy) * pixel->kernel [0];
334 float const qc1 = (q + yy) * pixel->kernel [1];
335 float const ic2 = (i - yy) * pixel->kernel [2];
336 float const qc3 = (q - yy) * pixel->kernel [3];
337
338 float const factor = impl->artifacts * pixel->negate;
339 float const ii = i * factor;
340 float const yc0 = (y + ii) * pixel->kernel [0];
341 float const yc2 = (y - ii) * pixel->kernel [2];
342
343 float const qq = q * factor;
344 float const yc1 = (y + qq) * pixel->kernel [1];
345 float const yc3 = (y - qq) * pixel->kernel [3];
346
347 float const* k = &impl->kernel [pixel->offset];
348 int n;
349 ++pixel;
350 for ( n = rgb_kernel_size; n; --n )
351 {
352 float i = k[0]*ic0 + k[2]*ic2;
353 float q = k[1]*qc1 + k[3]*qc3;
354 float y = k[kernel_size+0]*yc0 + k[kernel_size+1]*yc1 +
355 k[kernel_size+2]*yc2 + k[kernel_size+3]*yc3 + rgb_offset;
356 if ( rescale_out <= 1 )
357 k--;
358 else if ( k < &impl->kernel [kernel_size * 2 * (rescale_out - 1)] )
359 k += kernel_size * 2 - 1;
360 else
361 k -= kernel_size * 2 * (rescale_out - 1) + 2;
362 {
363 int r, g, b = YIQ_TO_RGB( y, i, q, to_rgb, int, r, g );
364 *out++ = PACK_RGB( r, g, b ) - rgb_bias;
365 }
366 }
367 }
368 while ( alignment_count > 1 && --alignment_remain );
369
370 if ( burst_count <= 1 )
371 break;
372
373 to_rgb += 6;
374
375 ROTATE_IQ( i, q, -0.866025f, -0.5f ); /* -120 degrees */
376 }
377 while ( --burst_remain );
378}
379
380static void correct_errors( snes_ntsc_rgb_t color, snes_ntsc_rgb_t* out );
381
382#if DISABLE_CORRECTION
383 #define CORRECT_ERROR( a ) { out [i] += rgb_bias; }
384 #define DISTRIBUTE_ERROR( a, b, c ) { out [i] += rgb_bias; }
385#else
386 #define CORRECT_ERROR( a ) { out [a] += error; }
387 #define DISTRIBUTE_ERROR( a, b, c ) {\
388 snes_ntsc_rgb_t fourth = (error + 2 * snes_ntsc_rgb_builder) >> 2;\
389 fourth &= (rgb_bias >> 1) - snes_ntsc_rgb_builder;\
390 fourth -= rgb_bias >> 2;\
391 out [a] += fourth;\
392 out [b] += fourth;\
393 out [c] += fourth;\
394 out [i] += error - (fourth * 3);\
395 }
396#endif
397
/* Clamps the packed value rgb with SNES_NTSC_CLAMP_ and stores its three
channels as bytes at out_ [0], out_ [1] and out_ [2], taken from bit offsets
21, 11 and 1. out_ is evaluated first, then rgb, each exactly once. Temporaries
use reserved-looking names so that caller variables such as "out" or "clamped"
can be passed safely. Expands to a single statement; terminate the invocation
with a semicolon. */
#define RGB_PALETTE_OUT( rgb, out_ ) do {\
	unsigned char* rgb_palette_dst_ = (out_);\
	snes_ntsc_rgb_t rgb_palette_val_ = (rgb);\
	SNES_NTSC_CLAMP_( rgb_palette_val_, (8 - rgb_bits) );\
	rgb_palette_dst_ [0] = (unsigned char) (rgb_palette_val_ >> 21);\
	rgb_palette_dst_ [1] = (unsigned char) (rgb_palette_val_ >> 11);\
	rgb_palette_dst_ [2] = (unsigned char) (rgb_palette_val_ >> 1);\
} while ( 0 )
407
408/* blitter related */
409
/* Make "restrict" usable on compilers that lack the C99 keyword. In C99 and
later it is already a keyword and is left alone: defining a macro with a
keyword's name before including a standard header is not permitted there. */
#ifndef restrict
	#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
		/* native keyword available; nothing to define */
	#elif defined (__GNUC__)
		#define restrict __restrict__
	#elif defined (_MSC_VER) && _MSC_VER > 1300
		#define restrict __restrict
	#else
		/* no support for restricted pointers */
		#define restrict
	#endif
#endif
420
421#include <limits.h>
422
/* snes_ntsc_out_t: unsigned short (required to be exactly 16 bits) for output
depths up to 16, otherwise whichever of unsigned int / unsigned long is
exactly 32 bits */
#if SNES_NTSC_OUT_DEPTH <= 16
	#if USHRT_MAX != 0xFFFF
		#error "Need 16-bit int type"
	#endif
	typedef unsigned short snes_ntsc_out_t;
#elif UINT_MAX == 0xFFFFFFFF
	typedef unsigned int snes_ntsc_out_t;
#elif ULONG_MAX == 0xFFFFFFFF
	typedef unsigned long snes_ntsc_out_t;
#else
	#error "Need 32-bit int type"
#endif
440