1 | /* snes_ntsc 0.2.2. http://www.slack.net/~ant/ */ |
2 | |
3 | /* Common implementation of NTSC filters */ |
4 | |
5 | #include <assert.h> |
6 | #include <math.h> |
7 | |
8 | /* Copyright (C) 2006 Shay Green. This module is free software; you |
9 | can redistribute it and/or modify it under the terms of the GNU Lesser |
10 | General Public License as published by the Free Software Foundation; either |
11 | version 2.1 of the License, or (at your option) any later version. This |
12 | module is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
14 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
15 | details. You should have received a copy of the GNU Lesser General Public |
16 | License along with this module; if not, write to the Free Software Foundation, |
17 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ |
18 | |
/* When nonzero, CORRECT_ERROR and DISTRIBUTE_ERROR are defined to add only
rgb_bias instead of applying the computed error term. */
#define DISABLE_CORRECTION 0

/* Single-precision pi; any earlier definition of PI is discarded first */
#undef PI
#define PI 3.14159265358979323846f

/* Each default below applies only when the name has not already been defined
before this point. */

/* scales the luma filter's angle step in init_filters() */
#ifndef LUMA_CUTOFF
#define LUMA_CUTOFF 0.20
#endif
/* number of entries in init_t.to_float; init() fills the table only when > 1 */
#ifndef gamma_size
#define gamma_size 1
#endif
/* bits per color component; see rgb_unit */
#ifndef rgb_bits
#define rgb_bits 8
#endif
/* used by init() to scale a positive artifacts setting; artifacts_mid is not
defined in this section and must come from elsewhere */
#ifndef artifacts_max
#define artifacts_max (artifacts_mid * 1.5f)
#endif
/* used by init() to scale a positive fringing setting; fringing_mid is not
defined in this section and must come from elsewhere */
#ifndef fringing_max
#define fringing_max (fringing_mid * 2)
#endif
/* when true and setup->decoder_matrix is null, init() replaces the
ext_decoder_hue offset with std_decoder_hue */
#ifndef STD_HUE_CONDITION
#define STD_HUE_CONDITION( setup ) 1
#endif

/* hue offset in degrees (init() converts it with PI / 180) */
#define ext_decoder_hue (std_decoder_hue + 15)
/* 2 to the power rgb_bits */
#define rgb_unit (1 << rgb_bits)
/* bias subtracted from y on entry to gen_kernel() and added back to every
filtered luma sample */
#define rgb_offset (rgb_unit * 2 + 0.5f)
46 | |
47 | enum { burst_size = snes_ntsc_entry_size / burst_count }; |
48 | enum { kernel_half = 16 }; |
49 | enum { kernel_size = kernel_half * 2 + 1 }; |
50 | |
/* Working state filled in by init() / init_filters() and read by gen_kernel() */
typedef struct init_t
{
float to_rgb [burst_count * 6]; /* six decoder coefficients per burst phase */
float to_float [gamma_size]; /* gamma table; init() fills it only when gamma_size > 1 */
float contrast; /* derived from setup->contrast in init() */
float brightness; /* derived from setup->brightness in init() */
float artifacts; /* scales the i/q terms mixed into luma in gen_kernel() */
float fringing; /* scales the luma term mixed into i/q in gen_kernel() */
float kernel [rescale_out * kernel_size * 2]; /* rescale_out sets of kernel_size chroma taps followed by kernel_size luma taps */
} init_t;
61 | |
/* Rotate the vector (i, q) in place by the angle whose sine is sin_b and whose
cosine is cos_b. i and q must be modifiable float lvalues. Every argument is
expanded more than once, so pass expressions without side effects. Arguments
are parenthesized so that compound expressions (e.g. a + b) keep their
intended grouping, and the temporary has a macro-specific name so it cannot
hide a caller variable called t. */
#define ROTATE_IQ( i, q, sin_b, cos_b ) {\
	float rotate_iq_tmp_;\
	rotate_iq_tmp_ = (i) * (cos_b) - (q) * (sin_b);\
	(q) = (i) * (sin_b) + (q) * (cos_b);\
	(i) = rotate_iq_tmp_;\
}
68 | |
69 | static void init_filters( init_t* impl, snes_ntsc_setup_t const* setup ) |
70 | { |
71 | #if rescale_out > 1 |
72 | float kernels [kernel_size * 2]; |
73 | #else |
74 | float* const kernels = impl->kernel; |
75 | #endif |
76 | |
77 | /* generate luma (y) filter using sinc kernel */ |
78 | { |
79 | /* sinc with rolloff (dsf) */ |
80 | float const rolloff = 1 + (float) setup->sharpness * (float) 0.032; |
81 | float const maxh = 32; |
82 | float const pow_a_n = (float) pow( rolloff, maxh ); |
83 | float sum; |
84 | int i; |
85 | /* quadratic mapping to reduce negative (blurring) range */ |
86 | float to_angle = (float) setup->resolution + 1; |
87 | to_angle = PI / maxh * (float) LUMA_CUTOFF * (to_angle * to_angle + 1); |
88 | |
89 | kernels [kernel_size * 3 / 2] = maxh; /* default center value */ |
90 | for ( i = 0; i < kernel_half * 2 + 1; i++ ) |
91 | { |
92 | int x = i - kernel_half; |
93 | float angle = x * to_angle; |
94 | /* instability occurs at center point with rolloff very close to 1.0 */ |
95 | if ( x || pow_a_n > (float) 1.056 || pow_a_n < (float) 0.981 ) |
96 | { |
97 | float rolloff_cos_a = rolloff * (float) cos( angle ); |
98 | float num = 1 - rolloff_cos_a - |
99 | pow_a_n * (float) cos( maxh * angle ) + |
100 | pow_a_n * rolloff * (float) cos( (maxh - 1) * angle ); |
101 | float den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff; |
102 | float dsf = num / den; |
103 | kernels [kernel_size * 3 / 2 - kernel_half + i] = dsf - (float) 0.5; |
104 | } |
105 | } |
106 | |
107 | /* apply blackman window and find sum */ |
108 | sum = 0; |
109 | for ( i = 0; i < kernel_half * 2 + 1; i++ ) |
110 | { |
111 | float x = PI * 2 / (kernel_half * 2) * i; |
112 | float blackman = 0.42f - 0.5f * (float) cos( x ) + 0.08f * (float) cos( x * 2 ); |
113 | sum += (kernels [kernel_size * 3 / 2 - kernel_half + i] *= blackman); |
114 | } |
115 | |
116 | /* normalize kernel */ |
117 | sum = 1.0f / sum; |
118 | for ( i = 0; i < kernel_half * 2 + 1; i++ ) |
119 | { |
120 | int x = kernel_size * 3 / 2 - kernel_half + i; |
121 | kernels [x] *= sum; |
122 | /* assert( kernels [x] == kernels [x] ); catch numerical instability */ |
123 | } |
124 | } |
125 | |
126 | /* generate chroma (iq) filter using gaussian kernel */ |
127 | { |
128 | float const cutoff_factor = -0.03125f; |
129 | float cutoff = (float) setup->bleed; |
130 | int i; |
131 | |
132 | if ( cutoff < 0 ) |
133 | { |
134 | /* keep extreme value accessible only near upper end of scale (1.0) */ |
135 | cutoff *= cutoff; |
136 | cutoff *= cutoff; |
137 | cutoff *= cutoff; |
138 | cutoff *= -30.0f / 0.65f; |
139 | } |
140 | cutoff = cutoff_factor - 0.65f * cutoff_factor * cutoff; |
141 | |
142 | for ( i = -kernel_half; i <= kernel_half; i++ ) |
143 | kernels [kernel_size / 2 + i] = (float) exp( i * i * cutoff ); |
144 | |
145 | /* normalize even and odd phases separately */ |
146 | for ( i = 0; i < 2; i++ ) |
147 | { |
148 | float sum = 0; |
149 | int x; |
150 | for ( x = i; x < kernel_size; x += 2 ) |
151 | sum += kernels [x]; |
152 | |
153 | sum = 1.0f / sum; |
154 | for ( x = i; x < kernel_size; x += 2 ) |
155 | { |
156 | kernels [x] *= sum; |
157 | /* assert( kernels [x] == kernels [x] ); catch numerical instability */ |
158 | } |
159 | } |
160 | } |
161 | |
162 | /* |
163 | printf( "luma:\n" ); |
164 | for ( i = kernel_size; i < kernel_size * 2; i++ ) |
165 | printf( "%f\n", kernels [i] ); |
166 | printf( "chroma:\n" ); |
167 | for ( i = 0; i < kernel_size; i++ ) |
168 | printf( "%f\n", kernels [i] ); |
169 | */ |
170 | |
171 | /* generate linear rescale kernels */ |
172 | #if rescale_out > 1 |
173 | { |
174 | float weight = 1.0f; |
175 | float* out = impl->kernel; |
176 | int n = rescale_out; |
177 | do |
178 | { |
179 | float remain = 0; |
180 | int i; |
181 | weight -= 1.0f / rescale_in; |
182 | for ( i = 0; i < kernel_size * 2; i++ ) |
183 | { |
184 | float cur = kernels [i]; |
185 | float m = cur * weight; |
186 | *out++ = m + remain; |
187 | remain = cur - m; |
188 | } |
189 | } |
190 | while ( --n ); |
191 | } |
192 | #endif |
193 | } |
194 | |
/* Decoder coefficients used by init() when setup->decoder_matrix is null.
init() reads them as three consecutive (i, q) pairs; YIQ_TO_RGB applies the
resulting pairs to r, g and b in that order. */
static float const default_decoder [6] =
{ 0.956f, 0.621f, -0.272f, -0.647f, -1.105f, 1.702f };
197 | |
198 | static void init( init_t* impl, snes_ntsc_setup_t const* setup ) |
199 | { |
200 | impl->brightness = (float) setup->brightness * (0.5f * rgb_unit) + rgb_offset; |
201 | impl->contrast = (float) setup->contrast * (0.5f * rgb_unit) + rgb_unit; |
202 | #ifdef default_palette_contrast |
203 | if ( !setup->palette ) |
204 | impl->contrast *= default_palette_contrast; |
205 | #endif |
206 | |
207 | impl->artifacts = (float) setup->artifacts; |
208 | if ( impl->artifacts > 0 ) |
209 | impl->artifacts *= artifacts_max - artifacts_mid; |
210 | impl->artifacts = impl->artifacts * artifacts_mid + artifacts_mid; |
211 | |
212 | impl->fringing = (float) setup->fringing; |
213 | if ( impl->fringing > 0 ) |
214 | impl->fringing *= fringing_max - fringing_mid; |
215 | impl->fringing = impl->fringing * fringing_mid + fringing_mid; |
216 | |
217 | init_filters( impl, setup ); |
218 | |
219 | /* generate gamma table */ |
220 | if ( gamma_size > 1 ) |
221 | { |
222 | float const to_float = 1.0f / (gamma_size - (gamma_size > 1)); |
223 | float const gamma = 1.1333f - (float) setup->gamma * 0.5f; |
224 | /* match common PC's 2.2 gamma to TV's 2.65 gamma */ |
225 | int i; |
226 | for ( i = 0; i < gamma_size; i++ ) |
227 | impl->to_float [i] = |
228 | (float) pow( i * to_float, gamma ) * impl->contrast + impl->brightness; |
229 | } |
230 | |
231 | /* setup decoder matricies */ |
232 | { |
233 | float hue = (float) setup->hue * PI + PI / 180 * ext_decoder_hue; |
234 | float sat = (float) setup->saturation + 1; |
235 | float const* decoder = setup->decoder_matrix; |
236 | if ( !decoder ) |
237 | { |
238 | decoder = default_decoder; |
239 | if ( STD_HUE_CONDITION( setup ) ) |
240 | hue += PI / 180 * (std_decoder_hue - ext_decoder_hue); |
241 | } |
242 | |
243 | { |
244 | float s = (float) sin( hue ) * sat; |
245 | float c = (float) cos( hue ) * sat; |
246 | float* out = impl->to_rgb; |
247 | int n; |
248 | |
249 | n = burst_count; |
250 | do |
251 | { |
252 | float const* in = decoder; |
253 | int n = 3; |
254 | do |
255 | { |
256 | float i = *in++; |
257 | float q = *in++; |
258 | *out++ = i * c - q * s; |
259 | *out++ = i * s + q * c; |
260 | } |
261 | while ( --n ); |
262 | if ( burst_count <= 1 ) |
263 | break; |
264 | ROTATE_IQ( s, c, 0.866025f, -0.5f ); /* +120 degrees */ |
265 | } |
266 | while ( --n ); |
267 | } |
268 | } |
269 | } |
270 | |
271 | /* kernel generation */ |
272 | |
/* Convert r, g, b to YIQ. Assigns the luma to y and the first chroma component
to i, and evaluates to the second chroma component (q). r, g and b are each
expanded three times, so pass expressions without side effects. */
#define RGB_TO_YIQ( r, g, b, y, i ) (\
(y = (r) * 0.299f + (g) * 0.587f + (b) * 0.114f),\
(i = (r) * 0.596f - (g) * 0.275f - (b) * 0.321f),\
((r) * 0.212f - (g) * 0.523f + (b) * 0.311f)\
)
278 | |
/* Convert y, i, q to RGB using the six coefficients at to_rgb, which are read
as (i, q) weight pairs for red, green and blue in that order. Each component is
cast to `type`; red is assigned to r, green to g, and the macro evaluates to
blue. y, i, q and to_rgb are expanded three times, so pass expressions without
side effects. All value arguments are parenthesized so that compound
expressions (e.g. a - b) keep their intended grouping. */
#define YIQ_TO_RGB( y, i, q, to_rgb, type, r, g ) (\
	(r) = (type) ((y) + (to_rgb) [0] * (i) + (to_rgb) [1] * (q)),\
	(g) = (type) ((y) + (to_rgb) [2] * (i) + (to_rgb) [3] * (q)),\
	(type) ((y) + (to_rgb) [4] * (i) + (to_rgb) [5] * (q))\
)
284 | |
/* Combine three components into one value: r shifted to bit 21, g to bit 11
and b to bit 1 */
#define PACK_RGB( r, g, b ) ((r) << 21 | (g) << 11 | (b) << 1)
286 | |
287 | enum { rgb_kernel_size = burst_size / alignment_count }; |
288 | enum { rgb_bias = rgb_unit * 2 * snes_ntsc_rgb_builder }; |
289 | |
/* Per-alignment parameters read by gen_kernel() */
typedef struct pixel_info_t
{
int offset; /* index into init_t.kernel where filtering starts */
float negate; /* sign factor applied to the fringing and artifacts terms */
float kernel [4]; /* weights applied to the four composite phase terms */
} pixel_info_t;
296 | |
/* PIXEL_OFFSET( ntsc, scaled ) expands to TWO comma-separated expressions: an
integer kernel offset followed by a float that is 1.0f when bit 1 of
(ntsc + 100) is clear and -1.0f when it is set. Presumably these initialize
pixel_info_t.offset and pixel_info_t.negate - confirm against the table that
defines snes_ntsc_pixels. */
#if rescale_in > 1
/* Helper for the rescaling case. Its parameters are not parenthesized, so it
must only be invoked with fully parenthesized arguments, as PIXEL_OFFSET does. */
#define PIXEL_OFFSET_( ntsc, scaled ) \
(kernel_size / 2 + ntsc + (scaled != 0) + (rescale_out - scaled) % rescale_out + \
(kernel_size * 2 * scaled))

/* Passes whole groups of rescale_out scaled pixels to the helper as rescale_in
ntsc samples, and the remainder as `scaled` reduced modulo rescale_out (the
added rescale_out * 10 is a multiple of rescale_out). */
#define PIXEL_OFFSET( ntsc, scaled ) \
PIXEL_OFFSET_( ((ntsc) - (scaled) / rescale_out * rescale_in),\
(((scaled) + rescale_out * 10) % rescale_out) ),\
(1.0f - (((ntsc) + 100) & 2))
#else
/* No rescaling: the offset is measured from the kernel's center tap */
#define PIXEL_OFFSET( ntsc, scaled ) \
(kernel_size / 2 + (ntsc) - (scaled)),\
(1.0f - (((ntsc) + 100) & 2))
#endif
311 | |
/* One pixel_info_t per column alignment; defined outside this section and
read in order by gen_kernel() */
extern pixel_info_t const snes_ntsc_pixels [alignment_count];
313 | |
314 | /* Generate pixel at all burst phases and column alignments */ |
315 | static void gen_kernel( init_t* impl, float y, float i, float q, snes_ntsc_rgb_t* out ) |
316 | { |
317 | /* generate for each scanline burst phase */ |
318 | float const* to_rgb = impl->to_rgb; |
319 | int burst_remain = burst_count; |
320 | y -= rgb_offset; |
321 | do |
322 | { |
323 | /* Encode yiq into *two* composite signals (to allow control over artifacting). |
324 | Convolve these with kernels which: filter respective components, apply |
325 | sharpening, and rescale horizontally. Convert resulting yiq to rgb and pack |
326 | into integer. Based on algorithm by NewRisingSun. */ |
327 | pixel_info_t const* pixel = snes_ntsc_pixels; |
328 | int alignment_remain = alignment_count; |
329 | do |
330 | { |
331 | /* negate is -1 when composite starts at odd multiple of 2 */ |
332 | float const yy = y * impl->fringing * pixel->negate; |
333 | float const ic0 = (i + yy) * pixel->kernel [0]; |
334 | float const qc1 = (q + yy) * pixel->kernel [1]; |
335 | float const ic2 = (i - yy) * pixel->kernel [2]; |
336 | float const qc3 = (q - yy) * pixel->kernel [3]; |
337 | |
338 | float const factor = impl->artifacts * pixel->negate; |
339 | float const ii = i * factor; |
340 | float const yc0 = (y + ii) * pixel->kernel [0]; |
341 | float const yc2 = (y - ii) * pixel->kernel [2]; |
342 | |
343 | float const qq = q * factor; |
344 | float const yc1 = (y + qq) * pixel->kernel [1]; |
345 | float const yc3 = (y - qq) * pixel->kernel [3]; |
346 | |
347 | float const* k = &impl->kernel [pixel->offset]; |
348 | int n; |
349 | ++pixel; |
350 | for ( n = rgb_kernel_size; n; --n ) |
351 | { |
352 | float i = k[0]*ic0 + k[2]*ic2; |
353 | float q = k[1]*qc1 + k[3]*qc3; |
354 | float y = k[kernel_size+0]*yc0 + k[kernel_size+1]*yc1 + |
355 | k[kernel_size+2]*yc2 + k[kernel_size+3]*yc3 + rgb_offset; |
356 | if ( rescale_out <= 1 ) |
357 | k--; |
358 | else if ( k < &impl->kernel [kernel_size * 2 * (rescale_out - 1)] ) |
359 | k += kernel_size * 2 - 1; |
360 | else |
361 | k -= kernel_size * 2 * (rescale_out - 1) + 2; |
362 | { |
363 | int r, g, b = YIQ_TO_RGB( y, i, q, to_rgb, int, r, g ); |
364 | *out++ = PACK_RGB( r, g, b ) - rgb_bias; |
365 | } |
366 | } |
367 | } |
368 | while ( alignment_count > 1 && --alignment_remain ); |
369 | |
370 | if ( burst_count <= 1 ) |
371 | break; |
372 | |
373 | to_rgb += 6; |
374 | |
375 | ROTATE_IQ( i, q, -0.866025f, -0.5f ); /* -120 degrees */ |
376 | } |
377 | while ( --burst_remain ); |
378 | } |
379 | |
/* Forward declaration only; the definition is not in this section. */
static void correct_errors( snes_ntsc_rgb_t color, snes_ntsc_rgb_t* out );
381 | |
/* Error-adjustment helpers. Both macros use names that are NOT parameters and
must be in scope where they are expanded: `out` and `i` always, plus `error`
when correction is enabled. */
#if DISABLE_CORRECTION
/* correction disabled: ignore the arguments and only add rgb_bias to out [i] */
#define CORRECT_ERROR( a ) { out [i] += rgb_bias; }
#define DISTRIBUTE_ERROR( a, b, c ) { out [i] += rgb_bias; }
#else
/* add the whole error to out [a] */
#define CORRECT_ERROR( a ) { out [a] += error; }
/* Derive `fourth` from error (add 2 * snes_ntsc_rgb_builder, shift right by
two, mask, then subtract rgb_bias >> 2), add it to out [a], out [b] and
out [c], and add the remainder error - 3 * fourth to out [i]. */
#define DISTRIBUTE_ERROR( a, b, c ) {\
snes_ntsc_rgb_t fourth = (error + 2 * snes_ntsc_rgb_builder) >> 2;\
fourth &= (rgb_bias >> 1) - snes_ntsc_rgb_builder;\
fourth -= rgb_bias >> 2;\
out [a] += fourth;\
out [b] += fourth;\
out [c] += fourth;\
out [i] += error - (fourth * 3);\
}
#endif
397 | |
/* Clamp the packed color `rgb` with SNES_NTSC_CLAMP_ and store three bytes at
out_ [0], out_ [1] and out_ [2], taken from the clamped value shifted right by
21, 11 and 1 bits. SNES_NTSC_CLAMP_ and snes_ntsc_rgb_t are defined outside
this section. The temporaries have macro-specific names so that an argument
spelled `out` or `clamped` refers to the caller's variable instead of to the
macro's own uninitialized local. */
#define RGB_PALETTE_OUT( rgb, out_ )\
{\
	unsigned char* palette_out_dst_ = (out_);\
	snes_ntsc_rgb_t palette_out_rgb_ = (rgb);\
	SNES_NTSC_CLAMP_( palette_out_rgb_, (8 - rgb_bits) );\
	palette_out_dst_ [0] = (unsigned char) (palette_out_rgb_ >> 21);\
	palette_out_dst_ [1] = (unsigned char) (palette_out_rgb_ >> 11);\
	palette_out_dst_ [2] = (unsigned char) (palette_out_rgb_ >> 1);\
}
407 | |
408 | /* blitter related */ |
409 | |
/* Provide a `restrict` macro unless one is already defined: the compiler's
own spelling on GCC-compatible compilers and on MSVC newer than _MSC_VER 1300,
otherwise an empty definition so the qualifier simply disappears. */
#ifndef restrict
#if defined (__GNUC__)
#define restrict __restrict__
#elif defined (_MSC_VER) && _MSC_VER > 1300
#define restrict __restrict
#else
/* no support for restricted pointers */
#define restrict
#endif
#endif
420 | |
421 | #include <limits.h> |
422 | |
/* Choose the output pixel type from SNES_NTSC_OUT_DEPTH: a 16-bit unsigned
type for depths up to 16, otherwise a 32-bit unsigned type. Compilation stops
with an error when no type of the required width is found. Note that an
undefined SNES_NTSC_OUT_DEPTH evaluates as 0 in #if, which selects the 16-bit
branch. */
#if SNES_NTSC_OUT_DEPTH <= 16
#if USHRT_MAX == 0xFFFF
typedef unsigned short snes_ntsc_out_t;
#else
#error "Need 16-bit int type"
#endif

#else
#if UINT_MAX == 0xFFFFFFFF
typedef unsigned int snes_ntsc_out_t;
#elif ULONG_MAX == 0xFFFFFFFF
typedef unsigned long snes_ntsc_out_t;
#else
#error "Need 32-bit int type"
#endif

#endif
440 | |