1 | /******************************************************************** |
2 | * * |
3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
7 | * * |
8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
10 | * * |
11 | ******************************************************************** |
12 | |
13 | function: |
14 | last mod: $Id$ |
15 | |
16 | ********************************************************************/ |
17 | |
18 | #include <stdlib.h> |
19 | #include <string.h> |
20 | #include "state.h" |
21 | #if defined(OC_DUMP_IMAGES) |
22 | # include <stdio.h> |
23 | # include "png.h" |
24 | # include "zlib.h" |
25 | #endif |
26 | |
27 | /*The function used to fill in the chroma plane motion vectors for a macro |
28 | block when 4 different motion vectors are specified in the luma plane. |
29 | This version is for use with chroma decimated in the X and Y directions |
30 | (4:2:0). |
31 | _cbmvs: The chroma block-level motion vectors to fill in. |
32 | _lbmvs: The luma block-level motion vectors.*/ |
33 | static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
34 | int dx; |
35 | int dy; |
36 | dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]) |
37 | +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); |
38 | dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]) |
39 | +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); |
40 | _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2)); |
41 | } |
42 | |
43 | /*The function used to fill in the chroma plane motion vectors for a macro |
44 | block when 4 different motion vectors are specified in the luma plane. |
45 | This version is for use with chroma decimated in the Y direction. |
46 | _cbmvs: The chroma block-level motion vectors to fill in. |
47 | _lbmvs: The luma block-level motion vectors.*/ |
48 | static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
49 | int dx; |
50 | int dy; |
51 | dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]); |
52 | dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]); |
53 | _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
54 | dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]); |
55 | dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]); |
56 | _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
57 | } |
58 | |
59 | /*The function used to fill in the chroma plane motion vectors for a macro |
60 | block when 4 different motion vectors are specified in the luma plane. |
61 | This version is for use with chroma decimated in the X direction (4:2:2). |
62 | _cbmvs: The chroma block-level motion vectors to fill in. |
63 | _lbmvs: The luma block-level motion vectors.*/ |
64 | static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
65 | int dx; |
66 | int dy; |
67 | dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]); |
68 | dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]); |
69 | _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
70 | dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]); |
71 | dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]); |
72 | _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1)); |
73 | } |
74 | |
75 | /*The function used to fill in the chroma plane motion vectors for a macro |
76 | block when 4 different motion vectors are specified in the luma plane. |
77 | This version is for use with no chroma decimation (4:4:4). |
78 | _cbmvs: The chroma block-level motion vectors to fill in. |
79 | _lmbmv: The luma macro-block level motion vector to fill in for use in |
80 | prediction. |
81 | _lbmvs: The luma block-level motion vectors.*/ |
82 | static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){ |
83 | _cbmvs[0]=_lbmvs[0]; |
84 | _cbmvs[1]=_lbmvs[1]; |
85 | _cbmvs[2]=_lbmvs[2]; |
86 | _cbmvs[3]=_lbmvs[3]; |
87 | } |
88 | |
/*A table of functions used to fill in the chroma plane motion vectors for a
   macro block when 4 different motion vectors are specified in the luma
   plane.
  The table has TH_PF_NFORMATS entries, listed in the order 00, 01, 10, 11
   (the two-digit suffixes of the functions); it is presumably indexed by
   pixel format value (TODO confirm against the callers).
  Each function is cast to oc_set_chroma_mvs_func, whose declaration is not
   part of this file.*/
const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
};
98 | |
99 | |
100 | |
101 | /*Returns the fragment index of the top-left block in a macro block. |
102 | This can be used to test whether or not the whole macro block is valid. |
103 | _sb_map: The super block map. |
104 | _quadi: The quadrant number. |
105 | Return: The index of the fragment of the upper left block in the macro |
106 | block, or -1 if the block lies outside the coded frame.*/ |
107 | static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){ |
108 | /*It so happens that under the Hilbert curve ordering described below, the |
109 | upper-left block in each macro block is at index 0, except in macro block |
110 | 3, where it is at index 2.*/ |
111 | return _sb_map[_quadi][_quadi&_quadi<<1]; |
112 | } |
113 | |
114 | /*Fills in the mapping from block positions to fragment numbers for a single |
115 | color plane. |
116 | This function also fills in the "valid" flag of each quadrant in the super |
117 | block flags. |
118 | _sb_maps: The array of super block maps for the color plane. |
119 | _sb_flags: The array of super block flags for the color plane. |
120 | _frag0: The index of the first fragment in the plane. |
121 | _hfrags: The number of horizontal fragments in a coded frame. |
122 | _vfrags: The number of vertical fragments in a coded frame.*/ |
123 | static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[], |
124 | oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){ |
125 | /*Contains the (macro_block,block) indices for a 4x4 grid of |
126 | fragments. |
127 | The pattern is a 4x4 Hilbert space-filling curve. |
128 | A Hilbert curve has the nice property that as the curve grows larger, its |
129 | fractal dimension approaches 2. |
130 | The intuition is that nearby blocks in the curve are also close spatially, |
131 | with the previous element always an immediate neighbor, so that runs of |
132 | blocks should be well correlated.*/ |
133 | static const int SB_MAP[4][4][2]={ |
134 | {{0,0},{0,1},{3,2},{3,3}}, |
135 | {{0,3},{0,2},{3,1},{3,0}}, |
136 | {{1,0},{1,3},{2,0},{2,3}}, |
137 | {{1,1},{1,2},{2,1},{2,2}} |
138 | }; |
139 | ptrdiff_t yfrag; |
140 | unsigned sbi; |
141 | int y; |
142 | sbi=0; |
143 | yfrag=_frag0; |
144 | for(y=0;;y+=4){ |
145 | int imax; |
146 | int x; |
147 | /*Figure out how many columns of blocks in this super block lie within the |
148 | image.*/ |
149 | imax=_vfrags-y; |
150 | if(imax>4)imax=4; |
151 | else if(imax<=0)break; |
152 | for(x=0;;x+=4,sbi++){ |
153 | ptrdiff_t xfrag; |
154 | int jmax; |
155 | int quadi; |
156 | int i; |
157 | /*Figure out how many rows of blocks in this super block lie within the |
158 | image.*/ |
159 | jmax=_hfrags-x; |
160 | if(jmax>4)jmax=4; |
161 | else if(jmax<=0)break; |
162 | /*By default, set all fragment indices to -1.*/ |
163 | memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi])); |
164 | /*Fill in the fragment map for this super block.*/ |
165 | xfrag=yfrag+x; |
166 | for(i=0;i<imax;i++){ |
167 | int j; |
168 | for(j=0;j<jmax;j++){ |
169 | _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j; |
170 | } |
171 | xfrag+=_hfrags; |
172 | } |
173 | /*Mark which quadrants of this super block lie within the image.*/ |
174 | for(quadi=0;quadi<4;quadi++){ |
175 | _sb_flags[sbi].quad_valid|= |
176 | (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi; |
177 | } |
178 | } |
179 | yfrag+=_hfrags<<2; |
180 | } |
181 | } |
182 | |
183 | /*Fills in the Y plane fragment map for a macro block given the fragment |
184 | coordinates of its upper-left hand corner. |
185 | _mb_map: The macro block map to fill. |
186 | _fplane: The description of the Y plane. |
187 | _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
188 | _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
189 | static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3], |
190 | const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){ |
191 | int i; |
192 | int j; |
193 | for(i=0;i<2;i++)for(j=0;j<2;j++){ |
194 | _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j; |
195 | } |
196 | } |
197 | |
198 | /*Fills in the chroma plane fragment maps for a macro block. |
199 | This version is for use with chroma decimated in the X and Y directions |
200 | (4:2:0). |
201 | _mb_map: The macro block map to fill. |
202 | _fplanes: The descriptions of the fragment planes. |
203 | _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
204 | _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
205 | static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3], |
206 | const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
207 | ptrdiff_t fragi; |
208 | _xfrag0>>=1; |
209 | _yfrag0>>=1; |
210 | fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
211 | _mb_map[1][0]=fragi+_fplanes[1].froffset; |
212 | _mb_map[2][0]=fragi+_fplanes[2].froffset; |
213 | } |
214 | |
215 | /*Fills in the chroma plane fragment maps for a macro block. |
216 | This version is for use with chroma decimated in the Y direction. |
217 | _mb_map: The macro block map to fill. |
218 | _fplanes: The descriptions of the fragment planes. |
219 | _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
220 | _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
221 | static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3], |
222 | const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
223 | ptrdiff_t fragi; |
224 | int j; |
225 | _yfrag0>>=1; |
226 | fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
227 | for(j=0;j<2;j++){ |
228 | _mb_map[1][j]=fragi+_fplanes[1].froffset; |
229 | _mb_map[2][j]=fragi+_fplanes[2].froffset; |
230 | fragi++; |
231 | } |
232 | } |
233 | |
234 | /*Fills in the chroma plane fragment maps for a macro block. |
235 | This version is for use with chroma decimated in the X direction (4:2:2). |
236 | _mb_map: The macro block map to fill. |
237 | _fplanes: The descriptions of the fragment planes. |
238 | _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
239 | _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
240 | static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3], |
241 | const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
242 | ptrdiff_t fragi; |
243 | int i; |
244 | _xfrag0>>=1; |
245 | fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0; |
246 | for(i=0;i<2;i++){ |
247 | _mb_map[1][i<<1]=fragi+_fplanes[1].froffset; |
248 | _mb_map[2][i<<1]=fragi+_fplanes[2].froffset; |
249 | fragi+=_fplanes[1].nhfrags; |
250 | } |
251 | } |
252 | |
253 | /*Fills in the chroma plane fragment maps for a macro block. |
254 | This version is for use with no chroma decimation (4:4:4). |
255 | This uses the already filled-in luma plane values. |
256 | _mb_map: The macro block map to fill. |
257 | _fplanes: The descriptions of the fragment planes. |
258 | _xfrag0: The X location of the upper-left hand fragment in the luma plane. |
259 | _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/ |
260 | static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3], |
261 | const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){ |
262 | int k; |
263 | (void)_xfrag0; |
264 | (void)_yfrag0; |
265 | for(k=0;k<4;k++){ |
266 | _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset; |
267 | _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset; |
268 | } |
269 | } |
270 | |
/*The function type used to fill in the chroma plane fragment maps for a
   macro block.
  All of the oc_mb_fill_cmapping*() variants in this file share this
   signature, which lets them be selected through a table.
  _mb_map:  The macro block map to fill.
  _fplanes: The descriptions of the fragment planes.
  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
279 | |
/*A table of functions used to fill in the chroma plane fragment maps for a
   macro block for each type of chrominance decimation.
  oc_mb_create_mapping() indexes this table with its _pixel_fmt argument, so
   the entry order must match the pixel format values: 00 (decimated in X and
   Y), 01 (decimated in Y), 10 (decimated in X), 11 (no decimation).*/
static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
  oc_mb_fill_cmapping00,
  oc_mb_fill_cmapping01,
  oc_mb_fill_cmapping10,
  oc_mb_fill_cmapping11
};
288 | |
289 | /*Fills in the mapping from macro blocks to their corresponding fragment |
290 | numbers in each plane. |
291 | _mb_maps: The list of macro block maps. |
292 | _mb_modes: The list of macro block modes; macro blocks completely outside |
293 | the coded region are marked invalid. |
294 | _fplanes: The descriptions of the fragment planes. |
295 | _pixel_fmt: The chroma decimation type.*/ |
296 | static void oc_mb_create_mapping(oc_mb_map _mb_maps[], |
297 | signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){ |
298 | oc_mb_fill_cmapping_func mb_fill_cmapping; |
299 | unsigned sbi; |
300 | int y; |
301 | mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt]; |
302 | /*Loop through the luma plane super blocks.*/ |
303 | for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){ |
304 | int x; |
305 | for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){ |
306 | int ymb; |
307 | /*Loop through the macro blocks in each super block in display order.*/ |
308 | for(ymb=0;ymb<2;ymb++){ |
309 | int xmb; |
310 | for(xmb=0;xmb<2;xmb++){ |
311 | unsigned mbi; |
312 | int mbx; |
313 | int mby; |
314 | mbi=sbi<<2|OC_MB_MAP[ymb][xmb]; |
315 | mbx=x|xmb<<1; |
316 | mby=y|ymb<<1; |
317 | /*Initialize fragment indices to -1.*/ |
318 | memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi])); |
319 | /*Make sure this macro block is within the encoded region.*/ |
320 | if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){ |
321 | _mb_modes[mbi]=OC_MODE_INVALID; |
322 | continue; |
323 | } |
324 | /*Fill in the fragment indices for the luma plane.*/ |
325 | oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby); |
326 | /*Fill in the fragment indices for the chroma planes.*/ |
327 | (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby); |
328 | } |
329 | } |
330 | } |
331 | } |
332 | } |
333 | |
/*Marks the fragments which fall all or partially outside the displayable
   region of the frame.
  Fragments entirely outside the region get their invalid flag set.
  Fragments that straddle the edge of the region get the index of an entry in
   _state->borders holding a 64-bit mask of their displayable pixels and the
   number of such pixels; identical masks share one entry.
  All remaining fragments get a border index of -1.
  _state: The Theora state containing the fragments to be marked.*/
static void oc_state_border_init(oc_theora_state *_state){
  oc_fragment       *frag;
  oc_fragment       *yfrag_end;
  oc_fragment       *xfrag_end;
  oc_fragment_plane *fplane;
  int                crop_x0;
  int                crop_y0;
  int                crop_xf;
  int                crop_yf;
  int                pli;
  int                y;
  int                x;
  /*The method we use here is slow, but the code is dead simple and handles
     all the special cases easily.
    We only ever need to do it once.*/
  /*Loop through the fragments, marking those completely outside the
     displayable region and constructing a border mask for those that straddle
     the border.*/
  _state->nborders=0;
  /*frag walks the whole fragment array; yfrag_end is advanced to the end of
     each plane in turn.*/
  yfrag_end=frag=_state->frags;
  for(pli=0;pli<3;pli++){
    fplane=_state->fplanes+pli;
    /*Set up the cropping rectangle for this plane.*/
    crop_x0=_state->info.pic_x;
    crop_xf=_state->info.pic_x+_state->info.pic_width;
    crop_y0=_state->info.pic_y;
    crop_yf=_state->info.pic_y+_state->info.pic_height;
    if(pli>0){
      /*Scale the rectangle for decimated chroma planes: the start rounds
         down and the end rounds up.*/
      if(!(_state->info.pixel_fmt&1)){
        crop_x0=crop_x0>>1;
        crop_xf=crop_xf+1>>1;
      }
      if(!(_state->info.pixel_fmt&2)){
        crop_y0=crop_y0>>1;
        crop_yf=crop_yf+1>>1;
      }
    }
    /*(x,y) is the pixel position of the current fragment's first pixel within
       the plane; fragments are 8x8 pixels.*/
    y=0;
    for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
      x=0;
      for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
        /*First check to see if this fragment is completely outside the
           displayable region.*/
        /*Note the special checks for an empty cropping rectangle.
          This guarantees that if we count a fragment as straddling the
           border below, at least one pixel in the fragment will be inside
           the displayable region.*/
        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
         crop_x0>=crop_xf||crop_y0>=crop_yf){
          frag->invalid=1;
        }
        /*Otherwise, check to see if it straddles the border.*/
        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
          ogg_int64_t mask;
          int         npixels;
          int         i;
          mask=npixels=0;
          /*Build the mask: bit (i<<3|j) is set when the pixel in row i,
             column j of the fragment is displayable.*/
          for(i=0;i<8;i++){
            int j;
            for(j=0;j<8;j++){
              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
                /*NOTE(review): for i==7 and j==7 this shifts a 1 into bit 63
                   of what is presumably a signed 64-bit type; confirm that
                   every supported compiler handles this as intended.*/
                mask|=(ogg_int64_t)1<<(i<<3|j);
                npixels++;
              }
            }
          }
          /*Search the fragment array for border info with the same pattern.
            In general, there will be at most 8 different patterns (per
             plane).*/
          for(i=0;;i++){
            /*Ran off the end of the known patterns: append a new one, then
               fall through to use it.*/
            if(i>=_state->nborders){
              _state->nborders++;
              _state->borders[i].mask=mask;
              _state->borders[i].npixels=npixels;
            }
            /*Not a match: try the next known pattern.*/
            else if(_state->borders[i].mask!=mask)continue;
            frag->borderi=i;
            break;
          }
        }
        /*Entirely inside the displayable region: no border mask needed.*/
        else frag->borderi=-1;
      }
    }
  }
}
423 | |
424 | static int oc_state_frarray_init(oc_theora_state *_state){ |
425 | int yhfrags; |
426 | int yvfrags; |
427 | int chfrags; |
428 | int cvfrags; |
429 | ptrdiff_t yfrags; |
430 | ptrdiff_t cfrags; |
431 | ptrdiff_t nfrags; |
432 | unsigned yhsbs; |
433 | unsigned yvsbs; |
434 | unsigned chsbs; |
435 | unsigned cvsbs; |
436 | unsigned ysbs; |
437 | unsigned csbs; |
438 | unsigned nsbs; |
439 | size_t nmbs; |
440 | int hdec; |
441 | int vdec; |
442 | int pli; |
443 | /*Figure out the number of fragments in each plane.*/ |
444 | /*These parameters have already been validated to be multiples of 16.*/ |
445 | yhfrags=_state->info.frame_width>>3; |
446 | yvfrags=_state->info.frame_height>>3; |
447 | hdec=!(_state->info.pixel_fmt&1); |
448 | vdec=!(_state->info.pixel_fmt&2); |
449 | chfrags=yhfrags+hdec>>hdec; |
450 | cvfrags=yvfrags+vdec>>vdec; |
451 | yfrags=yhfrags*(ptrdiff_t)yvfrags; |
452 | cfrags=chfrags*(ptrdiff_t)cvfrags; |
453 | nfrags=yfrags+2*cfrags; |
454 | /*Figure out the number of super blocks in each plane.*/ |
455 | yhsbs=yhfrags+3>>2; |
456 | yvsbs=yvfrags+3>>2; |
457 | chsbs=chfrags+3>>2; |
458 | cvsbs=cvfrags+3>>2; |
459 | ysbs=yhsbs*yvsbs; |
460 | csbs=chsbs*cvsbs; |
461 | nsbs=ysbs+2*csbs; |
462 | nmbs=(size_t)ysbs<<2; |
463 | /*Check for overflow. |
464 | We support the ridiculous upper limits of the specification (1048560 by |
465 | 1048560, or 3 TB frames) if the target architecture has 64-bit pointers, |
466 | but for those with 32-bit pointers (or smaller!) we have to check. |
467 | If the caller wants to prevent denial-of-service by imposing a more |
468 | reasonable upper limit on the size of attempted allocations, they must do |
469 | so themselves; we have no platform independent way to determine how much |
470 | system memory there is nor an application-independent way to decide what a |
471 | "reasonable" allocation is.*/ |
472 | if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags|| |
473 | ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){ |
474 | return TH_EIMPL; |
475 | } |
476 | /*Initialize the fragment array.*/ |
477 | _state->fplanes[0].nhfrags=yhfrags; |
478 | _state->fplanes[0].nvfrags=yvfrags; |
479 | _state->fplanes[0].froffset=0; |
480 | _state->fplanes[0].nfrags=yfrags; |
481 | _state->fplanes[0].nhsbs=yhsbs; |
482 | _state->fplanes[0].nvsbs=yvsbs; |
483 | _state->fplanes[0].sboffset=0; |
484 | _state->fplanes[0].nsbs=ysbs; |
485 | _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags; |
486 | _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags; |
487 | _state->fplanes[1].froffset=yfrags; |
488 | _state->fplanes[2].froffset=yfrags+cfrags; |
489 | _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags; |
490 | _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs; |
491 | _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs; |
492 | _state->fplanes[1].sboffset=ysbs; |
493 | _state->fplanes[2].sboffset=ysbs+csbs; |
494 | _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs; |
495 | _state->nfrags=nfrags; |
496 | _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags)); |
497 | _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs)); |
498 | _state->nsbs=nsbs; |
499 | _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps)); |
500 | _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags)); |
501 | _state->nhmbs=yhsbs<<1; |
502 | _state->nvmbs=yvsbs<<1; |
503 | _state->nmbs=nmbs; |
504 | _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps)); |
505 | _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes)); |
506 | _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis)); |
507 | if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL|| |
508 | _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL|| |
509 | _state->coded_fragis==NULL){ |
510 | return TH_EFAULT; |
511 | } |
512 | /*Create the mapping from super blocks to fragments.*/ |
513 | for(pli=0;pli<3;pli++){ |
514 | oc_fragment_plane *fplane; |
515 | fplane=_state->fplanes+pli; |
516 | oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset, |
517 | _state->sb_flags+fplane->sboffset,fplane->froffset, |
518 | fplane->nhfrags,fplane->nvfrags); |
519 | } |
520 | /*Create the mapping from macro blocks to fragments.*/ |
521 | oc_mb_create_mapping(_state->mb_maps,_state->mb_modes, |
522 | _state->fplanes,_state->info.pixel_fmt); |
523 | /*Initialize the invalid and borderi fields of each fragment.*/ |
524 | oc_state_border_init(_state); |
525 | return 0; |
526 | } |
527 | |
528 | static void oc_state_frarray_clear(oc_theora_state *_state){ |
529 | _ogg_free(_state->coded_fragis); |
530 | _ogg_free(_state->mb_modes); |
531 | _ogg_free(_state->mb_maps); |
532 | _ogg_free(_state->sb_flags); |
533 | _ogg_free(_state->sb_maps); |
534 | _ogg_free(_state->frag_mvs); |
535 | _ogg_free(_state->frags); |
536 | } |
537 | |
538 | |
539 | /*Initializes the buffers used for reconstructed frames. |
540 | These buffers are padded with 16 extra pixels on each side, to allow |
541 | unrestricted motion vectors without special casing the boundary. |
542 | If chroma is decimated in either direction, the padding is reduced by a |
543 | factor of 2 on the appropriate sides. |
544 | _nrefs: The number of reference buffers to init; must be in the range 3...6.*/ |
545 | static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){ |
546 | th_info *info; |
547 | unsigned char *ref_frame_data; |
548 | size_t ref_frame_data_sz; |
549 | size_t ref_frame_sz; |
550 | size_t yplane_sz; |
551 | size_t cplane_sz; |
552 | int yhstride; |
553 | int yheight; |
554 | int chstride; |
555 | int cheight; |
556 | ptrdiff_t align; |
557 | ptrdiff_t yoffset; |
558 | ptrdiff_t coffset; |
559 | ptrdiff_t *frag_buf_offs; |
560 | ptrdiff_t fragi; |
561 | int hdec; |
562 | int vdec; |
563 | int rfi; |
564 | int pli; |
565 | if(_nrefs<3||_nrefs>6)return TH_EINVAL; |
566 | info=&_state->info; |
567 | /*Compute the image buffer parameters for each plane.*/ |
568 | hdec=!(info->pixel_fmt&1); |
569 | vdec=!(info->pixel_fmt&2); |
570 | yhstride=info->frame_width+2*OC_UMV_PADDING; |
571 | yheight=info->frame_height+2*OC_UMV_PADDING; |
572 | /*Require 16-byte aligned rows in the chroma planes.*/ |
573 | chstride=(yhstride>>hdec)+15&~15; |
574 | cheight=yheight>>vdec; |
575 | yplane_sz=yhstride*(size_t)yheight; |
576 | cplane_sz=chstride*(size_t)cheight; |
577 | yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride; |
578 | coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride; |
579 | /*Although we guarantee the rows of the chroma planes are a multiple of 16 |
580 | bytes, the initial padding on the first row may only be 8 bytes. |
581 | Compute the offset needed to the actual image data to a multiple of 16.*/ |
582 | align=-coffset&15; |
583 | ref_frame_sz=yplane_sz+2*cplane_sz+16; |
584 | ref_frame_data_sz=_nrefs*ref_frame_sz; |
585 | /*Check for overflow. |
586 | The same caveats apply as for oc_state_frarray_init().*/ |
587 | if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz|| |
588 | ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){ |
589 | return TH_EIMPL; |
590 | } |
591 | ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16); |
592 | frag_buf_offs=_state->frag_buf_offs= |
593 | _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs)); |
594 | if(ref_frame_data==NULL||frag_buf_offs==NULL){ |
595 | _ogg_free(frag_buf_offs); |
596 | oc_aligned_free(ref_frame_data); |
597 | return TH_EFAULT; |
598 | } |
599 | /*Set up the width, height and stride for the image buffers.*/ |
600 | _state->ref_frame_bufs[0][0].width=info->frame_width; |
601 | _state->ref_frame_bufs[0][0].height=info->frame_height; |
602 | _state->ref_frame_bufs[0][0].stride=yhstride; |
603 | _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width= |
604 | info->frame_width>>hdec; |
605 | _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height= |
606 | info->frame_height>>vdec; |
607 | _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride= |
608 | chstride; |
609 | for(rfi=1;rfi<_nrefs;rfi++){ |
610 | memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0], |
611 | sizeof(_state->ref_frame_bufs[0])); |
612 | } |
613 | _state->ref_frame_handle=ref_frame_data; |
614 | /*Set up the data pointers for the image buffers.*/ |
615 | for(rfi=0;rfi<_nrefs;rfi++){ |
616 | _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset; |
617 | ref_frame_data+=yplane_sz+align; |
618 | _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset; |
619 | ref_frame_data+=cplane_sz; |
620 | _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset; |
621 | ref_frame_data+=cplane_sz+(16-align); |
622 | /*Flip the buffer upside down. |
623 | This allows us to decode Theora's bottom-up frames in their natural |
624 | order, yet return a top-down buffer with a positive stride to the user.*/ |
625 | oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi], |
626 | _state->ref_frame_bufs[rfi]); |
627 | } |
628 | _state->ref_ystride[0]=-yhstride; |
629 | _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride; |
630 | /*Initialize the fragment buffer offsets.*/ |
631 | ref_frame_data=_state->ref_frame_bufs[0][0].data; |
632 | fragi=0; |
633 | for(pli=0;pli<3;pli++){ |
634 | th_img_plane *iplane; |
635 | oc_fragment_plane *fplane; |
636 | unsigned char *vpix; |
637 | ptrdiff_t stride; |
638 | ptrdiff_t vfragi_end; |
639 | int nhfrags; |
640 | iplane=_state->ref_frame_bufs[0]+pli; |
641 | fplane=_state->fplanes+pli; |
642 | vpix=iplane->data; |
643 | vfragi_end=fplane->froffset+fplane->nfrags; |
644 | nhfrags=fplane->nhfrags; |
645 | stride=iplane->stride; |
646 | while(fragi<vfragi_end){ |
647 | ptrdiff_t hfragi_end; |
648 | unsigned char *hpix; |
649 | hpix=vpix; |
650 | for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){ |
651 | frag_buf_offs[fragi]=hpix-ref_frame_data; |
652 | hpix+=8; |
653 | } |
654 | vpix+=stride<<3; |
655 | } |
656 | } |
657 | /*Initialize the reference frame pointers and indices.*/ |
658 | _state->ref_frame_idx[OC_FRAME_GOLD]= |
659 | _state->ref_frame_idx[OC_FRAME_PREV]= |
660 | _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]= |
661 | _state->ref_frame_idx[OC_FRAME_PREV_ORIG]= |
662 | _state->ref_frame_idx[OC_FRAME_SELF]= |
663 | _state->ref_frame_idx[OC_FRAME_IO]=-1; |
664 | _state->ref_frame_data[OC_FRAME_GOLD]= |
665 | _state->ref_frame_data[OC_FRAME_PREV]= |
666 | _state->ref_frame_data[OC_FRAME_GOLD_ORIG]= |
667 | _state->ref_frame_data[OC_FRAME_PREV_ORIG]= |
668 | _state->ref_frame_data[OC_FRAME_SELF]= |
669 | _state->ref_frame_data[OC_FRAME_IO]=NULL; |
670 | return 0; |
671 | } |
672 | |
673 | static void oc_state_ref_bufs_clear(oc_theora_state *_state){ |
674 | _ogg_free(_state->frag_buf_offs); |
675 | oc_aligned_free(_state->ref_frame_handle); |
676 | } |
677 | |
678 | |
/*Sets up the state to use the plain C implementations of the optimizable
   routines.
  The CPU flags are cleared, and the forward zig-zag table is set to
   OC_FZIG_ZAG.
  When OC_STATE_USE_VTABLE is defined, every entry of the function table is
   also pointed at its *_c implementation.
  _state: The Theora state to set up.*/
void oc_state_accel_init_c(oc_theora_state *_state){
  _state->cpu_flags=0;
#if defined(OC_STATE_USE_VTABLE)
  _state->opt_vtable.frag_copy=oc_frag_copy_c;
  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
  _state->opt_vtable.idct8x8=oc_idct8x8_c;
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_c;
  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
#endif
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
}
696 | |
697 | |
698 | int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){ |
699 | int ret; |
700 | /*First validate the parameters.*/ |
701 | if(_info==NULL)return TH_EFAULT; |
702 | /*The width and height of the encoded frame must be multiples of 16. |
703 | They must also, when divided by 16, fit into a 16-bit unsigned integer. |
704 | The displayable frame offset coordinates must fit into an 8-bit unsigned |
705 | integer. |
706 | Note that the offset Y in the API is specified on the opposite side from |
707 | how it is specified in the bitstream, because the Y axis is flipped in |
708 | the bitstream. |
709 | The displayable frame must fit inside the encoded frame. |
710 | The color space must be one known by the encoder. |
711 | The framerate ratio must not contain a zero value.*/ |
712 | if((_info->frame_width&0xF)||(_info->frame_height&0xF)|| |
713 | _info->frame_width<=0||_info->frame_width>=0x100000|| |
714 | _info->frame_height<=0||_info->frame_height>=0x100000|| |
715 | _info->pic_x+_info->pic_width>_info->frame_width|| |
716 | _info->pic_y+_info->pic_height>_info->frame_height|| |
717 | _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255|| |
718 | /*Note: the following <0 comparisons may generate spurious warnings on |
719 | platforms where enums are unsigned. |
720 | We could cast them to unsigned and just use the following >= comparison, |
721 | but there are a number of compilers which will mis-optimize this. |
722 | It's better to live with the spurious warnings.*/ |
723 | _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES|| |
724 | _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS|| |
725 | _info->fps_numerator<1||_info->fps_denominator<1){ |
726 | return TH_EINVAL; |
727 | } |
728 | memset(_state,0,sizeof(*_state)); |
729 | memcpy(&_state->info,_info,sizeof(*_info)); |
730 | /*Invert the sense of pic_y to match Theora's right-handed coordinate |
731 | system.*/ |
732 | _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y; |
733 | _state->frame_type=OC_UNKWN_FRAME; |
734 | oc_state_accel_init(_state); |
735 | ret=oc_state_frarray_init(_state); |
736 | if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs); |
737 | if(ret<0){ |
738 | oc_state_frarray_clear(_state); |
739 | return ret; |
740 | } |
741 | /*If the keyframe_granule_shift is out of range, use the maximum allowable |
742 | value.*/ |
743 | if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){ |
744 | _state->info.keyframe_granule_shift=31; |
745 | } |
746 | _state->keyframe_num=0; |
747 | _state->curframe_num=-1; |
748 | /*3.2.0 streams mark the frame index instead of the frame count. |
749 | This was changed with stream version 3.2.1 to conform to other Ogg |
750 | codecs. |
751 | We add an extra bias when computing granule positions for new streams.*/ |
752 | _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1); |
753 | return 0; |
754 | } |
755 | |
/*Clears the reference buffers and then the fragment arrays of a state, by
   calling oc_state_ref_bufs_clear() followed by oc_state_frarray_clear().
  This is the reverse of the order in which the matching init routines
   (oc_state_frarray_init(), then oc_state_ref_bufs_init()) are invoked
   earlier in this file.
  _state: The state to clear.*/
void oc_state_clear(oc_theora_state *_state){
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
760 | |
761 | |
762 | /*Duplicates the pixels on the border of the image plane out into the |
763 | surrounding padding for use by unrestricted motion vectors. |
764 | This function only adds the left and right borders, and only for the fragment |
765 | rows specified. |
766 | _refi: The index of the reference buffer to pad. |
767 | _pli: The color plane. |
768 | _y0: The Y coordinate of the first row to pad. |
769 | _yend: The Y coordinate of the row to stop padding at.*/ |
770 | void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli, |
771 | int _y0,int _yend){ |
772 | th_img_plane *iplane; |
773 | unsigned char *apix; |
774 | unsigned char *bpix; |
775 | unsigned char *epix; |
776 | int stride; |
777 | int hpadding; |
778 | hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); |
779 | iplane=_state->ref_frame_bufs[_refi]+_pli; |
780 | stride=iplane->stride; |
781 | apix=iplane->data+_y0*(ptrdiff_t)stride; |
782 | bpix=apix+iplane->width-1; |
783 | epix=iplane->data+_yend*(ptrdiff_t)stride; |
784 | /*Note the use of != instead of <, which allows the stride to be negative.*/ |
785 | while(apix!=epix){ |
786 | memset(apix-hpadding,apix[0],hpadding); |
787 | memset(bpix+1,bpix[0],hpadding); |
788 | apix+=stride; |
789 | bpix+=stride; |
790 | } |
791 | } |
792 | |
793 | /*Duplicates the pixels on the border of the image plane out into the |
794 | surrounding padding for use by unrestricted motion vectors. |
795 | This function only adds the top and bottom borders, and must be called after |
796 | the left and right borders are added. |
797 | _refi: The index of the reference buffer to pad. |
798 | _pli: The color plane.*/ |
799 | void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){ |
800 | th_img_plane *iplane; |
801 | unsigned char *apix; |
802 | unsigned char *bpix; |
803 | unsigned char *epix; |
804 | int stride; |
805 | int hpadding; |
806 | int vpadding; |
807 | int fullw; |
808 | hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1)); |
809 | vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2)); |
810 | iplane=_state->ref_frame_bufs[_refi]+_pli; |
811 | stride=iplane->stride; |
812 | fullw=iplane->width+(hpadding<<1); |
813 | apix=iplane->data-hpadding; |
814 | bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding; |
815 | epix=apix-stride*(ptrdiff_t)vpadding; |
816 | while(apix!=epix){ |
817 | memcpy(apix-stride,apix,fullw); |
818 | memcpy(bpix+stride,bpix,fullw); |
819 | apix-=stride; |
820 | bpix+=stride; |
821 | } |
822 | } |
823 | |
824 | /*Duplicates the pixels on the border of the given reference image out into |
825 | the surrounding padding for use by unrestricted motion vectors. |
826 | _state: The context containing the reference buffers. |
827 | _refi: The index of the reference buffer to pad.*/ |
828 | void oc_state_borders_fill(oc_theora_state *_state,int _refi){ |
829 | int pli; |
830 | for(pli=0;pli<3;pli++){ |
831 | oc_state_borders_fill_rows(_state,_refi,pli,0, |
832 | _state->ref_frame_bufs[_refi][pli].height); |
833 | oc_state_borders_fill_caps(_state,_refi,pli); |
834 | } |
835 | } |
836 | |
/*Determines the offsets in an image buffer to use for motion compensation.
  The offsets are relative to the position of the block being predicted, and
   are expressed in units of pixels using the row stride of the given plane.
  _state:   The Theora state the offsets are to be computed with.
  _offsets: Returns the offset for the buffer(s).
            _offsets[0] is always set.
            _offsets[1] is set if the motion vector has non-zero fractional
             components.
  _pli:     The color plane index.
  _mv:      The motion vector.
  Return: The number of offsets returned: 1 or 2.*/
int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
 int _pli,oc_mv _mv){
  /*Here is a brief description of how Theora handles motion vectors:
    Motion vector components are specified to half-pixel accuracy in
     undecimated directions of each plane, and quarter-pixel accuracy in
     decimated directions.
    Integer parts are extracted by dividing (not shifting) by the
     appropriate amount, with truncation towards zero.
    These integer values are used to calculate the first offset.

    If either of the fractional parts are non-zero, then a second offset is
     computed.
    No third or fourth offsets are computed, even if both components have
     non-zero fractional parts.
    The second offset is computed by dividing (not shifting) by the
     appropriate amount, always truncating _away_ from zero.*/
#if 0
  /*This version of the code doesn't use any tables, but is slower.*/
  int ystride;
  int xprec;
  int yprec;
  int xfrac;
  int yfrac;
  int offs;
  int dx;
  int dy;
  ystride=_state->ref_ystride[_pli];
  /*These two variables decide whether we are in half- or quarter-pixel
     precision in each component.*/
  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
  dx=OC_MV_X(_mv);
  dy=OC_MV_Y(_mv);
  /*These two variables are either 0 if all the fractional bits are zero or -1
     if any of them are non-zero.*/
  xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
  yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
  offs=(dx>>xprec)+(dy>>yprec)*ystride;
  if(xfrac||yfrac){
    int xmask;
    int ymask;
    xmask=OC_SIGNMASK(dx);
    ymask=OC_SIGNMASK(dy);
    yfrac&=ystride;
    _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
    _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
    return 2;
  }
  else{
    _offsets[0]=offs;
    return 1;
  }
#else
  /*Using tables simplifies the code, and there's enough arithmetic to hide the
     latencies of the memory references.*/
  /*The integer part of a motion vector component, indexed by
     [quarter-pel flag][component+31].
    Row 0 holds component/2 and row 1 holds component/4, both truncated
     towards zero.
    Only 63 entries are listed explicitly, covering components -31...31; the
     last entry is implicitly zero.*/
  static const signed char OC_MVMAP[2][64]={
    {
      -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
      -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
      0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
      8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
    },
    {
      -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
      -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
      0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
      4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
    }
  };
  /*The extra step to add to the integer part to get the second offset, with
     the same indexing as OC_MVMAP.
    An entry is 0 when the component has no fractional part, and otherwise
     -1 or +1 according to the sign of the component (i.e., one more pixel
     away from zero).*/
  static const signed char OC_MVMAP2[2][64]={
    {
      -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
      0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
    },
    {
      -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
      0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
      0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
      0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
    }
  };
  int ystride;
  int qpx;
  int qpy;
  int mx;
  int my;
  int mx2;
  int my2;
  int offs;
  int dx;
  int dy;
  ystride=_state->ref_ystride[_pli];
  /*qpy (and qpx below) select the quarter-pel table row: they are 1 only for
     non-luma planes when the corresponding pixel_fmt bit is clear.*/
  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
  dx=OC_MV_X(_mv);
  dy=OC_MV_Y(_mv);
  /*NOTE(review): the table lookups assume each component lies in -31...31;
     nothing here checks that, so confirm the callers guarantee it.*/
  my=OC_MVMAP[qpy][dy+31];
  my2=OC_MVMAP2[qpy][dy+31];
  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
  mx=OC_MVMAP[qpx][dx+31];
  mx2=OC_MVMAP2[qpx][dx+31];
  offs=my*ystride+mx;
  /*A second offset is only needed if at least one component has a fractional
     part.*/
  if(mx2||my2){
    _offsets[1]=offs+my2*ystride+mx2;
    _offsets[0]=offs;
    return 2;
  }
  _offsets[0]=offs;
  return 1;
#endif
}
958 | |
959 | void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi, |
960 | int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
961 | unsigned char *dst; |
962 | ptrdiff_t frag_buf_off; |
963 | int ystride; |
964 | int refi; |
965 | /*Apply the inverse transform.*/ |
966 | /*Special case only having a DC component.*/ |
967 | if(_last_zzi<2){ |
968 | ogg_int16_t p; |
969 | int ci; |
970 | /*We round this dequant product (and not any of the others) because there's |
971 | no iDCT rounding.*/ |
972 | p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
973 | /*LOOP VECTORIZES.*/ |
974 | for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p; |
975 | } |
976 | else{ |
977 | /*First, dequantize the DC coefficient.*/ |
978 | _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
979 | oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi); |
980 | } |
981 | /*Fill in the target buffer.*/ |
982 | frag_buf_off=_state->frag_buf_offs[_fragi]; |
983 | refi=_state->frags[_fragi].refi; |
984 | ystride=_state->ref_ystride[_pli]; |
985 | dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
986 | if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64); |
987 | else{ |
988 | const unsigned char *ref; |
989 | int mvoffsets[2]; |
990 | ref=_state->ref_frame_data[refi]+frag_buf_off; |
991 | if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
992 | _state->frag_mvs[_fragi])>1){ |
993 | oc_frag_recon_inter2(_state, |
994 | dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64); |
995 | } |
996 | else{ |
997 | oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
998 | } |
999 | } |
1000 | } |
1001 | |
/*Filters across a vertical block edge (i.e., horizontally) for 8 rows.
  _pix:     Points at the first pixel to the right of the edge in the first
             row; two pixels on either side of the edge are read, and the
             two closest to the edge are modified.
  _ystride: The offset between consecutive rows.
  _bv:      The bounding values array, already offset so that index 0 is its
             center.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            i;
  row=_pix-2;
  for(i=0;i<8;i++,row+=_ystride){
    int delta;
    int adj;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      adj=OC_CLAMPI(OC_MINI(-_2flimit-adj,0),adj,OC_MAXI(_2flimit-adj,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[delta+4>>3];
    row[1]=OC_CLAMP255(row[1]+adj);
    row[2]=OC_CLAMP255(row[2]-adj);
  }
}
1017 | |
/*Filters across a horizontal block edge (i.e., vertically) for 8 columns.
  _pix:     Points at the first pixel below the edge in the first column; two
             rows on either side of the edge are read, and the two closest to
             the edge are modified.
  _ystride: The offset between consecutive rows.
  _bv:      The bounding values array, already offset so that index 0 is its
             center.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row0;
  unsigned char *row1;
  unsigned char *row2;
  unsigned char *row3;
  int            col;
  /*The four rows straddling the edge: two above it and two below it.*/
  row0=_pix-_ystride*2;
  row1=row0+_ystride;
  row2=row1+_ystride;
  row3=row2+_ystride;
  for(col=0;col<8;col++){
    int delta;
    int adj;
    delta=row0[col]-row3[col]+3*(row2[col]-row1[col]);
    /*The _bv array is used to compute the function
      adj=OC_CLAMPI(OC_MINI(-_2flimit-adj,0),adj,OC_MAXI(_2flimit-adj,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[delta+4>>3];
    row1[col]=OC_CLAMP255(row1[col]+adj);
    row2[col]=OC_CLAMP255(row2[col]-adj);
  }
}
1032 | |
/*Builds the bounding values lookup table used by the loop filter.
  The table is centered on index 127: for an offset k from the center it
   holds k when |k|<_flimit, tapers back towards zero for
   _flimit<=|k|<2*_flimit, and is zero everywhere else.
  _bv:     Storage for the array.
  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
  signed char *center;
  int          k;
  memset(_bv,0,256*sizeof(*_bv));
  center=_bv+127;
  for(k=0;k<_flimit;k++){
    int outer;
    /*The distance from the center of the tapering entry paired with k.*/
    outer=k+_flimit;
    /*The linear ramp around the center.*/
    center[-k]=(signed char)(-k);
    center[k]=(signed char)k;
    /*The tapering ramps on either side, where they fit in the array.*/
    if(outer<=127)center[-outer]=(signed char)(k-_flimit);
    if(outer<129)center[outer]=(signed char)(_flimit-k);
  }
}
1046 | |
1047 | /*Apply the loop filter to a given set of fragment rows in the given plane. |
1048 | The filter may be run on the bottom edge, affecting pixels in the next row of |
1049 | fragments, so this row also needs to be available. |
1050 | _bv: The bounding values array. |
1051 | _refi: The index of the frame buffer to filter. |
1052 | _pli: The color plane to filter. |
1053 | _fragy0: The Y coordinate of the first fragment row to filter. |
1054 | _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/ |
1055 | void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state, |
1056 | signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){ |
1057 | const oc_fragment_plane *fplane; |
1058 | const oc_fragment *frags; |
1059 | const ptrdiff_t *frag_buf_offs; |
1060 | unsigned char *ref_frame_data; |
1061 | ptrdiff_t fragi_top; |
1062 | ptrdiff_t fragi_bot; |
1063 | ptrdiff_t fragi0; |
1064 | ptrdiff_t fragi0_end; |
1065 | int ystride; |
1066 | int nhfrags; |
1067 | _bv+=127; |
1068 | fplane=_state->fplanes+_pli; |
1069 | nhfrags=fplane->nhfrags; |
1070 | fragi_top=fplane->froffset; |
1071 | fragi_bot=fragi_top+fplane->nfrags; |
1072 | fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags; |
1073 | fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags; |
1074 | ystride=_state->ref_ystride[_pli]; |
1075 | frags=_state->frags; |
1076 | frag_buf_offs=_state->frag_buf_offs; |
1077 | ref_frame_data=_state->ref_frame_data[_refi]; |
1078 | /*The following loops are constructed somewhat non-intuitively on purpose. |
1079 | The main idea is: if a block boundary has at least one coded fragment on |
1080 | it, the filter is applied to it. |
1081 | However, the order that the filters are applied in matters, and VP3 chose |
1082 | the somewhat strange ordering used below.*/ |
1083 | while(fragi0<fragi0_end){ |
1084 | ptrdiff_t fragi; |
1085 | ptrdiff_t fragi_end; |
1086 | fragi=fragi0; |
1087 | fragi_end=fragi+nhfrags; |
1088 | while(fragi<fragi_end){ |
1089 | if(frags[fragi].coded){ |
1090 | unsigned char *ref; |
1091 | ref=ref_frame_data+frag_buf_offs[fragi]; |
1092 | if(fragi>fragi0)loop_filter_h(ref,ystride,_bv); |
1093 | if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv); |
1094 | if(fragi+1<fragi_end&&!frags[fragi+1].coded){ |
1095 | loop_filter_h(ref+8,ystride,_bv); |
1096 | } |
1097 | if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ |
1098 | loop_filter_v(ref+(ystride<<3),ystride,_bv); |
1099 | } |
1100 | } |
1101 | fragi++; |
1102 | } |
1103 | fragi0+=nhfrags; |
1104 | } |
1105 | } |
1106 | |
1107 | #if defined(OC_DUMP_IMAGES) |
1108 | int oc_state_dump_frame(const oc_theora_state *_state,int _frame, |
1109 | const char *_suf){ |
1110 | /*Dump a PNG of the reconstructed image.*/ |
1111 | png_structp png; |
1112 | png_infop info; |
1113 | png_bytep *image; |
1114 | FILE *fp; |
1115 | char fname[16]; |
1116 | unsigned char *y_row; |
1117 | unsigned char *u_row; |
1118 | unsigned char *v_row; |
1119 | unsigned char *y; |
1120 | unsigned char *u; |
1121 | unsigned char *v; |
1122 | ogg_int64_t iframe; |
1123 | ogg_int64_t pframe; |
1124 | int y_stride; |
1125 | int u_stride; |
1126 | int v_stride; |
1127 | int framei; |
1128 | int width; |
1129 | int height; |
1130 | int imgi; |
1131 | int imgj; |
1132 | width=_state->info.frame_width; |
1133 | height=_state->info.frame_height; |
1134 | iframe=_state->granpos>>_state->info.keyframe_granule_shift; |
1135 | pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift); |
1136 | sprintf(fname,"%08i%s.png" ,(int)(iframe+pframe),_suf); |
1137 | fp=fopen(fname,"wb" ); |
1138 | if(fp==NULL)return TH_EFAULT; |
1139 | image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image)); |
1140 | if(image==NULL){ |
1141 | fclose(fp); |
1142 | return TH_EFAULT; |
1143 | } |
1144 | png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL); |
1145 | if(png==NULL){ |
1146 | oc_free_2d(image); |
1147 | fclose(fp); |
1148 | return TH_EFAULT; |
1149 | } |
1150 | info=png_create_info_struct(png); |
1151 | if(info==NULL){ |
1152 | png_destroy_write_struct(&png,NULL); |
1153 | oc_free_2d(image); |
1154 | fclose(fp); |
1155 | return TH_EFAULT; |
1156 | } |
1157 | if(setjmp(png_jmpbuf(png))){ |
1158 | png_destroy_write_struct(&png,&info); |
1159 | oc_free_2d(image); |
1160 | fclose(fp); |
1161 | return TH_EFAULT; |
1162 | } |
1163 | framei=_state->ref_frame_idx[_frame]; |
1164 | y_row=_state->ref_frame_bufs[framei][0].data; |
1165 | u_row=_state->ref_frame_bufs[framei][1].data; |
1166 | v_row=_state->ref_frame_bufs[framei][2].data; |
1167 | y_stride=_state->ref_frame_bufs[framei][0].stride; |
1168 | u_stride=_state->ref_frame_bufs[framei][1].stride; |
1169 | v_stride=_state->ref_frame_bufs[framei][2].stride; |
1170 | /*Chroma up-sampling is just done with a box filter. |
1171 | This is very likely what will actually be used in practice on a real |
1172 | display, and also removes one more layer to search in for the source of |
1173 | artifacts. |
1174 | As an added bonus, it's dead simple.*/ |
1175 | for(imgi=height;imgi-->0;){ |
1176 | int dc; |
1177 | y=y_row; |
1178 | u=u_row; |
1179 | v=v_row; |
1180 | for(imgj=0;imgj<6*width;){ |
1181 | float yval; |
1182 | float uval; |
1183 | float vval; |
1184 | unsigned rval; |
1185 | unsigned gval; |
1186 | unsigned bval; |
1187 | /*This is intentionally slow and very accurate.*/ |
1188 | yval=(*y-16)*(1.0F/219); |
1189 | uval=(*u-128)*(2*(1-0.114F)/224); |
1190 | vval=(*v-128)*(2*(1-0.299F)/224); |
1191 | rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535); |
1192 | gval=OC_CLAMPI(0,(int)(65535*( |
1193 | yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535); |
1194 | bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535); |
1195 | image[imgi][imgj++]=(unsigned char)(rval>>8); |
1196 | image[imgi][imgj++]=(unsigned char)(rval&0xFF); |
1197 | image[imgi][imgj++]=(unsigned char)(gval>>8); |
1198 | image[imgi][imgj++]=(unsigned char)(gval&0xFF); |
1199 | image[imgi][imgj++]=(unsigned char)(bval>>8); |
1200 | image[imgi][imgj++]=(unsigned char)(bval&0xFF); |
1201 | dc=(y-y_row&1)|(_state->info.pixel_fmt&1); |
1202 | y++; |
1203 | u+=dc; |
1204 | v+=dc; |
1205 | } |
1206 | dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1); |
1207 | y_row+=y_stride; |
1208 | u_row+=dc&u_stride; |
1209 | v_row+=dc&v_stride; |
1210 | } |
1211 | png_init_io(png,fp); |
1212 | png_set_compression_level(png,Z_BEST_COMPRESSION); |
1213 | png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB, |
1214 | PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT); |
1215 | switch(_state->info.colorspace){ |
1216 | case TH_CS_ITU_REC_470M:{ |
1217 | png_set_gAMA(png,info,2.2); |
1218 | png_set_cHRM_fixed(png,info,31006,31616, |
1219 | 67000,32000,21000,71000,14000,8000); |
1220 | }break; |
1221 | case TH_CS_ITU_REC_470BG:{ |
1222 | png_set_gAMA(png,info,2.67); |
1223 | png_set_cHRM_fixed(png,info,31271,32902, |
1224 | 64000,33000,29000,60000,15000,6000); |
1225 | }break; |
1226 | default:break; |
1227 | } |
1228 | png_set_pHYs(png,info,_state->info.aspect_numerator, |
1229 | _state->info.aspect_denominator,0); |
1230 | png_set_rows(png,info,image); |
1231 | png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL); |
1232 | png_write_end(png,info); |
1233 | png_destroy_write_struct(&png,&info); |
1234 | oc_free_2d(image); |
1235 | fclose(fp); |
1236 | return 0; |
1237 | } |
1238 | #endif |
1239 | |
1240 | |
1241 | |
1242 | ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){ |
1243 | oc_theora_state *state; |
1244 | state=(oc_theora_state *)_encdec; |
1245 | if(_granpos>=0){ |
1246 | ogg_int64_t iframe; |
1247 | ogg_int64_t pframe; |
1248 | iframe=_granpos>>state->info.keyframe_granule_shift; |
1249 | pframe=_granpos-(iframe<<state->info.keyframe_granule_shift); |
1250 | /*3.2.0 streams store the frame index in the granule position. |
1251 | 3.2.1 and later store the frame count. |
1252 | We return the index, so adjust the value if we have a 3.2.1 or later |
1253 | stream.*/ |
1254 | return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1); |
1255 | } |
1256 | return -1; |
1257 | } |
1258 | |
1259 | double th_granule_time(void *_encdec,ogg_int64_t _granpos){ |
1260 | oc_theora_state *state; |
1261 | state=(oc_theora_state *)_encdec; |
1262 | if(_granpos>=0){ |
1263 | return (th_granule_frame(_encdec, _granpos)+1)*( |
1264 | (double)state->info.fps_denominator/state->info.fps_numerator); |
1265 | } |
1266 | return -1; |
1267 | } |
1268 | |