1/********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id$
15
16 ********************************************************************/
17
18#include <stdlib.h>
19#include <string.h>
20#include "state.h"
21#if defined(OC_DUMP_IMAGES)
22# include <stdio.h>
23# include "png.h"
24# include "zlib.h"
25#endif
26
27/*The function used to fill in the chroma plane motion vectors for a macro
28 block when 4 different motion vectors are specified in the luma plane.
29 This version is for use with chroma decimated in the X and Y directions
30 (4:2:0).
31 _cbmvs: The chroma block-level motion vectors to fill in.
32 _lbmvs: The luma block-level motion vectors.*/
33static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
34 int dx;
35 int dy;
36 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1])
37 +OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
38 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1])
39 +OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
40 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,2,2),OC_DIV_ROUND_POW2(dy,2,2));
41}
42
43/*The function used to fill in the chroma plane motion vectors for a macro
44 block when 4 different motion vectors are specified in the luma plane.
45 This version is for use with chroma decimated in the Y direction.
46 _cbmvs: The chroma block-level motion vectors to fill in.
47 _lbmvs: The luma block-level motion vectors.*/
48static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
49 int dx;
50 int dy;
51 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[2]);
52 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[2]);
53 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
54 dx=OC_MV_X(_lbmvs[1])+OC_MV_X(_lbmvs[3]);
55 dy=OC_MV_Y(_lbmvs[1])+OC_MV_Y(_lbmvs[3]);
56 _cbmvs[1]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
57}
58
59/*The function used to fill in the chroma plane motion vectors for a macro
60 block when 4 different motion vectors are specified in the luma plane.
61 This version is for use with chroma decimated in the X direction (4:2:2).
62 _cbmvs: The chroma block-level motion vectors to fill in.
63 _lbmvs: The luma block-level motion vectors.*/
64static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
65 int dx;
66 int dy;
67 dx=OC_MV_X(_lbmvs[0])+OC_MV_X(_lbmvs[1]);
68 dy=OC_MV_Y(_lbmvs[0])+OC_MV_Y(_lbmvs[1]);
69 _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
70 dx=OC_MV_X(_lbmvs[2])+OC_MV_X(_lbmvs[3]);
71 dy=OC_MV_Y(_lbmvs[2])+OC_MV_Y(_lbmvs[3]);
72 _cbmvs[2]=OC_MV(OC_DIV_ROUND_POW2(dx,1,1),OC_DIV_ROUND_POW2(dy,1,1));
73}
74
75/*The function used to fill in the chroma plane motion vectors for a macro
76 block when 4 different motion vectors are specified in the luma plane.
77 This version is for use with no chroma decimation (4:4:4).
78 _cbmvs: The chroma block-level motion vectors to fill in.
79 _lmbmv: The luma macro-block level motion vector to fill in for use in
80 prediction.
81 _lbmvs: The luma block-level motion vectors.*/
82static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
83 _cbmvs[0]=_lbmvs[0];
84 _cbmvs[1]=_lbmvs[1];
85 _cbmvs[2]=_lbmvs[2];
86 _cbmvs[3]=_lbmvs[3];
87}
88
/*A table of functions used to fill in the chroma plane motion vectors for a
   macro block when 4 different motion vectors are specified in the luma
   plane.
  There is one entry per pixel format (TH_PF_NFORMATS in total).
  The entries are listed in the order 00, 01, 10, 11 of the function suffixes:
   both directions decimated (4:2:0), Y decimated only, X decimated only
   (4:2:2), and no decimation (4:4:4).
  NOTE(review): presumably indexed by the pixel format value, like
   OC_MB_FILL_CMAPPING_TABLE; confirm against the callers.*/
const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
};
98
99
100
101/*Returns the fragment index of the top-left block in a macro block.
102 This can be used to test whether or not the whole macro block is valid.
103 _sb_map: The super block map.
104 _quadi: The quadrant number.
105 Return: The index of the fragment of the upper left block in the macro
106 block, or -1 if the block lies outside the coded frame.*/
107static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
108 /*It so happens that under the Hilbert curve ordering described below, the
109 upper-left block in each macro block is at index 0, except in macro block
110 3, where it is at index 2.*/
111 return _sb_map[_quadi][_quadi&_quadi<<1];
112}
113
114/*Fills in the mapping from block positions to fragment numbers for a single
115 color plane.
116 This function also fills in the "valid" flag of each quadrant in the super
117 block flags.
118 _sb_maps: The array of super block maps for the color plane.
119 _sb_flags: The array of super block flags for the color plane.
120 _frag0: The index of the first fragment in the plane.
121 _hfrags: The number of horizontal fragments in a coded frame.
122 _vfrags: The number of vertical fragments in a coded frame.*/
123static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
124 oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
125 /*Contains the (macro_block,block) indices for a 4x4 grid of
126 fragments.
127 The pattern is a 4x4 Hilbert space-filling curve.
128 A Hilbert curve has the nice property that as the curve grows larger, its
129 fractal dimension approaches 2.
130 The intuition is that nearby blocks in the curve are also close spatially,
131 with the previous element always an immediate neighbor, so that runs of
132 blocks should be well correlated.*/
133 static const int SB_MAP[4][4][2]={
134 {{0,0},{0,1},{3,2},{3,3}},
135 {{0,3},{0,2},{3,1},{3,0}},
136 {{1,0},{1,3},{2,0},{2,3}},
137 {{1,1},{1,2},{2,1},{2,2}}
138 };
139 ptrdiff_t yfrag;
140 unsigned sbi;
141 int y;
142 sbi=0;
143 yfrag=_frag0;
144 for(y=0;;y+=4){
145 int imax;
146 int x;
147 /*Figure out how many columns of blocks in this super block lie within the
148 image.*/
149 imax=_vfrags-y;
150 if(imax>4)imax=4;
151 else if(imax<=0)break;
152 for(x=0;;x+=4,sbi++){
153 ptrdiff_t xfrag;
154 int jmax;
155 int quadi;
156 int i;
157 /*Figure out how many rows of blocks in this super block lie within the
158 image.*/
159 jmax=_hfrags-x;
160 if(jmax>4)jmax=4;
161 else if(jmax<=0)break;
162 /*By default, set all fragment indices to -1.*/
163 memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
164 /*Fill in the fragment map for this super block.*/
165 xfrag=yfrag+x;
166 for(i=0;i<imax;i++){
167 int j;
168 for(j=0;j<jmax;j++){
169 _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
170 }
171 xfrag+=_hfrags;
172 }
173 /*Mark which quadrants of this super block lie within the image.*/
174 for(quadi=0;quadi<4;quadi++){
175 _sb_flags[sbi].quad_valid|=
176 (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
177 }
178 }
179 yfrag+=_hfrags<<2;
180 }
181}
182
183/*Fills in the Y plane fragment map for a macro block given the fragment
184 coordinates of its upper-left hand corner.
185 _mb_map: The macro block map to fill.
186 _fplane: The description of the Y plane.
187 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
188 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
189static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
190 const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
191 int i;
192 int j;
193 for(i=0;i<2;i++)for(j=0;j<2;j++){
194 _mb_map[0][i<<1|j]=(_yfrag0+i)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+j;
195 }
196}
197
198/*Fills in the chroma plane fragment maps for a macro block.
199 This version is for use with chroma decimated in the X and Y directions
200 (4:2:0).
201 _mb_map: The macro block map to fill.
202 _fplanes: The descriptions of the fragment planes.
203 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
204 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
205static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
206 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
207 ptrdiff_t fragi;
208 _xfrag0>>=1;
209 _yfrag0>>=1;
210 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
211 _mb_map[1][0]=fragi+_fplanes[1].froffset;
212 _mb_map[2][0]=fragi+_fplanes[2].froffset;
213}
214
215/*Fills in the chroma plane fragment maps for a macro block.
216 This version is for use with chroma decimated in the Y direction.
217 _mb_map: The macro block map to fill.
218 _fplanes: The descriptions of the fragment planes.
219 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
220 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
221static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
222 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
223 ptrdiff_t fragi;
224 int j;
225 _yfrag0>>=1;
226 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
227 for(j=0;j<2;j++){
228 _mb_map[1][j]=fragi+_fplanes[1].froffset;
229 _mb_map[2][j]=fragi+_fplanes[2].froffset;
230 fragi++;
231 }
232}
233
234/*Fills in the chroma plane fragment maps for a macro block.
235 This version is for use with chroma decimated in the X direction (4:2:2).
236 _mb_map: The macro block map to fill.
237 _fplanes: The descriptions of the fragment planes.
238 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
239 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
240static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
241 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
242 ptrdiff_t fragi;
243 int i;
244 _xfrag0>>=1;
245 fragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
246 for(i=0;i<2;i++){
247 _mb_map[1][i<<1]=fragi+_fplanes[1].froffset;
248 _mb_map[2][i<<1]=fragi+_fplanes[2].froffset;
249 fragi+=_fplanes[1].nhfrags;
250 }
251}
252
253/*Fills in the chroma plane fragment maps for a macro block.
254 This version is for use with no chroma decimation (4:4:4).
255 This uses the already filled-in luma plane values.
256 _mb_map: The macro block map to fill.
257 _fplanes: The descriptions of the fragment planes.
258 _xfrag0: The X location of the upper-left hand fragment in the luma plane.
259 _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
260static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
261 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
262 int k;
263 (void)_xfrag0;
264 (void)_yfrag0;
265 for(k=0;k<4;k++){
266 _mb_map[1][k]=_mb_map[0][k]+_fplanes[1].froffset;
267 _mb_map[2][k]=_mb_map[0][k]+_fplanes[2].froffset;
268 }
269}
270
/*The function type used to fill in the chroma plane fragment maps for a
   macro block.
  Implementations write the entries of _mb_map[1] and _mb_map[2] that apply
   to their chroma decimation; the luma entries in _mb_map[0] are expected to
   have been filled in beforehand (the 4:4:4 version reads them).
  _mb_map:  The macro block map to fill.
  _fplanes: The descriptions of the fragment planes.
  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
 const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
279
/*A table of functions used to fill in the chroma plane fragment maps for a
   macro block for each type of chrominance decimation.
  The table is indexed by the pixel format value (see oc_mb_create_mapping()).
  Bit 0 of the index is set when chroma is not decimated in X, and bit 1 is
   set when chroma is not decimated in Y, which gives the order 4:2:0,
   Y-only decimation, 4:2:2, 4:4:4.*/
static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
  oc_mb_fill_cmapping00,
  oc_mb_fill_cmapping01,
  oc_mb_fill_cmapping10,
  oc_mb_fill_cmapping11
};
288
289/*Fills in the mapping from macro blocks to their corresponding fragment
290 numbers in each plane.
291 _mb_maps: The list of macro block maps.
292 _mb_modes: The list of macro block modes; macro blocks completely outside
293 the coded region are marked invalid.
294 _fplanes: The descriptions of the fragment planes.
295 _pixel_fmt: The chroma decimation type.*/
296static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
297 signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
298 oc_mb_fill_cmapping_func mb_fill_cmapping;
299 unsigned sbi;
300 int y;
301 mb_fill_cmapping=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
302 /*Loop through the luma plane super blocks.*/
303 for(sbi=y=0;y<_fplanes[0].nvfrags;y+=4){
304 int x;
305 for(x=0;x<_fplanes[0].nhfrags;x+=4,sbi++){
306 int ymb;
307 /*Loop through the macro blocks in each super block in display order.*/
308 for(ymb=0;ymb<2;ymb++){
309 int xmb;
310 for(xmb=0;xmb<2;xmb++){
311 unsigned mbi;
312 int mbx;
313 int mby;
314 mbi=sbi<<2|OC_MB_MAP[ymb][xmb];
315 mbx=x|xmb<<1;
316 mby=y|ymb<<1;
317 /*Initialize fragment indices to -1.*/
318 memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
319 /*Make sure this macro block is within the encoded region.*/
320 if(mbx>=_fplanes[0].nhfrags||mby>=_fplanes[0].nvfrags){
321 _mb_modes[mbi]=OC_MODE_INVALID;
322 continue;
323 }
324 /*Fill in the fragment indices for the luma plane.*/
325 oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
326 /*Fill in the fragment indices for the chroma planes.*/
327 (*mb_fill_cmapping)(_mb_maps[mbi],_fplanes,mbx,mby);
328 }
329 }
330 }
331 }
332}
333
334/*Marks the fragments which fall all or partially outside the displayable
335 region of the frame.
336 _state: The Theora state containing the fragments to be marked.*/
337static void oc_state_border_init(oc_theora_state *_state){
338 oc_fragment *frag;
339 oc_fragment *yfrag_end;
340 oc_fragment *xfrag_end;
341 oc_fragment_plane *fplane;
342 int crop_x0;
343 int crop_y0;
344 int crop_xf;
345 int crop_yf;
346 int pli;
347 int y;
348 int x;
349 /*The method we use here is slow, but the code is dead simple and handles
350 all the special cases easily.
351 We only ever need to do it once.*/
352 /*Loop through the fragments, marking those completely outside the
353 displayable region and constructing a border mask for those that straddle
354 the border.*/
355 _state->nborders=0;
356 yfrag_end=frag=_state->frags;
357 for(pli=0;pli<3;pli++){
358 fplane=_state->fplanes+pli;
359 /*Set up the cropping rectangle for this plane.*/
360 crop_x0=_state->info.pic_x;
361 crop_xf=_state->info.pic_x+_state->info.pic_width;
362 crop_y0=_state->info.pic_y;
363 crop_yf=_state->info.pic_y+_state->info.pic_height;
364 if(pli>0){
365 if(!(_state->info.pixel_fmt&1)){
366 crop_x0=crop_x0>>1;
367 crop_xf=crop_xf+1>>1;
368 }
369 if(!(_state->info.pixel_fmt&2)){
370 crop_y0=crop_y0>>1;
371 crop_yf=crop_yf+1>>1;
372 }
373 }
374 y=0;
375 for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
376 x=0;
377 for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
378 /*First check to see if this fragment is completely outside the
379 displayable region.*/
380 /*Note the special checks for an empty cropping rectangle.
381 This guarantees that if we count a fragment as straddling the
382 border below, at least one pixel in the fragment will be inside
383 the displayable region.*/
384 if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
385 crop_x0>=crop_xf||crop_y0>=crop_yf){
386 frag->invalid=1;
387 }
388 /*Otherwise, check to see if it straddles the border.*/
389 else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
390 y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
391 ogg_int64_t mask;
392 int npixels;
393 int i;
394 mask=npixels=0;
395 for(i=0;i<8;i++){
396 int j;
397 for(j=0;j<8;j++){
398 if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
399 mask|=(ogg_int64_t)1<<(i<<3|j);
400 npixels++;
401 }
402 }
403 }
404 /*Search the fragment array for border info with the same pattern.
405 In general, there will be at most 8 different patterns (per
406 plane).*/
407 for(i=0;;i++){
408 if(i>=_state->nborders){
409 _state->nborders++;
410 _state->borders[i].mask=mask;
411 _state->borders[i].npixels=npixels;
412 }
413 else if(_state->borders[i].mask!=mask)continue;
414 frag->borderi=i;
415 break;
416 }
417 }
418 else frag->borderi=-1;
419 }
420 }
421 }
422}
423
424static int oc_state_frarray_init(oc_theora_state *_state){
425 int yhfrags;
426 int yvfrags;
427 int chfrags;
428 int cvfrags;
429 ptrdiff_t yfrags;
430 ptrdiff_t cfrags;
431 ptrdiff_t nfrags;
432 unsigned yhsbs;
433 unsigned yvsbs;
434 unsigned chsbs;
435 unsigned cvsbs;
436 unsigned ysbs;
437 unsigned csbs;
438 unsigned nsbs;
439 size_t nmbs;
440 int hdec;
441 int vdec;
442 int pli;
443 /*Figure out the number of fragments in each plane.*/
444 /*These parameters have already been validated to be multiples of 16.*/
445 yhfrags=_state->info.frame_width>>3;
446 yvfrags=_state->info.frame_height>>3;
447 hdec=!(_state->info.pixel_fmt&1);
448 vdec=!(_state->info.pixel_fmt&2);
449 chfrags=yhfrags+hdec>>hdec;
450 cvfrags=yvfrags+vdec>>vdec;
451 yfrags=yhfrags*(ptrdiff_t)yvfrags;
452 cfrags=chfrags*(ptrdiff_t)cvfrags;
453 nfrags=yfrags+2*cfrags;
454 /*Figure out the number of super blocks in each plane.*/
455 yhsbs=yhfrags+3>>2;
456 yvsbs=yvfrags+3>>2;
457 chsbs=chfrags+3>>2;
458 cvsbs=cvfrags+3>>2;
459 ysbs=yhsbs*yvsbs;
460 csbs=chsbs*cvsbs;
461 nsbs=ysbs+2*csbs;
462 nmbs=(size_t)ysbs<<2;
463 /*Check for overflow.
464 We support the ridiculous upper limits of the specification (1048560 by
465 1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
466 but for those with 32-bit pointers (or smaller!) we have to check.
467 If the caller wants to prevent denial-of-service by imposing a more
468 reasonable upper limit on the size of attempted allocations, they must do
469 so themselves; we have no platform independent way to determine how much
470 system memory there is nor an application-independent way to decide what a
471 "reasonable" allocation is.*/
472 if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
473 ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
474 return TH_EIMPL;
475 }
476 /*Initialize the fragment array.*/
477 _state->fplanes[0].nhfrags=yhfrags;
478 _state->fplanes[0].nvfrags=yvfrags;
479 _state->fplanes[0].froffset=0;
480 _state->fplanes[0].nfrags=yfrags;
481 _state->fplanes[0].nhsbs=yhsbs;
482 _state->fplanes[0].nvsbs=yvsbs;
483 _state->fplanes[0].sboffset=0;
484 _state->fplanes[0].nsbs=ysbs;
485 _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
486 _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
487 _state->fplanes[1].froffset=yfrags;
488 _state->fplanes[2].froffset=yfrags+cfrags;
489 _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
490 _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
491 _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
492 _state->fplanes[1].sboffset=ysbs;
493 _state->fplanes[2].sboffset=ysbs+csbs;
494 _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
495 _state->nfrags=nfrags;
496 _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
497 _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
498 _state->nsbs=nsbs;
499 _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
500 _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
501 _state->nhmbs=yhsbs<<1;
502 _state->nvmbs=yvsbs<<1;
503 _state->nmbs=nmbs;
504 _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
505 _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
506 _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
507 if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
508 _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
509 _state->coded_fragis==NULL){
510 return TH_EFAULT;
511 }
512 /*Create the mapping from super blocks to fragments.*/
513 for(pli=0;pli<3;pli++){
514 oc_fragment_plane *fplane;
515 fplane=_state->fplanes+pli;
516 oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
517 _state->sb_flags+fplane->sboffset,fplane->froffset,
518 fplane->nhfrags,fplane->nvfrags);
519 }
520 /*Create the mapping from macro blocks to fragments.*/
521 oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
522 _state->fplanes,_state->info.pixel_fmt);
523 /*Initialize the invalid and borderi fields of each fragment.*/
524 oc_state_border_init(_state);
525 return 0;
526}
527
/*Releases the arrays allocated by oc_state_frarray_init().
  The arrays are freed in the reverse of the order they were allocated in.
  The pointers in the state are not reset afterwards, so the state must not
   be cleared a second time without being re-initialized.
  _state: The state whose fragment, super block and macro block arrays are to
           be freed.*/
static void oc_state_frarray_clear(oc_theora_state *_state){
  _ogg_free(_state->coded_fragis);
  _ogg_free(_state->mb_modes);
  _ogg_free(_state->mb_maps);
  _ogg_free(_state->sb_flags);
  _ogg_free(_state->sb_maps);
  _ogg_free(_state->frag_mvs);
  _ogg_free(_state->frags);
}
537
538
539/*Initializes the buffers used for reconstructed frames.
540 These buffers are padded with 16 extra pixels on each side, to allow
541 unrestricted motion vectors without special casing the boundary.
542 If chroma is decimated in either direction, the padding is reduced by a
543 factor of 2 on the appropriate sides.
544 _nrefs: The number of reference buffers to init; must be in the range 3...6.*/
545static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
546 th_info *info;
547 unsigned char *ref_frame_data;
548 size_t ref_frame_data_sz;
549 size_t ref_frame_sz;
550 size_t yplane_sz;
551 size_t cplane_sz;
552 int yhstride;
553 int yheight;
554 int chstride;
555 int cheight;
556 ptrdiff_t align;
557 ptrdiff_t yoffset;
558 ptrdiff_t coffset;
559 ptrdiff_t *frag_buf_offs;
560 ptrdiff_t fragi;
561 int hdec;
562 int vdec;
563 int rfi;
564 int pli;
565 if(_nrefs<3||_nrefs>6)return TH_EINVAL;
566 info=&_state->info;
567 /*Compute the image buffer parameters for each plane.*/
568 hdec=!(info->pixel_fmt&1);
569 vdec=!(info->pixel_fmt&2);
570 yhstride=info->frame_width+2*OC_UMV_PADDING;
571 yheight=info->frame_height+2*OC_UMV_PADDING;
572 /*Require 16-byte aligned rows in the chroma planes.*/
573 chstride=(yhstride>>hdec)+15&~15;
574 cheight=yheight>>vdec;
575 yplane_sz=yhstride*(size_t)yheight;
576 cplane_sz=chstride*(size_t)cheight;
577 yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
578 coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
579 /*Although we guarantee the rows of the chroma planes are a multiple of 16
580 bytes, the initial padding on the first row may only be 8 bytes.
581 Compute the offset needed to the actual image data to a multiple of 16.*/
582 align=-coffset&15;
583 ref_frame_sz=yplane_sz+2*cplane_sz+16;
584 ref_frame_data_sz=_nrefs*ref_frame_sz;
585 /*Check for overflow.
586 The same caveats apply as for oc_state_frarray_init().*/
587 if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
588 ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
589 return TH_EIMPL;
590 }
591 ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
592 frag_buf_offs=_state->frag_buf_offs=
593 _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
594 if(ref_frame_data==NULL||frag_buf_offs==NULL){
595 _ogg_free(frag_buf_offs);
596 oc_aligned_free(ref_frame_data);
597 return TH_EFAULT;
598 }
599 /*Set up the width, height and stride for the image buffers.*/
600 _state->ref_frame_bufs[0][0].width=info->frame_width;
601 _state->ref_frame_bufs[0][0].height=info->frame_height;
602 _state->ref_frame_bufs[0][0].stride=yhstride;
603 _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
604 info->frame_width>>hdec;
605 _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
606 info->frame_height>>vdec;
607 _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
608 chstride;
609 for(rfi=1;rfi<_nrefs;rfi++){
610 memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
611 sizeof(_state->ref_frame_bufs[0]));
612 }
613 _state->ref_frame_handle=ref_frame_data;
614 /*Set up the data pointers for the image buffers.*/
615 for(rfi=0;rfi<_nrefs;rfi++){
616 _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
617 ref_frame_data+=yplane_sz+align;
618 _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
619 ref_frame_data+=cplane_sz;
620 _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
621 ref_frame_data+=cplane_sz+(16-align);
622 /*Flip the buffer upside down.
623 This allows us to decode Theora's bottom-up frames in their natural
624 order, yet return a top-down buffer with a positive stride to the user.*/
625 oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
626 _state->ref_frame_bufs[rfi]);
627 }
628 _state->ref_ystride[0]=-yhstride;
629 _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
630 /*Initialize the fragment buffer offsets.*/
631 ref_frame_data=_state->ref_frame_bufs[0][0].data;
632 fragi=0;
633 for(pli=0;pli<3;pli++){
634 th_img_plane *iplane;
635 oc_fragment_plane *fplane;
636 unsigned char *vpix;
637 ptrdiff_t stride;
638 ptrdiff_t vfragi_end;
639 int nhfrags;
640 iplane=_state->ref_frame_bufs[0]+pli;
641 fplane=_state->fplanes+pli;
642 vpix=iplane->data;
643 vfragi_end=fplane->froffset+fplane->nfrags;
644 nhfrags=fplane->nhfrags;
645 stride=iplane->stride;
646 while(fragi<vfragi_end){
647 ptrdiff_t hfragi_end;
648 unsigned char *hpix;
649 hpix=vpix;
650 for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
651 frag_buf_offs[fragi]=hpix-ref_frame_data;
652 hpix+=8;
653 }
654 vpix+=stride<<3;
655 }
656 }
657 /*Initialize the reference frame pointers and indices.*/
658 _state->ref_frame_idx[OC_FRAME_GOLD]=
659 _state->ref_frame_idx[OC_FRAME_PREV]=
660 _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
661 _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
662 _state->ref_frame_idx[OC_FRAME_SELF]=
663 _state->ref_frame_idx[OC_FRAME_IO]=-1;
664 _state->ref_frame_data[OC_FRAME_GOLD]=
665 _state->ref_frame_data[OC_FRAME_PREV]=
666 _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
667 _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
668 _state->ref_frame_data[OC_FRAME_SELF]=
669 _state->ref_frame_data[OC_FRAME_IO]=NULL;
670 return 0;
671}
672
/*Releases the buffers allocated by oc_state_ref_bufs_init(): the fragment
   buffer offsets and the single allocation that holds all of the reference
   frames.
  The pointers in the state are not reset afterwards.
  _state: The state whose reference buffers are to be freed.*/
static void oc_state_ref_bufs_clear(oc_theora_state *_state){
  _ogg_free(_state->frag_buf_offs);
  oc_aligned_free(_state->ref_frame_handle);
}
677
678
/*Sets up the state to use the portable C implementations of the accelerated
   routines.
  The CPU flags are cleared, and when OC_STATE_USE_VTABLE is defined every
   entry of the optimized function table is pointed at its _c version.
  The forward zig-zag table is always set to OC_FZIG_ZAG.
  _state: The state to set up.*/
void oc_state_accel_init_c(oc_theora_state *_state){
  _state->cpu_flags=0;
#if defined(OC_STATE_USE_VTABLE)
  _state->opt_vtable.frag_copy=oc_frag_copy_c;
  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
  _state->opt_vtable.idct8x8=oc_idct8x8_c;
  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
  _state->opt_vtable.state_loop_filter_frag_rows=
   oc_state_loop_filter_frag_rows_c;
  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
#endif
  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
}
696
697
698int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
699 int ret;
700 /*First validate the parameters.*/
701 if(_info==NULL)return TH_EFAULT;
702 /*The width and height of the encoded frame must be multiples of 16.
703 They must also, when divided by 16, fit into a 16-bit unsigned integer.
704 The displayable frame offset coordinates must fit into an 8-bit unsigned
705 integer.
706 Note that the offset Y in the API is specified on the opposite side from
707 how it is specified in the bitstream, because the Y axis is flipped in
708 the bitstream.
709 The displayable frame must fit inside the encoded frame.
710 The color space must be one known by the encoder.
711 The framerate ratio must not contain a zero value.*/
712 if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
713 _info->frame_width<=0||_info->frame_width>=0x100000||
714 _info->frame_height<=0||_info->frame_height>=0x100000||
715 _info->pic_x+_info->pic_width>_info->frame_width||
716 _info->pic_y+_info->pic_height>_info->frame_height||
717 _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
718 /*Note: the following <0 comparisons may generate spurious warnings on
719 platforms where enums are unsigned.
720 We could cast them to unsigned and just use the following >= comparison,
721 but there are a number of compilers which will mis-optimize this.
722 It's better to live with the spurious warnings.*/
723 _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
724 _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS||
725 _info->fps_numerator<1||_info->fps_denominator<1){
726 return TH_EINVAL;
727 }
728 memset(_state,0,sizeof(*_state));
729 memcpy(&_state->info,_info,sizeof(*_info));
730 /*Invert the sense of pic_y to match Theora's right-handed coordinate
731 system.*/
732 _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
733 _state->frame_type=OC_UNKWN_FRAME;
734 oc_state_accel_init(_state);
735 ret=oc_state_frarray_init(_state);
736 if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
737 if(ret<0){
738 oc_state_frarray_clear(_state);
739 return ret;
740 }
741 /*If the keyframe_granule_shift is out of range, use the maximum allowable
742 value.*/
743 if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
744 _state->info.keyframe_granule_shift=31;
745 }
746 _state->keyframe_num=0;
747 _state->curframe_num=-1;
748 /*3.2.0 streams mark the frame index instead of the frame count.
749 This was changed with stream version 3.2.1 to conform to other Ogg
750 codecs.
751 We add an extra bias when computing granule positions for new streams.*/
752 _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
753 return 0;
754}
755
/*Releases everything allocated by oc_state_init(): first the reference frame
   buffers, then the fragment arrays (the reverse of the order they were set
   up in).
  The state structure itself is not freed.
  _state: The state to clear.*/
void oc_state_clear(oc_theora_state *_state){
  oc_state_ref_bufs_clear(_state);
  oc_state_frarray_clear(_state);
}
760
761
762/*Duplicates the pixels on the border of the image plane out into the
763 surrounding padding for use by unrestricted motion vectors.
764 This function only adds the left and right borders, and only for the fragment
765 rows specified.
766 _refi: The index of the reference buffer to pad.
767 _pli: The color plane.
768 _y0: The Y coordinate of the first row to pad.
769 _yend: The Y coordinate of the row to stop padding at.*/
770void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
771 int _y0,int _yend){
772 th_img_plane *iplane;
773 unsigned char *apix;
774 unsigned char *bpix;
775 unsigned char *epix;
776 int stride;
777 int hpadding;
778 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
779 iplane=_state->ref_frame_bufs[_refi]+_pli;
780 stride=iplane->stride;
781 apix=iplane->data+_y0*(ptrdiff_t)stride;
782 bpix=apix+iplane->width-1;
783 epix=iplane->data+_yend*(ptrdiff_t)stride;
784 /*Note the use of != instead of <, which allows the stride to be negative.*/
785 while(apix!=epix){
786 memset(apix-hpadding,apix[0],hpadding);
787 memset(bpix+1,bpix[0],hpadding);
788 apix+=stride;
789 bpix+=stride;
790 }
791}
792
793/*Duplicates the pixels on the border of the image plane out into the
794 surrounding padding for use by unrestricted motion vectors.
795 This function only adds the top and bottom borders, and must be called after
796 the left and right borders are added.
797 _refi: The index of the reference buffer to pad.
798 _pli: The color plane.*/
799void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
800 th_img_plane *iplane;
801 unsigned char *apix;
802 unsigned char *bpix;
803 unsigned char *epix;
804 int stride;
805 int hpadding;
806 int vpadding;
807 int fullw;
808 hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
809 vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
810 iplane=_state->ref_frame_bufs[_refi]+_pli;
811 stride=iplane->stride;
812 fullw=iplane->width+(hpadding<<1);
813 apix=iplane->data-hpadding;
814 bpix=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
815 epix=apix-stride*(ptrdiff_t)vpadding;
816 while(apix!=epix){
817 memcpy(apix-stride,apix,fullw);
818 memcpy(bpix+stride,bpix,fullw);
819 apix-=stride;
820 bpix+=stride;
821 }
822}
823
824/*Duplicates the pixels on the border of the given reference image out into
825 the surrounding padding for use by unrestricted motion vectors.
826 _state: The context containing the reference buffers.
827 _refi: The index of the reference buffer to pad.*/
828void oc_state_borders_fill(oc_theora_state *_state,int _refi){
829 int pli;
830 for(pli=0;pli<3;pli++){
831 oc_state_borders_fill_rows(_state,_refi,pli,0,
832 _state->ref_frame_bufs[_refi][pli].height);
833 oc_state_borders_fill_caps(_state,_refi,pli);
834 }
835}
836
837/*Determines the offsets in an image buffer to use for motion compensation.
838 _state: The Theora state the offsets are to be computed with.
839 _offsets: Returns the offset for the buffer(s).
840 _offsets[0] is always set.
841 _offsets[1] is set if the motion vector has non-zero fractional
842 components.
843 _pli: The color plane index.
844 _mv: The motion vector.
845 Return: The number of offsets returned: 1 or 2.*/
846int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
847 int _pli,oc_mv _mv){
848 /*Here is a brief description of how Theora handles motion vectors:
849 Motion vector components are specified to half-pixel accuracy in
850 undecimated directions of each plane, and quarter-pixel accuracy in
851 decimated directions.
852 Integer parts are extracted by dividing (not shifting) by the
853 appropriate amount, with truncation towards zero.
854 These integer values are used to calculate the first offset.
855
856 If either of the fractional parts are non-zero, then a second offset is
857 computed.
858 No third or fourth offsets are computed, even if both components have
859 non-zero fractional parts.
860 The second offset is computed by dividing (not shifting) by the
861 appropriate amount, always truncating _away_ from zero.*/
862#if 0
863 /*This version of the code doesn't use any tables, but is slower.*/
864 int ystride;
865 int xprec;
866 int yprec;
867 int xfrac;
868 int yfrac;
869 int offs;
870 int dx;
871 int dy;
872 ystride=_state->ref_ystride[_pli];
873 /*These two variables decide whether we are in half- or quarter-pixel
874 precision in each component.*/
875 xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
876 yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
877 dx=OC_MV_X(_mv);
878 dy=OC_MV_Y(_mv);
879 /*These two variables are either 0 if all the fractional bits are zero or -1
880 if any of them are non-zero.*/
881 xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
882 yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
883 offs=(dx>>xprec)+(dy>>yprec)*ystride;
884 if(xfrac||yfrac){
885 int xmask;
886 int ymask;
887 xmask=OC_SIGNMASK(dx);
888 ymask=OC_SIGNMASK(dy);
889 yfrac&=ystride;
890 _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
891 _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
892 return 2;
893 }
894 else{
895 _offsets[0]=offs;
896 return 1;
897 }
898#else
899 /*Using tables simplifies the code, and there's enough arithmetic to hide the
900 latencies of the memory references.*/
901 static const signed char OC_MVMAP[2][64]={
902 {
903 -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
904 -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
905 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
906 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
907 },
908 {
909 -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
910 -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
911 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
912 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
913 }
914 };
915 static const signed char OC_MVMAP2[2][64]={
916 {
917 -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
918 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
919 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
920 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
921 },
922 {
923 -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
924 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
925 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
926 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
927 }
928 };
929 int ystride;
930 int qpx;
931 int qpy;
932 int mx;
933 int my;
934 int mx2;
935 int my2;
936 int offs;
937 int dx;
938 int dy;
939 ystride=_state->ref_ystride[_pli];
940 qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
941 dx=OC_MV_X(_mv);
942 dy=OC_MV_Y(_mv);
943 my=OC_MVMAP[qpy][dy+31];
944 my2=OC_MVMAP2[qpy][dy+31];
945 qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
946 mx=OC_MVMAP[qpx][dx+31];
947 mx2=OC_MVMAP2[qpx][dx+31];
948 offs=my*ystride+mx;
949 if(mx2||my2){
950 _offsets[1]=offs+my2*ystride+mx2;
951 _offsets[0]=offs;
952 return 2;
953 }
954 _offsets[0]=offs;
955 return 1;
956#endif
957}
958
959void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
960 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
961 unsigned char *dst;
962 ptrdiff_t frag_buf_off;
963 int ystride;
964 int refi;
965 /*Apply the inverse transform.*/
966 /*Special case only having a DC component.*/
967 if(_last_zzi<2){
968 ogg_int16_t p;
969 int ci;
970 /*We round this dequant product (and not any of the others) because there's
971 no iDCT rounding.*/
972 p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
973 /*LOOP VECTORIZES.*/
974 for(ci=0;ci<64;ci++)_dct_coeffs[64+ci]=p;
975 }
976 else{
977 /*First, dequantize the DC coefficient.*/
978 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
979 oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
980 }
981 /*Fill in the target buffer.*/
982 frag_buf_off=_state->frag_buf_offs[_fragi];
983 refi=_state->frags[_fragi].refi;
984 ystride=_state->ref_ystride[_pli];
985 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
986 if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
987 else{
988 const unsigned char *ref;
989 int mvoffsets[2];
990 ref=_state->ref_frame_data[refi]+frag_buf_off;
991 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
992 _state->frag_mvs[_fragi])>1){
993 oc_frag_recon_inter2(_state,
994 dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs+64);
995 }
996 else{
997 oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
998 }
999 }
1000}
1001
/*Filters across a vertical block edge, over 8 consecutive rows.
  _pix:     The first pixel to the right of the edge in the first row.
            Pixels _pix[-2] through _pix[1] of each row are read, and
             _pix[-1] and _pix[0] are updated.
  _ystride: The distance in bytes between consecutive rows.
  _bv:      The bounding values table; it is indexed with values that may be
             negative, so it must point into the interior of the array.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *row;
  int            rows_left;
  row=_pix-2;
  for(rows_left=8;rows_left-->0;row+=_ystride){
    int delta;
    int adj;
    delta=row[0]-row[3]+3*(row[2]-row[1]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[(delta+4)>>3];
    row[1]=OC_CLAMP255(row[1]+adj);
    row[2]=OC_CLAMP255(row[2]-adj);
  }
}
1017
/*Filters across a horizontal block edge, over 8 consecutive columns.
  _pix:     The first pixel below the edge in the first column.
            The two rows above and the two rows starting at _pix are read,
             and the rows at _pix-_ystride and _pix are updated.
  _ystride: The distance in bytes between consecutive rows.
  _bv:      The bounding values table; it is indexed with values that may be
             negative, so it must point into the interior of the array.*/
static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
  unsigned char *above2;
  unsigned char *above1;
  unsigned char *below0;
  unsigned char *below1;
  int            col;
  above2=_pix-_ystride*2;
  above1=above2+_ystride;
  below0=above2+_ystride*2;
  below1=above2+_ystride*3;
  for(col=0;col<8;col++){
    int delta;
    int adj;
    delta=above2[col]-below1[col]+3*(below0[col]-above1[col]);
    /*The _bv array is used to compute the function
      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
    adj=_bv[(delta+4)>>3];
    above1[col]=OC_CLAMP255(above1[col]+adj);
    below0[col]=OC_CLAMP255(below0[col]-adj);
  }
}
1032
1033/*Initialize the bounding values array used by the loop filter.
1034 _bv: Storage for the array.
1035 _flimit: The filter limit as defined in Section 7.10 of the spec.*/
void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
  signed char *mid;
  int          k;
  /*Everything not written below stays zero.*/
  memset(_bv,0,256*sizeof(*_bv));
  /*The table is centered on entry 127.*/
  mid=_bv+127;
  for(k=0;k<_flimit;k++){
    int lo;
    int hi;
    lo=127-k-_flimit;
    hi=127+k+_flimit;
    /*Outer ramp on the negative side, dropped where it falls off the
       array.*/
    if(lo>=0)_bv[lo]=(signed char)(k-_flimit);
    /*Inner ramps: the response equals the input within +/-_flimit.*/
    mid[-k]=(signed char)(-k);
    mid[k]=(signed char)(k);
    /*Outer ramp on the positive side, dropped where it falls off the
       array.*/
    if(hi<256)_bv[hi]=(signed char)(_flimit-k);
  }
}
1046
1047/*Apply the loop filter to a given set of fragment rows in the given plane.
1048 The filter may be run on the bottom edge, affecting pixels in the next row of
1049 fragments, so this row also needs to be available.
1050 _bv: The bounding values array.
1051 _refi: The index of the frame buffer to filter.
1052 _pli: The color plane to filter.
1053 _fragy0: The Y coordinate of the first fragment row to filter.
1054 _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
1055void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
1056 signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
1057 const oc_fragment_plane *fplane;
1058 const oc_fragment *frags;
1059 const ptrdiff_t *frag_buf_offs;
1060 unsigned char *ref_frame_data;
1061 ptrdiff_t fragi_top;
1062 ptrdiff_t fragi_bot;
1063 ptrdiff_t fragi0;
1064 ptrdiff_t fragi0_end;
1065 int ystride;
1066 int nhfrags;
1067 _bv+=127;
1068 fplane=_state->fplanes+_pli;
1069 nhfrags=fplane->nhfrags;
1070 fragi_top=fplane->froffset;
1071 fragi_bot=fragi_top+fplane->nfrags;
1072 fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
1073 fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
1074 ystride=_state->ref_ystride[_pli];
1075 frags=_state->frags;
1076 frag_buf_offs=_state->frag_buf_offs;
1077 ref_frame_data=_state->ref_frame_data[_refi];
1078 /*The following loops are constructed somewhat non-intuitively on purpose.
1079 The main idea is: if a block boundary has at least one coded fragment on
1080 it, the filter is applied to it.
1081 However, the order that the filters are applied in matters, and VP3 chose
1082 the somewhat strange ordering used below.*/
1083 while(fragi0<fragi0_end){
1084 ptrdiff_t fragi;
1085 ptrdiff_t fragi_end;
1086 fragi=fragi0;
1087 fragi_end=fragi+nhfrags;
1088 while(fragi<fragi_end){
1089 if(frags[fragi].coded){
1090 unsigned char *ref;
1091 ref=ref_frame_data+frag_buf_offs[fragi];
1092 if(fragi>fragi0)loop_filter_h(ref,ystride,_bv);
1093 if(fragi0>fragi_top)loop_filter_v(ref,ystride,_bv);
1094 if(fragi+1<fragi_end&&!frags[fragi+1].coded){
1095 loop_filter_h(ref+8,ystride,_bv);
1096 }
1097 if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){
1098 loop_filter_v(ref+(ystride<<3),ystride,_bv);
1099 }
1100 }
1101 fragi++;
1102 }
1103 fragi0+=nhfrags;
1104 }
1105}
1106
1107#if defined(OC_DUMP_IMAGES)
1108int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
1109 const char *_suf){
1110 /*Dump a PNG of the reconstructed image.*/
1111 png_structp png;
1112 png_infop info;
1113 png_bytep *image;
1114 FILE *fp;
1115 char fname[16];
1116 unsigned char *y_row;
1117 unsigned char *u_row;
1118 unsigned char *v_row;
1119 unsigned char *y;
1120 unsigned char *u;
1121 unsigned char *v;
1122 ogg_int64_t iframe;
1123 ogg_int64_t pframe;
1124 int y_stride;
1125 int u_stride;
1126 int v_stride;
1127 int framei;
1128 int width;
1129 int height;
1130 int imgi;
1131 int imgj;
1132 width=_state->info.frame_width;
1133 height=_state->info.frame_height;
1134 iframe=_state->granpos>>_state->info.keyframe_granule_shift;
1135 pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
1136 sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
1137 fp=fopen(fname,"wb");
1138 if(fp==NULL)return TH_EFAULT;
1139 image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
1140 if(image==NULL){
1141 fclose(fp);
1142 return TH_EFAULT;
1143 }
1144 png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
1145 if(png==NULL){
1146 oc_free_2d(image);
1147 fclose(fp);
1148 return TH_EFAULT;
1149 }
1150 info=png_create_info_struct(png);
1151 if(info==NULL){
1152 png_destroy_write_struct(&png,NULL);
1153 oc_free_2d(image);
1154 fclose(fp);
1155 return TH_EFAULT;
1156 }
1157 if(setjmp(png_jmpbuf(png))){
1158 png_destroy_write_struct(&png,&info);
1159 oc_free_2d(image);
1160 fclose(fp);
1161 return TH_EFAULT;
1162 }
1163 framei=_state->ref_frame_idx[_frame];
1164 y_row=_state->ref_frame_bufs[framei][0].data;
1165 u_row=_state->ref_frame_bufs[framei][1].data;
1166 v_row=_state->ref_frame_bufs[framei][2].data;
1167 y_stride=_state->ref_frame_bufs[framei][0].stride;
1168 u_stride=_state->ref_frame_bufs[framei][1].stride;
1169 v_stride=_state->ref_frame_bufs[framei][2].stride;
1170 /*Chroma up-sampling is just done with a box filter.
1171 This is very likely what will actually be used in practice on a real
1172 display, and also removes one more layer to search in for the source of
1173 artifacts.
1174 As an added bonus, it's dead simple.*/
1175 for(imgi=height;imgi-->0;){
1176 int dc;
1177 y=y_row;
1178 u=u_row;
1179 v=v_row;
1180 for(imgj=0;imgj<6*width;){
1181 float yval;
1182 float uval;
1183 float vval;
1184 unsigned rval;
1185 unsigned gval;
1186 unsigned bval;
1187 /*This is intentionally slow and very accurate.*/
1188 yval=(*y-16)*(1.0F/219);
1189 uval=(*u-128)*(2*(1-0.114F)/224);
1190 vval=(*v-128)*(2*(1-0.299F)/224);
1191 rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
1192 gval=OC_CLAMPI(0,(int)(65535*(
1193 yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
1194 bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
1195 image[imgi][imgj++]=(unsigned char)(rval>>8);
1196 image[imgi][imgj++]=(unsigned char)(rval&0xFF);
1197 image[imgi][imgj++]=(unsigned char)(gval>>8);
1198 image[imgi][imgj++]=(unsigned char)(gval&0xFF);
1199 image[imgi][imgj++]=(unsigned char)(bval>>8);
1200 image[imgi][imgj++]=(unsigned char)(bval&0xFF);
1201 dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
1202 y++;
1203 u+=dc;
1204 v+=dc;
1205 }
1206 dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
1207 y_row+=y_stride;
1208 u_row+=dc&u_stride;
1209 v_row+=dc&v_stride;
1210 }
1211 png_init_io(png,fp);
1212 png_set_compression_level(png,Z_BEST_COMPRESSION);
1213 png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
1214 PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
1215 switch(_state->info.colorspace){
1216 case TH_CS_ITU_REC_470M:{
1217 png_set_gAMA(png,info,2.2);
1218 png_set_cHRM_fixed(png,info,31006,31616,
1219 67000,32000,21000,71000,14000,8000);
1220 }break;
1221 case TH_CS_ITU_REC_470BG:{
1222 png_set_gAMA(png,info,2.67);
1223 png_set_cHRM_fixed(png,info,31271,32902,
1224 64000,33000,29000,60000,15000,6000);
1225 }break;
1226 default:break;
1227 }
1228 png_set_pHYs(png,info,_state->info.aspect_numerator,
1229 _state->info.aspect_denominator,0);
1230 png_set_rows(png,info,image);
1231 png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
1232 png_write_end(png,info);
1233 png_destroy_write_struct(&png,&info);
1234 oc_free_2d(image);
1235 fclose(fp);
1236 return 0;
1237}
1238#endif
1239
1240
1241
1242ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
1243 oc_theora_state *state;
1244 state=(oc_theora_state *)_encdec;
1245 if(_granpos>=0){
1246 ogg_int64_t iframe;
1247 ogg_int64_t pframe;
1248 iframe=_granpos>>state->info.keyframe_granule_shift;
1249 pframe=_granpos-(iframe<<state->info.keyframe_granule_shift);
1250 /*3.2.0 streams store the frame index in the granule position.
1251 3.2.1 and later store the frame count.
1252 We return the index, so adjust the value if we have a 3.2.1 or later
1253 stream.*/
1254 return iframe+pframe-TH_VERSION_CHECK(&state->info,3,2,1);
1255 }
1256 return -1;
1257}
1258
1259double th_granule_time(void *_encdec,ogg_int64_t _granpos){
1260 oc_theora_state *state;
1261 state=(oc_theora_state *)_encdec;
1262 if(_granpos>=0){
1263 return (th_granule_frame(_encdec, _granpos)+1)*(
1264 (double)state->info.fps_denominator/state->info.fps_numerator);
1265 }
1266 return -1;
1267}
1268