1 | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // VP8Iterator: block iterator |
11 | // |
12 | // Author: Skal (pascal.massimino@gmail.com) |
13 | |
14 | #include <string.h> |
15 | |
16 | #include "src/enc/vp8i_enc.h" |
17 | |
18 | //------------------------------------------------------------------------------ |
19 | // VP8Iterator |
20 | //------------------------------------------------------------------------------ |
21 | |
22 | static void InitLeft(VP8EncIterator* const it) { |
23 | it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = |
24 | (it->y_ > 0) ? 129 : 127; |
25 | memset(it->y_left_, 129, 16); |
26 | memset(it->u_left_, 129, 8); |
27 | memset(it->v_left_, 129, 8); |
28 | it->left_nz_[8] = 0; |
29 | if (it->top_derr_ != NULL) { |
30 | memset(&it->left_derr_, 0, sizeof(it->left_derr_)); |
31 | } |
32 | } |
33 | |
34 | static void InitTop(VP8EncIterator* const it) { |
35 | const VP8Encoder* const enc = it->enc_; |
36 | const size_t top_size = enc->mb_w_ * 16; |
37 | memset(enc->y_top_, 127, 2 * top_size); |
38 | memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); |
39 | if (enc->top_derr_ != NULL) { |
40 | memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_)); |
41 | } |
42 | } |
43 | |
44 | void VP8IteratorSetRow(VP8EncIterator* const it, int y) { |
45 | VP8Encoder* const enc = it->enc_; |
46 | it->x_ = 0; |
47 | it->y_ = y; |
48 | it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)]; |
49 | it->preds_ = enc->preds_ + y * 4 * enc->preds_w_; |
50 | it->nz_ = enc->nz_; |
51 | it->mb_ = enc->mb_info_ + y * enc->mb_w_; |
52 | it->y_top_ = enc->y_top_; |
53 | it->uv_top_ = enc->uv_top_; |
54 | InitLeft(it); |
55 | } |
56 | |
57 | void VP8IteratorReset(VP8EncIterator* const it) { |
58 | VP8Encoder* const enc = it->enc_; |
59 | VP8IteratorSetRow(it, 0); |
60 | VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default |
61 | InitTop(it); |
62 | memset(it->bit_count_, 0, sizeof(it->bit_count_)); |
63 | it->do_trellis_ = 0; |
64 | } |
65 | |
66 | void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) { |
67 | it->count_down_ = it->count_down0_ = count_down; |
68 | } |
69 | |
70 | int VP8IteratorIsDone(const VP8EncIterator* const it) { |
71 | return (it->count_down_ <= 0); |
72 | } |
73 | |
74 | void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { |
75 | it->enc_ = enc; |
76 | it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_); |
77 | it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC; |
78 | it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC; |
79 | it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC; |
80 | it->lf_stats_ = enc->lf_stats_; |
81 | it->percent0_ = enc->percent_; |
82 | it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); |
83 | it->u_left_ = it->y_left_ + 16 + 16; |
84 | it->v_left_ = it->u_left_ + 16; |
85 | it->top_derr_ = enc->top_derr_; |
86 | VP8IteratorReset(it); |
87 | } |
88 | |
89 | int VP8IteratorProgress(const VP8EncIterator* const it, int delta) { |
90 | VP8Encoder* const enc = it->enc_; |
91 | if (delta && enc->pic_->progress_hook != NULL) { |
92 | const int done = it->count_down0_ - it->count_down_; |
93 | const int percent = (it->count_down0_ <= 0) |
94 | ? it->percent0_ |
95 | : it->percent0_ + delta * done / it->count_down0_; |
96 | return WebPReportProgress(enc->pic_, percent, &enc->percent_); |
97 | } |
98 | return 1; |
99 | } |
100 | |
101 | //------------------------------------------------------------------------------ |
102 | // Import the source samples into the cache. Takes care of replicating |
103 | // boundary pixels if necessary. |
104 | |
105 | static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; } |
106 | |
107 | static void ImportBlock(const uint8_t* src, int src_stride, |
108 | uint8_t* dst, int w, int h, int size) { |
109 | int i; |
110 | for (i = 0; i < h; ++i) { |
111 | memcpy(dst, src, w); |
112 | if (w < size) { |
113 | memset(dst + w, dst[w - 1], size - w); |
114 | } |
115 | dst += BPS; |
116 | src += src_stride; |
117 | } |
118 | for (i = h; i < size; ++i) { |
119 | memcpy(dst, dst - BPS, size); |
120 | dst += BPS; |
121 | } |
122 | } |
123 | |
// Gathers 'len' samples from 'src', stepping 'src_stride' bytes between
// samples, into the contiguous array 'dst'. The remaining entries, up to
// 'total_len', are filled with the last gathered sample dst[len - 1].
static void ImportLine(const uint8_t* src, int src_stride,
                       uint8_t* dst, int len, int total_len) {
  int n = 0;

  while (n < len) {
    dst[n] = *src;
    src += src_stride;
    ++n;
  }
  while (n < total_len) {
    dst[n] = dst[len - 1];
    ++n;
  }
}
130 | |
131 | void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) { |
132 | const VP8Encoder* const enc = it->enc_; |
133 | const int x = it->x_, y = it->y_; |
134 | const WebPPicture* const pic = enc->pic_; |
135 | const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16; |
136 | const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8; |
137 | const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8; |
138 | const int w = MinSize(pic->width - x * 16, 16); |
139 | const int h = MinSize(pic->height - y * 16, 16); |
140 | const int uv_w = (w + 1) >> 1; |
141 | const int uv_h = (h + 1) >> 1; |
142 | |
143 | ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16); |
144 | ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8); |
145 | ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8); |
146 | |
147 | if (tmp_32 == NULL) return; |
148 | |
149 | // Import source (uncompressed) samples into boundary. |
150 | if (x == 0) { |
151 | InitLeft(it); |
152 | } else { |
153 | if (y == 0) { |
154 | it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127; |
155 | } else { |
156 | it->y_left_[-1] = ysrc[- 1 - pic->y_stride]; |
157 | it->u_left_[-1] = usrc[- 1 - pic->uv_stride]; |
158 | it->v_left_[-1] = vsrc[- 1 - pic->uv_stride]; |
159 | } |
160 | ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16); |
161 | ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8); |
162 | ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8); |
163 | } |
164 | |
165 | it->y_top_ = tmp_32 + 0; |
166 | it->uv_top_ = tmp_32 + 16; |
167 | if (y == 0) { |
168 | memset(tmp_32, 127, 32 * sizeof(*tmp_32)); |
169 | } else { |
170 | ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16); |
171 | ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8); |
172 | ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8); |
173 | } |
174 | } |
175 | |
176 | //------------------------------------------------------------------------------ |
177 | // Copy back the compressed samples into user space if requested. |
178 | |
179 | static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride, |
180 | int w, int h) { |
181 | while (h-- > 0) { |
182 | memcpy(dst, src, w); |
183 | dst += dst_stride; |
184 | src += BPS; |
185 | } |
186 | } |
187 | |
188 | void VP8IteratorExport(const VP8EncIterator* const it) { |
189 | const VP8Encoder* const enc = it->enc_; |
190 | if (enc->config_->show_compressed) { |
191 | const int x = it->x_, y = it->y_; |
192 | const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; |
193 | const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC; |
194 | const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC; |
195 | const WebPPicture* const pic = enc->pic_; |
196 | uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16; |
197 | uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8; |
198 | uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8; |
199 | int w = (pic->width - x * 16); |
200 | int h = (pic->height - y * 16); |
201 | |
202 | if (w > 16) w = 16; |
203 | if (h > 16) h = 16; |
204 | |
205 | // Luma plane |
206 | ExportBlock(ysrc, ydst, pic->y_stride, w, h); |
207 | |
208 | { // U/V planes |
209 | const int uv_w = (w + 1) >> 1; |
210 | const int uv_h = (h + 1) >> 1; |
211 | ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h); |
212 | ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h); |
213 | } |
214 | } |
215 | } |
216 | |
217 | //------------------------------------------------------------------------------ |
218 | // Non-zero contexts setup/teardown |
219 | |
220 | // Nz bits: |
221 | // 0 1 2 3 Y |
222 | // 4 5 6 7 |
223 | // 8 9 10 11 |
224 | // 12 13 14 15 |
225 | // 16 17 U |
226 | // 18 19 |
227 | // 20 21 V |
228 | // 22 23 |
229 | // 24 DC-intra16 |
230 | |
231 | // Convert packed context to byte array |
232 | #define BIT(nz, n) (!!((nz) & (1 << (n)))) |
233 | |
234 | void VP8IteratorNzToBytes(VP8EncIterator* const it) { |
235 | const int tnz = it->nz_[0], lnz = it->nz_[-1]; |
236 | int* const top_nz = it->top_nz_; |
237 | int* const left_nz = it->left_nz_; |
238 | |
239 | // Top-Y |
240 | top_nz[0] = BIT(tnz, 12); |
241 | top_nz[1] = BIT(tnz, 13); |
242 | top_nz[2] = BIT(tnz, 14); |
243 | top_nz[3] = BIT(tnz, 15); |
244 | // Top-U |
245 | top_nz[4] = BIT(tnz, 18); |
246 | top_nz[5] = BIT(tnz, 19); |
247 | // Top-V |
248 | top_nz[6] = BIT(tnz, 22); |
249 | top_nz[7] = BIT(tnz, 23); |
250 | // DC |
251 | top_nz[8] = BIT(tnz, 24); |
252 | |
253 | // left-Y |
254 | left_nz[0] = BIT(lnz, 3); |
255 | left_nz[1] = BIT(lnz, 7); |
256 | left_nz[2] = BIT(lnz, 11); |
257 | left_nz[3] = BIT(lnz, 15); |
258 | // left-U |
259 | left_nz[4] = BIT(lnz, 17); |
260 | left_nz[5] = BIT(lnz, 19); |
261 | // left-V |
262 | left_nz[6] = BIT(lnz, 21); |
263 | left_nz[7] = BIT(lnz, 23); |
264 | // left-DC is special, iterated separately |
265 | } |
266 | |
267 | void VP8IteratorBytesToNz(VP8EncIterator* const it) { |
268 | uint32_t nz = 0; |
269 | const int* const top_nz = it->top_nz_; |
270 | const int* const left_nz = it->left_nz_; |
271 | // top |
272 | nz |= (top_nz[0] << 12) | (top_nz[1] << 13); |
273 | nz |= (top_nz[2] << 14) | (top_nz[3] << 15); |
274 | nz |= (top_nz[4] << 18) | (top_nz[5] << 19); |
275 | nz |= (top_nz[6] << 22) | (top_nz[7] << 23); |
276 | nz |= (top_nz[8] << 24); // we propagate the _top_ bit, esp. for intra4 |
277 | // left |
278 | nz |= (left_nz[0] << 3) | (left_nz[1] << 7); |
279 | nz |= (left_nz[2] << 11); |
280 | nz |= (left_nz[4] << 17) | (left_nz[6] << 21); |
281 | |
282 | *it->nz_ = nz; |
283 | } |
284 | |
285 | #undef BIT |
286 | |
287 | //------------------------------------------------------------------------------ |
288 | // Advance to the next position, doing the bookkeeping. |
289 | |
290 | void VP8IteratorSaveBoundary(VP8EncIterator* const it) { |
291 | VP8Encoder* const enc = it->enc_; |
292 | const int x = it->x_, y = it->y_; |
293 | const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC; |
294 | const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC; |
295 | if (x < enc->mb_w_ - 1) { // left |
296 | int i; |
297 | for (i = 0; i < 16; ++i) { |
298 | it->y_left_[i] = ysrc[15 + i * BPS]; |
299 | } |
300 | for (i = 0; i < 8; ++i) { |
301 | it->u_left_[i] = uvsrc[7 + i * BPS]; |
302 | it->v_left_[i] = uvsrc[15 + i * BPS]; |
303 | } |
304 | // top-left (before 'top'!) |
305 | it->y_left_[-1] = it->y_top_[15]; |
306 | it->u_left_[-1] = it->uv_top_[0 + 7]; |
307 | it->v_left_[-1] = it->uv_top_[8 + 7]; |
308 | } |
309 | if (y < enc->mb_h_ - 1) { // top |
310 | memcpy(it->y_top_, ysrc + 15 * BPS, 16); |
311 | memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8); |
312 | } |
313 | } |
314 | |
315 | int VP8IteratorNext(VP8EncIterator* const it) { |
316 | if (++it->x_ == it->enc_->mb_w_) { |
317 | VP8IteratorSetRow(it, ++it->y_); |
318 | } else { |
319 | it->preds_ += 4; |
320 | it->mb_ += 1; |
321 | it->nz_ += 1; |
322 | it->y_top_ += 16; |
323 | it->uv_top_ += 16; |
324 | } |
325 | return (0 < --it->count_down_); |
326 | } |
327 | |
328 | //------------------------------------------------------------------------------ |
329 | // Helper function to set mode properties |
330 | |
331 | void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) { |
332 | uint8_t* preds = it->preds_; |
333 | int y; |
334 | for (y = 0; y < 4; ++y) { |
335 | memset(preds, mode, 4); |
336 | preds += it->enc_->preds_w_; |
337 | } |
338 | it->mb_->type_ = 1; |
339 | } |
340 | |
341 | void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) { |
342 | uint8_t* preds = it->preds_; |
343 | int y; |
344 | for (y = 4; y > 0; --y) { |
345 | memcpy(preds, modes, 4 * sizeof(*modes)); |
346 | preds += it->enc_->preds_w_; |
347 | modes += 4; |
348 | } |
349 | it->mb_->type_ = 0; |
350 | } |
351 | |
352 | void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) { |
353 | it->mb_->uv_mode_ = mode; |
354 | } |
355 | |
356 | void VP8SetSkip(const VP8EncIterator* const it, int skip) { |
357 | it->mb_->skip_ = skip; |
358 | } |
359 | |
360 | void VP8SetSegment(const VP8EncIterator* const it, int segment) { |
361 | it->mb_->segment_ = segment; |
362 | } |
363 | |
364 | //------------------------------------------------------------------------------ |
365 | // Intra4x4 sub-blocks iteration |
366 | // |
// We store and update the boundary samples in an array of 37 pixels. They
// are updated as we iterate over and reconstruct each intra4x4 block in turn.
369 | // The position of the samples has the following snake pattern: |
370 | // |
371 | // 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36 <- Top-right |
372 | // --+-----------+-----------+-----------+-----------+ |
373 | // 15| 19| 23| 27| 31| |
374 | // 14| 18| 22| 26| 30| |
375 | // 13| 17| 21| 25| 29| |
376 | // 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28| |
377 | // --+-----------+-----------+-----------+-----------+ |
378 | // 11| 15| 19| 23| 27| |
379 | // 10| 14| 18| 22| 26| |
380 | // 9| 13| 17| 21| 25| |
381 | // 8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24| |
382 | // --+-----------+-----------+-----------+-----------+ |
383 | // 7| 11| 15| 19| 23| |
384 | // 6| 10| 14| 18| 22| |
385 | // 5| 9| 13| 17| 21| |
386 | // 4| 5 6 7 8| 9 10 11 12|13 14 15 16|17 18 19 20| |
387 | // --+-----------+-----------+-----------+-----------+ |
388 | // 3| 7| 11| 15| 19| |
389 | // 2| 6| 10| 14| 18| |
390 | // 1| 5| 9| 13| 17| |
391 | // 0| 1 2 3 4| 5 6 7 8| 9 10 11 12|13 14 15 16| |
392 | // --+-----------+-----------+-----------+-----------+ |
393 | |
// For each of the sixteen 4x4 sub-blocks (raster order), the index in the
// boundary array of its first top sample; a pointer to that position is what
// gets passed to the prediction functions in dsp.c.
// Moving one sub-block to the right adds 4, moving one row down subtracts 4.
static const uint8_t VP8TopLeftI4[16] = {
  17, 21, 25, 29,   // sub-blocks #0  .. #3
  13, 17, 21, 25,   // sub-blocks #4  .. #7
   9, 13, 17, 21,   // sub-blocks #8  .. #11
   5,  9, 13, 17    // sub-blocks #12 .. #15
};
402 | |
403 | void VP8IteratorStartI4(VP8EncIterator* const it) { |
404 | const VP8Encoder* const enc = it->enc_; |
405 | int i; |
406 | |
407 | it->i4_ = 0; // first 4x4 sub-block |
408 | it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0]; |
409 | |
410 | // Import the boundary samples |
411 | for (i = 0; i < 17; ++i) { // left |
412 | it->i4_boundary_[i] = it->y_left_[15 - i]; |
413 | } |
414 | for (i = 0; i < 16; ++i) { // top |
415 | it->i4_boundary_[17 + i] = it->y_top_[i]; |
416 | } |
417 | // top-right samples have a special case on the far right of the picture |
418 | if (it->x_ < enc->mb_w_ - 1) { |
419 | for (i = 16; i < 16 + 4; ++i) { |
420 | it->i4_boundary_[17 + i] = it->y_top_[i]; |
421 | } |
422 | } else { // else, replicate the last valid pixel four times |
423 | for (i = 16; i < 16 + 4; ++i) { |
424 | it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15]; |
425 | } |
426 | } |
427 | VP8IteratorNzToBytes(it); // import the non-zero context |
428 | } |
429 | |
430 | int VP8IteratorRotateI4(VP8EncIterator* const it, |
431 | const uint8_t* const yuv_out) { |
432 | const uint8_t* const blk = yuv_out + VP8Scan[it->i4_]; |
433 | uint8_t* const top = it->i4_top_; |
434 | int i; |
435 | |
436 | // Update the cache with 7 fresh samples |
437 | for (i = 0; i <= 3; ++i) { |
438 | top[-4 + i] = blk[i + 3 * BPS]; // store future top samples |
439 | } |
440 | if ((it->i4_ & 3) != 3) { // if not on the right sub-blocks #3, #7, #11, #15 |
441 | for (i = 0; i <= 2; ++i) { // store future left samples |
442 | top[i] = blk[3 + (2 - i) * BPS]; |
443 | } |
444 | } else { // else replicate top-right samples, as says the specs. |
445 | for (i = 0; i <= 3; ++i) { |
446 | top[i] = top[i + 4]; |
447 | } |
448 | } |
449 | // move pointers to next sub-block |
450 | ++it->i4_; |
451 | if (it->i4_ == 16) { // we're done |
452 | return 0; |
453 | } |
454 | |
455 | it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_]; |
456 | return 1; |
457 | } |
458 | |
459 | //------------------------------------------------------------------------------ |
460 | |