// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
//   WebP encoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)

14#ifndef WEBP_ENC_VP8I_ENC_H_
15#define WEBP_ENC_VP8I_ENC_H_
16
17#include <string.h> // for memcpy()
18#include "src/dec/common_dec.h"
19#include "src/dsp/dsp.h"
20#include "src/utils/bit_writer_utils.h"
21#include "src/utils/thread_utils.h"
22#include "src/utils/utils.h"
23#include "src/webp/encode.h"
24
25#ifdef __cplusplus
26extern "C" {
27#endif
28
//------------------------------------------------------------------------------
// Various defines and enums

// Encoder version numbers: major, minor and revision components.
#define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 1
#define ENC_REV_VERSION 0

// Limits used for loop filtering and for coefficient-level costing.
enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
       MAX_LEVEL = 2047          // max level (note: max codable is 2047 + 67)
     };

// Rate-distortion optimization levels, ordered from cheapest to most
// thorough.
typedef enum {
  RD_OPT_NONE        = 0,  // no rd-opt
  RD_OPT_BASIC       = 1,  // basic scoring (no trellis)
  RD_OPT_TRELLIS     = 2,  // perform trellis-quant on the final decision only
  RD_OPT_TRELLIS_ALL = 3   // trellis-quant for every scoring (much slower)
} VP8RDLevel;

// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// The original or reconstructed samples can be accessed using VP8Scan[].
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[].
// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
//   (see VP8Scan[] for accessing the blocks, along with
//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
//             +----+----+
//  Y_OFF_ENC  |YYYY|UUVV|
//  U_OFF_ENC  |YYYY|UUVV|
//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
//             |YYYY|....|
//             +----+----+
// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
//   Intra16 predictions (16x16 block each, two per row):
//         |I16DC16|I16TM16|
//         |I16VE16|I16HE16|
//   Chroma U/V predictions (16x8 block each, two per row):
//         |C8DC8|C8TM8|
//         |C8VE8|C8HE8|
//   Intra 4x4 predictions (4x4 block each)
//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
#define YUV_SIZE_ENC (BPS * 16)
#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS)   // I16+Chroma+I4 preds
#define Y_OFF_ENC    (0)
#define U_OFF_ENC    (16)
#define V_OFF_ENC    (16 + 8)

78extern const uint16_t VP8Scan[16];
79extern const uint16_t VP8UVModeOffsets[4];
80extern const uint16_t VP8I16ModeOffsets[4];
81extern const uint16_t VP8I4ModeOffsets[NUM_BMODES];
82
// Layout of prediction blocks
// Each macro is an offset expressed in units of BPS-wide rows plus a column.
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (I16DC16 + 16)
#define I16VE16 (1 * 16 * BPS)
#define I16HE16 (I16VE16 + 16)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (2 * 16 * BPS)
#define C8TM8 (C8DC8 + 1 * 16)
#define C8VE8 (2 * 16 * BPS + 8 * BPS)
#define C8HE8 (C8VE8 + 1 * 16)
// intra 4x4
#define I4DC4 (3 * 16 * BPS +  0)
#define I4TM4 (I4DC4 +  4)
#define I4VE4 (I4DC4 +  8)
#define I4HE4 (I4DC4 + 12)
#define I4RD4 (I4DC4 + 16)
#define I4VR4 (I4DC4 + 20)
#define I4LD4 (I4DC4 + 24)
#define I4VL4 (I4DC4 + 28)
#define I4HD4 (3 * 16 * BPS + 4 * BPS)
#define I4HU4 (I4HD4 + 4)
#define I4TMP (I4HD4 + 8)

typedef int64_t score_t;     // type used for scores, rate, distortion
// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
// in order to allow overflowing computations.
#define MAX_COST ((score_t)0x7fffffffffffffLL)

// Number of fractional bits used by the fixed-point quantization arithmetic.
#define QFIX 17
// Scales a bias 'b' by (QFIX - 8) bits.
#define BIAS(b)  ((b) << (QFIX - 8))
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
// Returns (n * iQ + B) with the QFIX low bits dropped. The sum is formed from
// uint32_t operands, so it wraps modulo 2^32 where 'int' is 32 bits wide;
// callers are expected to keep n * iQ + B within that range.
static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
  return (int)((n * iQ + B) >> QFIX);
}

// Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER

// quality below which error-diffusion is enabled
#define ERROR_DIFFUSION_QUALITY 98

126//------------------------------------------------------------------------------
127// Headers
128
129typedef uint32_t proba_t; // 16b + 16b
130typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
131typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
132typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
133typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
134typedef const uint16_t* CostArrayMap[16][NUM_CTX];
135typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
136
137typedef struct VP8Encoder VP8Encoder;
138
// segment features
typedef struct {
  int num_segments_;    // Actual number of segments. 1 segment only = unused.
  int update_map_;      // whether to update the segment map or not.
                        // must be 0 if there's only 1 segment.
  int size_;            // bit-cost for transmitting the segment map
} VP8EncSegmentHeader;

147// Struct collecting all frame-persistent probabilities.
148typedef struct {
149 uint8_t segments_[3]; // probabilities for segment tree
150 uint8_t skip_proba_; // final probability of being skipped.
151 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
152 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
153 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
154 CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes
155 int dirty_; // if true, need to call VP8CalculateLevelCosts()
156 int use_skip_proba_; // Note: we always use skip_proba for now.
157 int nb_skip_; // number of skipped blocks
158} VP8EncProba;
159
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config
typedef struct {
  int simple_;             // filtering type: 0=complex, 1=simple
  int level_;              // base filter level [0..63]
  int sharpness_;          // [0..7]
  int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
} VP8EncFilterHeader;

//------------------------------------------------------------------------------
// Information about the macroblocks.

// Per-macroblock settings, stored as bit-fields plus one byte.
typedef struct {
  // block type
  unsigned int type_:2;     // 0=i4x4, 1=i16x16
  unsigned int uv_mode_:2;
  unsigned int skip_:1;
  unsigned int segment_:2;
  uint8_t alpha_;      // quantization-susceptibility
} VP8MBInfo;

// Quantization parameters, one entry per coefficient of a 16-coefficient block.
typedef struct VP8Matrix {
  uint16_t q_[16];        // quantizer steps
  uint16_t iq_[16];       // reciprocals, fixed point.
  uint32_t bias_[16];     // rounding bias
  uint32_t zthresh_[16];  // value below which a coefficient is zeroed
  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
} VP8Matrix;

189typedef struct {
190 VP8Matrix y1_, y2_, uv_; // quantization matrices
191 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral.
192 // Lower values indicate a lower risk of blurriness.
193 int beta_; // filter-susceptibility, range [0,255].
194 int quant_; // final segment quantizer.
195 int fstrength_; // final in-loop filtering strength
196 int max_edge_; // max edge delta (for filtering strength)
197 int min_disto_; // minimum distortion required to trigger filtering record
198 // reactivities
199 int lambda_i16_, lambda_i4_, lambda_uv_;
200 int lambda_mode_, lambda_trellis_, tlambda_;
201 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
202
203 // lambda values for distortion-based evaluation
204 score_t i4_penalty_; // penalty for using Intra4
205} VP8SegmentInfo;
206
// Diffusion-error storage: first index is u/v, second is top or left.
typedef int8_t DError[2 /* u/v */][2 /* top or left */];

209// Handy transient struct to accumulate score and info during RD-optimization
210// and mode evaluation.
211typedef struct {
212 score_t D, SD; // Distortion, spectral distortion
213 score_t H, R, score; // header bits, rate, score.
214 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
215 int16_t y_ac_levels[16][16];
216 int16_t uv_levels[4 + 4][16];
217 int mode_i16; // mode number for intra16 prediction
218 uint8_t modes_i4[16]; // mode numbers for intra4 predictions
219 int mode_uv; // mode number of chroma prediction
220 uint32_t nz; // non-zero blocks
221 int8_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3
222} VP8ModeScore;
223
224// Iterator structure to iterate through macroblocks, pointing to the
225// right neighbouring data (samples, predictions, contexts, ...)
226typedef struct {
227 int x_, y_; // current macroblock
228 uint8_t* yuv_in_; // input samples
229 uint8_t* yuv_out_; // output samples
230 uint8_t* yuv_out2_; // secondary buffer swapped with yuv_out_.
231 uint8_t* yuv_p_; // scratch buffer for prediction
232 VP8Encoder* enc_; // back-pointer
233 VP8MBInfo* mb_; // current macroblock
234 VP8BitWriter* bw_; // current bit-writer
235 uint8_t* preds_; // intra mode predictors (4x4 blocks)
236 uint32_t* nz_; // non-zero pattern
237 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
238 uint8_t* i4_top_; // pointer to the current top boundary sample
239 int i4_; // current intra4x4 mode being tested
240 int top_nz_[9]; // top-non-zero context.
241 int left_nz_[9]; // left-non-zero. left_nz[8] is independent.
242 uint64_t bit_count_[4][3]; // bit counters for coded levels.
243 uint64_t luma_bits_; // macroblock bit-cost for luma
244 uint64_t uv_bits_; // macroblock bit-cost for chroma
245 LFStats* lf_stats_; // filter stats (borrowed from enc_)
246 int do_trellis_; // if true, perform extra level optimisation
247 int count_down_; // number of mb still to be processed
248 int count_down0_; // starting counter value (for progress)
249 int percent0_; // saved initial progress percent
250
251 DError left_derr_; // left error diffusion (u/v)
252 DError* top_derr_; // top diffusion error - NULL if disabled
253
254 uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
255 uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
256 uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
257
258 uint8_t* y_top_; // top luma samples at position 'x_'
259 uint8_t* uv_top_; // top u/v samples at position 'x_', packed as 16 bytes
260
261 // memory for storing y/u/v_left_
262 uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
263 // memory for yuv_*
264 uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
265} VP8EncIterator;
266
267 // in iterator.c
268// must be called first
269void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
270// restart a scan
271void VP8IteratorReset(VP8EncIterator* const it);
272// reset iterator position to row 'y'
273void VP8IteratorSetRow(VP8EncIterator* const it, int y);
274// set count down (=number of iterations to go)
275void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);
276// return true if iteration is finished
277int VP8IteratorIsDone(const VP8EncIterator* const it);
278// Import uncompressed samples from source.
279// If tmp_32 is not NULL, import boundary samples too.
280// tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
281void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32);
282// export decimated samples
283void VP8IteratorExport(const VP8EncIterator* const it);
284// go to next macroblock. Returns false if not finished.
285int VP8IteratorNext(VP8EncIterator* const it);
286// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
287void VP8IteratorSaveBoundary(VP8EncIterator* const it);
288// Report progression based on macroblock rows. Return 0 for user-abort request.
289int VP8IteratorProgress(const VP8EncIterator* const it,
290 int final_delta_percent);
291// Intra4x4 iterations
292void VP8IteratorStartI4(VP8EncIterator* const it);
293// returns true if not done.
294int VP8IteratorRotateI4(VP8EncIterator* const it,
295 const uint8_t* const yuv_out);
296
297// Non-zero context setup/teardown
298void VP8IteratorNzToBytes(VP8EncIterator* const it);
299void VP8IteratorBytesToNz(VP8EncIterator* const it);
300
301// Helper functions to set mode properties
302void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
303void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
304void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
305void VP8SetSkip(const VP8EncIterator* const it, int skip);
306void VP8SetSegment(const VP8EncIterator* const it, int segment);
307
//------------------------------------------------------------------------------
// Paginated token buffer

typedef struct VP8Tokens VP8Tokens;  // struct details in token.c

// Token buffer state. When DISABLE_TOKEN_BUFFER is defined, only the error
// flag remains.
typedef struct {
#if !defined(DISABLE_TOKEN_BUFFER)
  VP8Tokens* pages_;        // first page
  VP8Tokens** last_page_;   // last page
  uint16_t* tokens_;        // set to (*last_page_)->tokens_
  int left_;                // how many free tokens left before the page is full
  int page_size_;           // number of tokens per page
#endif
  int error_;         // true in case of malloc error
} VP8TBuffer;

324// initialize an empty buffer
325void VP8TBufferInit(VP8TBuffer* const b, int page_size);
326void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
327
328#if !defined(DISABLE_TOKEN_BUFFER)
329
330// Finalizes bitstream when probabilities are known.
331// Deletes the allocated token memory if final_pass is true.
332int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
333 const uint8_t* const probas, int final_pass);
334
335// record the coding of coefficients without knowing the probabilities yet
336int VP8RecordCoeffTokens(int ctx, const struct VP8Residual* const res,
337 VP8TBuffer* const tokens);
338
339// Estimate the final coded size given a set of 'probas'.
340size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
341
342#endif // !DISABLE_TOKEN_BUFFER
343
344//------------------------------------------------------------------------------
345// VP8Encoder
346
347struct VP8Encoder {
348 const WebPConfig* config_; // user configuration and parameters
349 WebPPicture* pic_; // input / output picture
350
351 // headers
352 VP8EncFilterHeader filter_hdr_; // filtering information
353 VP8EncSegmentHeader segment_hdr_; // segment information
354
355 int profile_; // VP8's profile, deduced from Config.
356
357 // dimension, in macroblock units.
358 int mb_w_, mb_h_;
359 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
360
361 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
362 int num_parts_;
363
364 // per-partition boolean decoders.
365 VP8BitWriter bw_; // part0
366 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
367 VP8TBuffer tokens_; // token buffer
368
369 int percent_; // for progress
370
371 // transparency blob
372 int has_alpha_;
373 uint8_t* alpha_data_; // non-NULL if transparency is present
374 uint32_t alpha_data_size_;
375 WebPWorker alpha_worker_;
376
377 // quantization info (one set of DC/AC dequant factor per segment)
378 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
379 int base_quant_; // nominal quantizer value. Only used
380 // for relative coding of segments' quant.
381 int alpha_; // global susceptibility (<=> complexity)
382 int uv_alpha_; // U/V quantization susceptibility
383 // global offset of quantizers, shared by all segments
384 int dq_y1_dc_;
385 int dq_y2_dc_, dq_y2_ac_;
386 int dq_uv_dc_, dq_uv_ac_;
387
388 // probabilities and statistics
389 VP8EncProba proba_;
390 uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
391 uint64_t sse_count_; // pixel count for the sse_[] stats
392 int coded_size_;
393 int residual_bytes_[3][4];
394 int block_count_[3];
395
396 // quality/speed settings
397 int method_; // 0=fastest, 6=best/slowest.
398 VP8RDLevel rd_opt_level_; // Deduced from method_.
399 int max_i4_header_bits_; // partition #0 safeness factor
400 int mb_header_limit_; // rough limit for header bits per MB
401 int thread_level_; // derived from config->thread_level
402 int do_search_; // derived from config->target_XXX
403 int use_tokens_; // if true, use token buffer
404
405 // Memory
406 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
407 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
408 uint32_t* nz_; // non-zero bit context: mb_w+1
409 uint8_t* y_top_; // top luma samples.
410 uint8_t* uv_top_; // top u/v samples.
411 // U and V are packed into 16 bytes (8 U + 8 V)
412 LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off)
413 DError* top_derr_; // diffusion error (NULL if disabled)
414};
415
416//------------------------------------------------------------------------------
417// internal functions. Not public.
418
419 // in tree.c
420extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
421extern const uint8_t
422 VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
423// Reset the token probabilities to their initial (default) values
424void VP8DefaultProbas(VP8Encoder* const enc);
425// Write the token probabilities
426void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
427// Writes the partition #0 modes (that is: all intra modes)
428void VP8CodeIntraModes(VP8Encoder* const enc);
429
430 // in syntax.c
431// Generates the final bitstream by coding the partition0 and headers,
432// and appending an assembly of all the pre-coded token partitions.
433// Return true if everything is ok.
434int VP8EncWrite(VP8Encoder* const enc);
435// Release memory allocated for bit-writing in VP8EncLoop & seq.
436void VP8EncFreeBitWriters(VP8Encoder* const enc);
437
438 // in frame.c
439extern const uint8_t VP8Cat3[];
440extern const uint8_t VP8Cat4[];
441extern const uint8_t VP8Cat5[];
442extern const uint8_t VP8Cat6[];
443
444// Form all the four Intra16x16 predictions in the yuv_p_ cache
445void VP8MakeLuma16Preds(const VP8EncIterator* const it);
446// Form all the four Chroma8x8 predictions in the yuv_p_ cache
447void VP8MakeChroma8Preds(const VP8EncIterator* const it);
448// Form all the ten Intra4x4 predictions in the yuv_p_ cache
449// for the 4x4 block it->i4_
450void VP8MakeIntra4Preds(const VP8EncIterator* const it);
451// Rate calculation
452int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
453int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
454int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
455// Main coding calls
456int VP8EncLoop(VP8Encoder* const enc);
457int VP8EncTokenLoop(VP8Encoder* const enc);
458
459 // in webpenc.c
460// Assign an error code to a picture. Return false for convenience.
461int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
462int WebPReportProgress(const WebPPicture* const pic,
463 int percent, int* const percent_store);
464
465 // in analysis.c
466// Main analysis loop. Decides the segmentations and complexity.
467// Assigns a first guess for Intra16 and uvmode_ prediction modes.
468int VP8EncAnalyze(VP8Encoder* const enc);
469
470 // in quant.c
471// Sets up segment's quantization values, base_quant_ and filter strengths.
472void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
473// Pick best modes and fills the levels. Returns true if skipped.
474int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
475 VP8RDLevel rd_opt);
476
477 // in alpha.c
478void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
479int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process
480int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
481int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
482
483// autofilter
484void VP8InitFilter(VP8EncIterator* const it);
485void VP8StoreFilterStats(VP8EncIterator* const it);
486void VP8AdjustFilterStrength(VP8EncIterator* const it);
487
488// returns the approximate filtering strength needed to smooth a edge
489// step of 'delta', given a sharpness parameter 'sharpness'.
490int VP8FilterStrengthFromDelta(int sharpness, int delta);
491
492 // misc utils for picture_*.c:
493
494// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
495void WebPPictureResetBuffers(WebPPicture* const picture);
496
497// Allocates ARGB buffer of given dimension (previous one is always free'd).
498// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
499// out-of-memory).
500int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
501
502// Allocates YUVA buffer of given dimension (previous one is always free'd).
503// Uses picture->csp to determine whether an alpha buffer is needed.
504// Preserves the ARGB buffer.
505// Returns false in case of error (invalid param, out-of-memory).
506int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
507
508// Clean-up the RGB samples under fully transparent area, to help lossless
509// compressibility (no guarantee, though). Assumes that pic->use_argb is true.
510void WebPCleanupTransparentAreaLossless(WebPPicture* const pic);
511
512//------------------------------------------------------------------------------
513
514#ifdef __cplusplus
515} // extern "C"
516#endif
517
518#endif // WEBP_ENC_VP8I_ENC_H_
519