cost_sse2.c source code [Godot/thirdparty/libwebp/src/dsp/cost_sse2.c]

1	// Copyright 2015 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// SSE2 version of cost functions
11	//
12	// Author: Skal (pascal.massimino@gmail.com)
13
14	#include "src/dsp/dsp.h"
15
16	#if defined(WEBP_USE_SSE2)
17	#include <emmintrin.h>
18
19	#include "src/enc/cost_enc.h"
20	#include "src/enc/vp8i_enc.h"
21	#include "src/utils/utils.h"
22
23	//------------------------------------------------------------------------------
24
25	static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
26	VP8Residual* const res) {
27	const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + `0`));
28	const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + `8`));
29	// Use SSE2 to compare 16 values with a single instruction.
30	const __m128i zero = _mm_setzero_si128();
31	const __m128i m0 = _mm_packs_epi16(c0, c1);
32	const __m128i m1 = _mm_cmpeq_epi8(m0, zero);
33	// Get the comparison results as a bitmask into 16bits. Negate the mask to get
34	// the position of entries that are not equal to zero. We don't need to mask
35	// out least significant bits according to res->first, since coeffs[0] is 0
36	// if res->first > 0.
37	const uint32_t mask = `0x0000ffffu` ^ (uint32_t)_mm_movemask_epi8(m1);
38	// The position of the most significant non-zero bit indicates the position of
39	// the last non-zero value.
40	assert(res->first == `0` \|\| coeffs[`0`] == `0`);
41	res->last = mask ? BitsLog2Floor(mask) : -`1`;
42	res->coeffs = coeffs;
43	}
44
45	static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
46	uint8_t levels[`16`], ctxs[`16`];
47	uint16_t abs_levels[`16`];
48	int n = res->first;
49	// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
50	const int p0 = res->prob[n][ctx0][`0`];
51	CostArrayPtr const costs = res->costs;
52	const uint16_t* t = costs[n][ctx0];
53	// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
54	// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
55	// be missing during the loop.
56	int cost = (ctx0 == `0`) ? VP8BitCost(`1`, p0) : `0`;
57
58	if (res->last < `0`) {
59	return VP8BitCost(`0`, p0);
60	}
61
62	{ // precompute clamped levels and contexts, packed to 8b.
63	const __m128i zero = _mm_setzero_si128();
64	const __m128i kCst2 = _mm_set1_epi8(`2`);
65	const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
66	const __m128i c0 = _mm_loadu_si128((const __m128i*)&res->coeffs[`0`]);
67	const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[`8`]);
68	const __m128i D0 = _mm_sub_epi16(zero, c0);
69	const __m128i D1 = _mm_sub_epi16(zero, c1);
70	const __m128i E0 = _mm_max_epi16(c0, D0); // abs(v), 16b
71	const __m128i E1 = _mm_max_epi16(c1, D1);
72	const __m128i F = _mm_packs_epi16(E0, E1);
73	const __m128i G = _mm_min_epu8(F, kCst2); // context = 0,1,2
74	const __m128i H = _mm_min_epu8(F, kCst67); // clamp_level in [0..67]
75
76	_mm_storeu_si128((__m128i*)&ctxs[`0`], G);
77	_mm_storeu_si128((__m128i*)&levels[`0`], H);
78
79	_mm_storeu_si128((__m128i*)&abs_levels[`0`], E0);
80	_mm_storeu_si128((__m128i*)&abs_levels[`8`], E1);
81	}
82	for (; n < res->last; ++n) {
83	const int ctx = ctxs[n];
84	const int level = levels[n];
85	const int flevel = abs_levels[n]; // full level
86	cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
87	t = costs[n + `1`][ctx];
88	}
89	// Last coefficient is always non-zero
90	{
91	const int level = levels[n];
92	const int flevel = abs_levels[n];
93	assert(flevel != `0`);
94	cost += VP8LevelFixedCosts[flevel] + t[level];
95	if (n < `15`) {
96	const int b = VP8EncBands[n + `1`];
97	const int ctx = ctxs[n];
98	const int last_p0 = res->prob[b][ctx][`0`];
99	cost += VP8BitCost(`0`, last_p0);
100	}
101	}
102	return cost;
103	}
104
105	//------------------------------------------------------------------------------
106	// Entry point
107
108	extern void VP8EncDspCostInitSSE2(void);
109
110	WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
111	VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
112	VP8GetResidualCost = GetResidualCost_SSE2;
113	}
114
115	#else // !WEBP_USE_SSE2
116
117	WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)
118
119	#endif // WEBP_USE_SSE2
120

Browse the source code of Godot/thirdparty/libwebp/src/dsp/cost_sse2.c