// Copyright 2017 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel, NEON version.
//
// Author: Skal (pascal.massimino@gmail.com)

#include "./dsp.h"

#if defined(WEBP_USE_NEON)

#include "./neon.h"

//------------------------------------------------------------------------------

#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
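// Note: 0x8081 = 32897 = ceil(2^23 / 255), so PREMULTIPLY(x, MULTIPLIER(a))
// == (x * a * 32897) >> 23 == (x * a) / 255 for all 8-bit x and a.
// Worked example (illustrative values only):
//   PREMULTIPLY(200, MULTIPLIER(128)) == (200 * 128 * 32897) >> 23 == 100,
// which matches (200 * 128) / 255 == 100.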

#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do {            \
  const uint8x8_t alpha = (V).val[(ALPHA)];                \
  const uint16x8_t r1 = vmull_u8((V).val[1], alpha);       \
  const uint16x8_t g1 = vmull_u8((V).val[2], alpha);       \
  const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
  /* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */          \
  const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8);            \
  const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8);            \
  const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8);            \
  const uint16x8_t r3 = vaddq_u16(r2, kOne);               \
  const uint16x8_t g3 = vaddq_u16(g2, kOne);               \
  const uint16x8_t b3 = vaddq_u16(b2, kOne);               \
  (V).val[1] = vshrn_n_u16(r3, 8);                         \
  (V).val[2] = vshrn_n_u16(g3, 8);                         \
  (V).val[(OTHER)] = vshrn_n_u16(b3, 8);                   \
} while (0)
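// Scalar sketch of the per-channel math in MULTIPLY_BY_ALPHA (names are
// illustrative only): for an 8-bit channel value 'c' and alpha 'a',
//   uint32_t v = c * a;            /* 16-bit product, as from vmull_u8() */
//   v = (v + (v >> 8) + 1) >> 8;   /* == v / 255 for v <= 255 * 255      */
// vsraq_n_u16() forms v + (v >> 8), vaddq_u16() adds the +1 bias (kOne), and
// vshrn_n_u16() does the final >> 8 while narrowing back to 8 bits.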

static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
                                    int w, int h, int stride) {
  const uint16x8_t kOne = vdupq_n_u16(1u);
  while (h-- > 0) {
    uint32_t* const rgbx = (uint32_t*)rgba;
    int i = 0;
    if (alpha_first) {
      for (; i + 8 <= w; i += 8) {
        // load aaaa...|rrrr...|gggg...|bbbb...
        uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
        MULTIPLY_BY_ALPHA(RGBX, 0, 3);
        vst4_u8((uint8_t*)(rgbx + i), RGBX);
      }
    } else {
      for (; i + 8 <= w; i += 8) {
        uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
        MULTIPLY_BY_ALPHA(RGBX, 3, 0);
        vst4_u8((uint8_t*)(rgbx + i), RGBX);
      }
    }
    // Finish with left-overs.
    for (; i < w; ++i) {
      uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
      const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
      const uint32_t a = alpha[4 * i];
      if (a != 0xff) {
        const uint32_t mult = MULTIPLIER(a);
        rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
        rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
        rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
      }
    }
    rgba += stride;
  }
}
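
// Hypothetical usage sketch (illustrative only): once
// WebPInitAlphaProcessing() has installed this implementation on a
// NEON-capable CPU, callers reach it through the dispatch pointer, e.g.
//   WebPApplyAlphaMultiply(rgba, 0 /* alpha last */, width, height, stride);
// to premultiply an RGBA buffer in place.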
#undef MULTIPLY_BY_ALPHA
#undef MULTIPLIER
#undef PREMULTIPLY

//------------------------------------------------------------------------------

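// Copies the alpha[] plane into the interleaved dst[] buffer (one byte per
// 4-byte pixel) and returns whether any non-opaque (!= 0xff) value was seen,
// so the caller can, e.g., skip premultiplication for fully opaque rows.
// Vector lanes are AND-accumulated in mask8 and folded with the scalar mask.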
static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
                              int width, int height,
                              uint8_t* dst, int dst_stride) {
  uint32_t alpha_mask = 0xffu;  // the scalar tail accumulates an 8-bit mask.
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
  for (j = 0; j < height; ++j) {
    // We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
    // mode). So we must be sure dst[4 * (i + 8) - 1] is writable for the
    // store. Hence the test with 'width - 1' instead of just 'width'.
    for (i = 0; i + 8 <= width - 1; i += 8) {
      uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(dst + 4 * i));
      const uint8x8_t alphas = vld1_u8(alpha + i);
      rgbX.val[0] = alphas;
      vst4_u8((uint8_t*)(dst + 4 * i), rgbX);
      mask8 = vand_u8(mask8, alphas);
    }
    for (; i < width; ++i) {
      const uint32_t alpha_value = alpha[i];
      dst[4 * i] = alpha_value;
      alpha_mask &= alpha_value;
    }
    alpha += alpha_stride;
    dst += dst_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
  alpha_mask *= 0x01010101u;  // replicate the scalar mask to all four bytes.
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask != 0xffffffffu);
}

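// Stores each alpha value into the green channel of a 32-bit pixel
// (dst[i] = alpha[i] << 8) and leaves the other channels zero, matching the
// layout used when the alpha plane is coded with the lossless format.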
static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
                                      int width, int height,
                                      uint32_t* dst, int dst_stride) {
  int i, j;
  uint8x8x4_t greens;  // leave A/R/B channels zero'd.
  greens.val[0] = vdup_n_u8(0);
  greens.val[2] = vdup_n_u8(0);
  greens.val[3] = vdup_n_u8(0);
  for (j = 0; j < height; ++j) {
    for (i = 0; i + 8 <= width; i += 8) {
      greens.val[1] = vld1_u8(alpha + i);
      vst4_u8((uint8_t*)(dst + i), greens);
    }
    for (; i < width; ++i) dst[i] = alpha[i] << 8;
    alpha += alpha_stride;
    dst += dst_stride;
  }
}

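// Extracts the alpha channel of the interleaved argb[] rows into a separate
// alpha[] plane and returns whether that plane is fully opaque (all 0xff),
// using the same mask8 / alpha_mask accumulation as DispatchAlpha_NEON above.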
static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
                             int width, int height,
                             uint8_t* alpha, int alpha_stride) {
  uint32_t alpha_mask = 0xffu;  // the scalar tail accumulates an 8-bit mask.
  uint8x8_t mask8 = vdup_n_u8(0xff);
  uint32_t tmp[2];
  int i, j;
  for (j = 0; j < height; ++j) {
    // We don't know if alpha is first or last in argb[] (depending on the
    // rgbA/Argb mode), so argb may point up to 3 bytes into the row and the
    // 32-byte vld4_u8() below must not run past it. Hence the test with
    // 'width - 1' instead of just 'width'.
    for (i = 0; i + 8 <= width - 1; i += 8) {
      const uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(argb + 4 * i));
      const uint8x8_t alphas = rgbX.val[0];
      vst1_u8((uint8_t*)(alpha + i), alphas);
      mask8 = vand_u8(mask8, alphas);
    }
    for (; i < width; ++i) {
      alpha[i] = argb[4 * i];
      alpha_mask &= alpha[i];
    }
    argb += argb_stride;
    alpha += alpha_stride;
  }
  vst1_u8((uint8_t*)tmp, mask8);
  alpha_mask *= 0x01010101u;  // replicate the scalar mask to all four bytes.
  alpha_mask &= tmp[0];
  alpha_mask &= tmp[1];
  return (alpha_mask == 0xffffffffu);
}

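// Inverse of DispatchAlphaToGreen_NEON: extracts the green channel of each
// 32-bit pixel, i.e. alpha[i] = (argb[i] >> 8) & 0xff, 16 pixels at a time.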
static void ExtractGreen_NEON(const uint32_t* argb,
                              uint8_t* alpha, int size) {
  int i;
  for (i = 0; i + 16 <= size; i += 16) {
    const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
    const uint8x16_t greens = rgbX.val[1];
    vst1q_u8(alpha + i, greens);
  }
  for (; i < size; ++i) alpha[i] = (argb[i] >> 8) & 0xff;
}

//------------------------------------------------------------------------------

extern void WebPInitAlphaProcessingNEON(void);

WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingNEON(void) {
  WebPApplyAlphaMultiply = ApplyAlphaMultiply_NEON;
  WebPDispatchAlpha = DispatchAlpha_NEON;
  WebPDispatchAlphaToGreen = DispatchAlphaToGreen_NEON;
  WebPExtractAlpha = ExtractAlpha_NEON;
  WebPExtractGreen = ExtractGreen_NEON;
}

#else  // !WEBP_USE_NEON

WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingNEON)

#endif  // WEBP_USE_NEON
| 192 | |