alpha_processing_neon.c source code [Godot/thirdparty/libwebp/src/dsp/alpha_processing_neon.c]

1	// Copyright 2017 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// Utilities for processing transparent channel, NEON version.
11	//
12	// Author: Skal (pascal.massimino@gmail.com)
13
14	#include "src/dsp/dsp.h"
15
16	#if defined(WEBP_USE_NEON)
17
18	#include "src/dsp/neon.h"
19
20	//------------------------------------------------------------------------------
21
22	#define MULTIPLIER(a) ((a) * 0x8081)
23	#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
24
25	#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do { \
26	const uint8x8_t alpha = (V).val[(ALPHA)]; \
27	const uint16x8_t r1 = vmull_u8((V).val[1], alpha); \
28	const uint16x8_t g1 = vmull_u8((V).val[2], alpha); \
29	const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
30	/* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */ \
31	const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8); \
32	const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8); \
33	const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8); \
34	const uint16x8_t r3 = vaddq_u16(r2, kOne); \
35	const uint16x8_t g3 = vaddq_u16(g2, kOne); \
36	const uint16x8_t b3 = vaddq_u16(b2, kOne); \
37	(V).val[1] = vshrn_n_u16(r3, 8); \
38	(V).val[2] = vshrn_n_u16(g3, 8); \
39	(V).val[(OTHER)] = vshrn_n_u16(b3, 8); \
40	} while (0)
41
42	static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
43	int w, int h, int stride) {
44	const uint16x8_t kOne = vdupq_n_u16(`1u`);
45	while (h-- > `0`) {
46	uint32_t* const rgbx = (uint32_t*)rgba;
47	int i = `0`;
48	if (alpha_first) {
49	for (; i + `8` <= w; i += `8`) {
50	// load aaaa...\|rrrr...\|gggg...\|bbbb...
51	uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
52	MULTIPLY_BY_ALPHA(RGBX, `0`, `3`);
53	vst4_u8((uint8_t*)(rgbx + i), RGBX);
54	}
55	} else {
56	for (; i + `8` <= w; i += `8`) {
57	uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
58	MULTIPLY_BY_ALPHA(RGBX, `3`, `0`);
59	vst4_u8((uint8_t*)(rgbx + i), RGBX);
60	}
61	}
62	// Finish with left-overs.
63	for (; i < w; ++i) {
64	uint8_t* const rgb = rgba + (alpha_first ? `1` : `0`);
65	const uint8_t* const alpha = rgba + (alpha_first ? `0` : `3`);
66	const uint32_t a = alpha[`4` * i];
67	if (a != `0xff`) {
68	const uint32_t mult = MULTIPLIER(a);
69	rgb[`4` * i + `0`] = PREMULTIPLY(rgb[`4` * i + `0`], mult);
70	rgb[`4` * i + `1`] = PREMULTIPLY(rgb[`4` * i + `1`], mult);
71	rgb[`4` * i + `2`] = PREMULTIPLY(rgb[`4` * i + `2`], mult);
72	}
73	}
74	rgba += stride;
75	}
76	}
77	#undef MULTIPLY_BY_ALPHA
78	#undef MULTIPLIER
79	#undef PREMULTIPLY
80
81	//------------------------------------------------------------------------------
82
83	static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
84	int alpha_stride, int width, int height,
85	uint8_t* WEBP_RESTRICT dst, int dst_stride) {
86	uint32_t alpha_mask = `0xffu`;
87	uint8x8_t mask8 = vdup_n_u8(`0xff`);
88	uint32_t tmp[`2`];
89	int i, j;
90	for (j = `0`; j < height; ++j) {
91	// We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
92	// mode). So we must be sure dst[4i + 8 - 1] is writable for the store.*
93	// Hence the test with 'width - 1' instead of just 'width'.
94	for (i = `0`; i + `8` <= width - `1`; i += `8`) {
95	uint8x8x4_t rgbX = vld4_u8((const uint8_t)(dst + `4` i));
96	const uint8x8_t alphas = vld1_u8(alpha + i);
97	rgbX.val[`0`] = alphas;
98	vst4_u8((uint8_t)(dst + `4` i), rgbX);
99	mask8 = vand_u8(mask8, alphas);
100	}
101	for (; i < width; ++i) {
102	const uint32_t alpha_value = alpha[i];
103	dst[`4` * i] = alpha_value;
104	alpha_mask &= alpha_value;
105	}
106	alpha += alpha_stride;
107	dst += dst_stride;
108	}
109	vst1_u8((uint8_t*)tmp, mask8);
110	alpha_mask *= `0x01010101`;
111	alpha_mask &= tmp[`0`];
112	alpha_mask &= tmp[`1`];
113	return (alpha_mask != `0xffffffffu`);
114	}
115
116	static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
117	int alpha_stride, int width, int height,
118	uint32_t* WEBP_RESTRICT dst,
119	int dst_stride) {
120	int i, j;
121	uint8x8x4_t greens; // leave A/R/B channels zero'd.
122	greens.val[`0`] = vdup_n_u8(`0`);
123	greens.val[`2`] = vdup_n_u8(`0`);
124	greens.val[`3`] = vdup_n_u8(`0`);
125	for (j = `0`; j < height; ++j) {
126	for (i = `0`; i + `8` <= width; i += `8`) {
127	greens.val[`1`] = vld1_u8(alpha + i);
128	vst4_u8((uint8_t*)(dst + i), greens);
129	}
130	for (; i < width; ++i) dst[i] = alpha[i] << `8`;
131	alpha += alpha_stride;
132	dst += dst_stride;
133	}
134	}
135
136	static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
137	int width, int height,
138	uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
139	uint32_t alpha_mask = `0xffu`;
140	uint8x8_t mask8 = vdup_n_u8(`0xff`);
141	uint32_t tmp[`2`];
142	int i, j;
143	for (j = `0`; j < height; ++j) {
144	// We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
145	// mode). So we must be sure dst[4i + 8 - 1] is writable for the store.*
146	// Hence the test with 'width - 1' instead of just 'width'.
147	for (i = `0`; i + `8` <= width - `1`; i += `8`) {
148	const uint8x8x4_t rgbX = vld4_u8((const uint8_t)(argb + `4` i));
149	const uint8x8_t alphas = rgbX.val[`0`];
150	vst1_u8((uint8_t*)(alpha + i), alphas);
151	mask8 = vand_u8(mask8, alphas);
152	}
153	for (; i < width; ++i) {
154	alpha[i] = argb[`4` * i];
155	alpha_mask &= alpha[i];
156	}
157	argb += argb_stride;
158	alpha += alpha_stride;
159	}
160	vst1_u8((uint8_t*)tmp, mask8);
161	alpha_mask *= `0x01010101`;
162	alpha_mask &= tmp[`0`];
163	alpha_mask &= tmp[`1`];
164	return (alpha_mask == `0xffffffffu`);
165	}
166
167	static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb,
168	uint8_t* WEBP_RESTRICT alpha, int size) {
169	int i;
170	for (i = `0`; i + `16` <= size; i += `16`) {
171	const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
172	const uint8x16_t greens = rgbX.val[`1`];
173	vst1q_u8(alpha + i, greens);
174	}
175	for (; i < size; ++i) alpha[i] = (argb[i] >> `8`) & `0xff`;
176	}
177
178	//------------------------------------------------------------------------------
179
180	extern void WebPInitAlphaProcessingNEON(void);
181
182	WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingNEON(void) {
183	WebPApplyAlphaMultiply = ApplyAlphaMultiply_NEON;
184	WebPDispatchAlpha = DispatchAlpha_NEON;
185	WebPDispatchAlphaToGreen = DispatchAlphaToGreen_NEON;
186	WebPExtractAlpha = ExtractAlpha_NEON;
187	WebPExtractGreen = ExtractGreen_NEON;
188	}
189
190	#else // !WEBP_USE_NEON
191
// Build without WEBP_USE_NEON: none of the NEON routines above are compiled.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it
// expands to an empty WebPInitAlphaProcessingNEON() so that the symbol still
// exists for callers -- confirm against its definition.
192	WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingNEON)
193
194	#endif // WEBP_USE_NEON
195

Browse the source code of Godot/thirdparty/libwebp/src/dsp/alpha_processing_neon.c