1 | |
---|---|
2 | /* intel_init.c - SSE2 optimized filter functions |
3 | * |
4 | * Copyright (c) 2018 Cosmin Truta |
5 | * Copyright (c) 2016-2017 Glenn Randers-Pehrson |
6 | * Written by Mike Klein and Matt Sarett, Google, Inc. |
7 | * Derived from arm/arm_init.c |
8 | * |
9 | * This code is released under the libpng license. |
10 | * For conditions of distribution and use, see the disclaimer |
11 | * and license in png.h |
12 | */ |
13 | |
14 | #include "../pngpriv.h" |
15 | |
16 | #ifdef PNG_READ_SUPPORTED |
17 | #if PNG_INTEL_SSE_IMPLEMENTATION > 0 |
18 | |
19 | void |
20 | png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) |
21 | { |
22 | /* The techniques used to implement each of these filters in SSE operate on |
23 | * one pixel at a time. |
24 | * So they generally speed up 3bpp images about 3x, 4bpp images about 4x. |
25 | * They can scale up to 6 and 8 bpp images and down to 2 bpp images, |
26 | * but they'd not likely have any benefit for 1bpp images. |
27 | * Most of these can be implemented using only MMX and 64-bit registers, |
28 | * but they end up a bit slower than using the equally-ubiquitous SSE2. |
29 | */ |
30 | png_debug(1, "in png_init_filter_functions_sse2"); |
31 | if (bpp == 3) |
32 | { |
33 | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; |
34 | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; |
35 | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
36 | png_read_filter_row_paeth3_sse2; |
37 | } |
38 | else if (bpp == 4) |
39 | { |
40 | pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; |
41 | pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; |
42 | pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = |
43 | png_read_filter_row_paeth4_sse2; |
44 | } |
45 | |
46 | /* No need optimize PNG_FILTER_VALUE_UP. The compiler should |
47 | * autovectorize. |
48 | */ |
49 | } |
50 | |
51 | #endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ |
52 | #endif /* PNG_READ_SUPPORTED */ |
53 |