1/*-------------------------------------------------------------------------
2 *
3 * pg_crc32c_sse42.c
4 * Compute CRC-32C checksum using Intel SSE 4.2 instructions.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/port/pg_crc32c_sse42.c
12 *
13 *-------------------------------------------------------------------------
14 */
#include "c.h"

#include "port/pg_crc32c.h"

#include <nmmintrin.h>
#include <string.h>
20
21pg_crc32c
22pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
23{
24 const unsigned char *p = data;
25 const unsigned char *pend = p + len;
26
27 /*
28 * Process eight bytes of data at a time.
29 *
30 * NB: We do unaligned accesses here. The Intel architecture allows that,
31 * and performance testing didn't show any performance gain from aligning
32 * the begin address.
33 */
34#ifdef __x86_64__
35 while (p + 8 <= pend)
36 {
37 crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
38 p += 8;
39 }
40
41 /* Process remaining full four bytes if any */
42 if (p + 4 <= pend)
43 {
44 crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
45 p += 4;
46 }
47#else
48
49 /*
50 * Process four bytes at a time. (The eight byte instruction is not
51 * available on the 32-bit x86 architecture).
52 */
53 while (p + 4 <= pend)
54 {
55 crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
56 p += 4;
57 }
58#endif /* __x86_64__ */
59
60 /* Process any remaining bytes one at a time. */
61 while (p < pend)
62 {
63 crc = _mm_crc32_u8(crc, *p);
64 p++;
65 }
66
67 return crc;
68}
69