| 1 | /* |
| 2 | * Copyright (c) 2015-2016, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Uniformly-named primitives named by target type. |
| 31 | * |
| 32 | * The following are a set of primitives named by target type, so that we can |
| 33 | * macro the hell out of all our NFA implementations. Hurrah! |
| 34 | */ |
| 35 | |
| 36 | #ifndef UNIFORM_OPS_H |
| 37 | #define UNIFORM_OPS_H |
| 38 | |
| 39 | #include "ue2common.h" |
| 40 | #include "simd_utils.h" |
| 41 | #include "unaligned.h" |
| 42 | |
/*
 * Aligned loads.
 *
 * Scalar variants read the target type straight through a cast of the
 * pointer; vector variants forward to the matching loadNNN() helper.
 */
#define load_u8(p)      (*(const u8 *)(p))
#define load_u16(p)     (*(const u16 *)(p))
#define load_u32(p)     (*(const u32 *)(p))
#define load_u64a(p)    (*(const u64a *)(p))
#define load_m128(p)    load128(p)
#define load_m256(p)    load256(p)
#define load_m384(p)    load384(p)
#define load_m512(p)    load512(p)

/*
 * Unaligned loads.
 *
 * The u8 variant is the same plain dereference as load_u8. Wider scalars go
 * through the unaligned_load_*() helpers (handed a byte pointer) and vectors
 * through the loaduNNN() helpers.
 */
#define loadu_u8(p)     (*(const u8 *)(p))
#define loadu_u16(p)    unaligned_load_u16((const u8 *)(p))
#define loadu_u32(p)    unaligned_load_u32((const u8 *)(p))
#define loadu_u64a(p)   unaligned_load_u64a((const u8 *)(p))
#define loadu_m128(p)   loadu128(p)
#define loadu_m256(p)   loadu256(p)
#define loadu_m384(p)   loadu384(p)
#define loadu_m512(p)   loadu512(p)
| 62 | |
/*
 * Aligned stores.
 *
 * Scalar variants assign through a cast of the destination pointer and are
 * wrapped in do/while so they behave as a single statement; vector variants
 * forward to the matching storeNNN() helper.
 */
#define store_u8(dst, v)      do { *(u8 *)(dst) = (v); } while (0)
#define store_u16(dst, v)     do { *(u16 *)(dst) = (v); } while (0)
#define store_u32(dst, v)     do { *(u32 *)(dst) = (v); } while (0)
#define store_u64a(dst, v)    do { *(u64a *)(dst) = (v); } while (0)
#define store_m128(dst, v)    store128(dst, v)
#define store_m256(dst, v)    store256(dst, v)
#define store_m384(dst, v)    store384(dst, v)
#define store_m512(dst, v)    store512(dst, v)

/*
 * Unaligned stores.
 *
 * The u8 variant is the same plain assignment as store_u8. Wider scalars use
 * the unaligned_store_*() helpers. Only a 128-bit vector form is provided.
 */
#define storeu_u8(dst, v)     do { *(u8 *)(dst) = (v); } while (0)
#define storeu_u16(dst, v)    unaligned_store_u16(dst, v)
#define storeu_u32(dst, v)    unaligned_store_u32(dst, v)
#define storeu_u64a(dst, v)   unaligned_store_u64a(dst, v)
#define storeu_m128(dst, v)   storeu128(dst, v)
| 79 | |
/* All-bits-clear value for each state type. */
#define zero_u8      0
#define zero_u32     0
#define zero_u64a    0
#define zero_m128    zeroes128()
#define zero_m256    zeroes256()
#define zero_m384    zeroes384()
#define zero_m512    zeroes512()

/*
 * All-bits-set value for each state type.
 *
 * ones_u32 uses the plain 'u' suffix on purpose. With 'ul' the constant is an
 * unsigned long, which is 64 bits wide on LP64 targets; there ~ones_u32 would
 * be 0xffffffff00000000 instead of 0 and sizeof(ones_u32) would be 8. The 'u'
 * suffix keeps it an unsigned int (assumed 32 bits wide, matching u32).
 */
#define ones_u8      0xff
#define ones_u32     0xffffffffu
#define ones_u64a    0xffffffffffffffffull
#define ones_m128    ones128()
#define ones_m256    ones256()
#define ones_m384    ones384()
#define ones_m512    ones512()
| 95 | |
/*
 * Bitwise OR for each state type: the '|' operator for scalars, the orNNN()
 * helper for vectors.
 */
#define or_u8(x, y)      ((x) | (y))
#define or_u32(x, y)     ((x) | (y))
#define or_u64a(x, y)    ((x) | (y))
#define or_m128(x, y)    (or128(x, y))
#define or_m256(x, y)    (or256(x, y))
#define or_m384(x, y)    (or384(x, y))
#define or_m512(x, y)    (or512(x, y))

/*
 * Bitwise AND for each state type: the '&' operator for scalars, the andNNN()
 * helper for vectors.
 */
#define and_u8(x, y)     ((x) & (y))
#define and_u32(x, y)    ((x) & (y))
#define and_u64a(x, y)   ((x) & (y))
#define and_m128(x, y)   (and128(x, y))
#define and_m256(x, y)   (and256(x, y))
#define and_m384(x, y)   (and384(x, y))
#define and_m512(x, y)   (and512(x, y))
| 111 | |
/*
 * Bitwise complement for each state type: the '~' operator for scalars, the
 * notNNN() helper for vectors.
 *
 * not_u8 casts the result back to u8. The operand is promoted to int before
 * '~' is applied, so without the cast the value would carry set bits above
 * bit 7 (for example ~0xff would be -256 rather than 0) and would compare
 * unequal to the expected 8-bit result.
 */
#define not_u8(a)      ((u8)~(a))
#define not_u32(a)     (~(a))
#define not_u64a(a)    (~(a))
#define not_m128(a)    (not128(a))
#define not_m256(a)    (not256(a))
#define not_m384(a)    (not384(a))
#define not_m512(a)    (not512(a))
| 119 | |
/*
 * AND-NOT for each state type. The scalar forms compute (~mask & val), i.e.
 * the FIRST argument is the one that gets complemented; vector forms forward
 * both arguments, in the same order, to the andnotNNN() helper.
 */
#define andnot_u8(mask, val)     (~(mask) & (val))
#define andnot_u32(mask, val)    (~(mask) & (val))
#define andnot_u64a(mask, val)   (~(mask) & (val))
#define andnot_m128(mask, val)   (andnot128(mask, val))
#define andnot_m256(mask, val)   (andnot256(mask, val))
#define andnot_m384(mask, val)   (andnot384(mask, val))
#define andnot_m512(mask, val)   (andnot512(mask, val))
| 127 | |
/*
 * Left shift for each state type: the '<<' operator for scalars, the
 * lshift64_mNNN() helper for vectors.
 */
#define lshift_u32(x, count)     ((x) << (count))
#define lshift_u64a(x, count)    ((x) << (count))
#define lshift_m128(x, count)    (lshift64_m128(x, count))
#define lshift_m256(x, count)    (lshift64_m256(x, count))
#define lshift_m384(x, count)    (lshift64_m384(x, count))
#define lshift_m512(x, count)    (lshift64_m512(x, count))
| 134 | |
/*
 * Zero test for each state type. Scalars compare against 0; vectors negate
 * the result of the isnonzeroNNN() helper.
 */
#define isZero_u8(x)       ((x) == 0)
#define isZero_u32(x)      ((x) == 0)
#define isZero_u64a(x)     ((x) == 0)
#define isZero_m128(x)     (!isnonzero128(x))
#define isZero_m256(x)     (!isnonzero256(x))
#define isZero_m384(x)     (!isnonzero384(x))
#define isZero_m512(x)     (!isnonzero512(x))

/*
 * Non-zero test for each state type. Scalars compare against 0; vectors
 * forward to the isnonzeroNNN() helper.
 */
#define isNonZero_u8(x)    ((x) != 0)
#define isNonZero_u32(x)   ((x) != 0)
#define isNonZero_u64a(x)  ((x) != 0)
#define isNonZero_m128(x)  (isnonzero128(x))
#define isNonZero_m256(x)  (isnonzero256(x))
#define isNonZero_m384(x)  (isnonzero384(x))
#define isNonZero_m512(x)  (isnonzero512(x))
| 150 | |
/*
 * diffrich: zero when the operands are equal, non-zero otherwise.
 * The u32 form yields 0 or 1. The u64a form yields 0 or 3 for any difference.
 * Vector forms forward to the diffrichNNN() helper.
 */
#define diffrich_u32(x, y)       ((x) != (y))
#define diffrich_u64a(x, y)      ((x) != (y) ? 3 : 0) //TODO: impl 32bit granularity
#define diffrich_m128(x, y)      (diffrich128(x, y))
#define diffrich_m256(x, y)      (diffrich256(x, y))
#define diffrich_m384(x, y)      (diffrich384(x, y))
#define diffrich_m512(x, y)      (diffrich512(x, y))

/*
 * diffrich64: zero when the operands are equal, non-zero otherwise.
 * Both scalar forms yield 0 or 1. Vector forms forward to the
 * diffrich64_NNN() helper.
 */
#define diffrich64_u32(x, y)     ((x) != (y))
#define diffrich64_u64a(x, y)    ((x) != (y) ? 1 : 0)
#define diffrich64_m128(x, y)    (diffrich64_128(x, y))
#define diffrich64_m256(x, y)    (diffrich64_256(x, y))
#define diffrich64_m384(x, y)    (diffrich64_384(x, y))
#define diffrich64_m512(x, y)    (diffrich64_512(x, y))
| 164 | |
/*
 * Inequality test for each state type: the '!=' operator for scalars, the
 * diffNNN() helper for vectors.
 */
#define noteq_u8(x, y)      ((x) != (y))
#define noteq_u32(x, y)     ((x) != (y))
#define noteq_u64a(x, y)    ((x) != (y))
#define noteq_m128(x, y)    (diff128(x, y))
#define noteq_m256(x, y)    (diff256(x, y))
#define noteq_m384(x, y)    (diff384(x, y))
#define noteq_m512(x, y)    (diff512(x, y))
| 172 | |
/*
 * Partial stores: each vector width forwards its pointer, value and size
 * arguments unchanged to the matching storebytesNNN() helper.
 */
#define partial_store_m128(p, val, n)    storebytes128(p, val, n)
#define partial_store_m256(p, val, n)    storebytes256(p, val, n)
#define partial_store_m384(p, val, n)    storebytes384(p, val, n)
#define partial_store_m512(p, val, n)    storebytes512(p, val, n)

/*
 * Partial loads: each vector width forwards its pointer and size arguments
 * unchanged to the matching loadbytesNNN() helper.
 */
#define partial_load_m128(p, n)          loadbytes128(p, n)
#define partial_load_m256(p, n)          loadbytes256(p, n)
#define partial_load_m384(p, n)          loadbytes384(p, n)
#define partial_load_m512(p, n)          loadbytes512(p, n)
| 182 | |
/*
 * Compressed stores: every state type forwards its four arguments, in order,
 * to the matching storecompressedNN() helper.
 */
#define store_compressed_u32(p, x, m, n)     storecompressed32(p, x, m, n)
#define store_compressed_u64a(p, x, m, n)    storecompressed64(p, x, m, n)
#define store_compressed_m128(p, x, m, n)    storecompressed128(p, x, m, n)
#define store_compressed_m256(p, x, m, n)    storecompressed256(p, x, m, n)
#define store_compressed_m384(p, x, m, n)    storecompressed384(p, x, m, n)
#define store_compressed_m512(p, x, m, n)    storecompressed512(p, x, m, n)

/*
 * Compressed loads: every state type forwards its four arguments, in order,
 * to the matching loadcompressedNN() helper. Note that the first two
 * arguments are in the opposite order to the store_compressed_* macros.
 */
#define load_compressed_u32(x, p, m, n)      loadcompressed32(x, p, m, n)
#define load_compressed_u64a(x, p, m, n)     loadcompressed64(x, p, m, n)
#define load_compressed_m128(x, p, m, n)     loadcompressed128(x, p, m, n)
#define load_compressed_m256(x, p, m, n)     loadcompressed256(x, p, m, n)
#define load_compressed_m384(x, p, m, n)     loadcompressed384(x, p, m, n)
#define load_compressed_m512(x, p, m, n)     loadcompressed512(x, p, m, n)
| 196 | |
| 197 | static really_inline |
| 198 | void clearbit_u32(u32 *p, u32 n) { |
| 199 | assert(n < sizeof(*p) * 8); |
| 200 | *p &= ~(1U << n); |
| 201 | } |
| 202 | |
| 203 | static really_inline |
| 204 | void clearbit_u64a(u64a *p, u32 n) { |
| 205 | assert(n < sizeof(*p) * 8); |
| 206 | *p &= ~(1ULL << n); |
| 207 | } |
| 208 | |
/*
 * Vector bit-clear: forwards the pointer and bit index unchanged to the
 * matching clearbitNNN() helper.
 */
#define clearbit_m128(p, bit)    (clearbit128(p, bit))
#define clearbit_m256(p, bit)    (clearbit256(p, bit))
#define clearbit_m384(p, bit)    (clearbit384(p, bit))
#define clearbit_m512(p, bit)    (clearbit512(p, bit))
| 213 | |
| 214 | static really_inline |
| 215 | char testbit_u32(u32 val, u32 n) { |
| 216 | assert(n < sizeof(val) * 8); |
| 217 | return !!(val & (1U << n)); |
| 218 | } |
| 219 | |
| 220 | static really_inline |
| 221 | char testbit_u64a(u64a val, u32 n) { |
| 222 | assert(n < sizeof(val) * 8); |
| 223 | return !!(val & (1ULL << n)); |
| 224 | } |
| 225 | |
/*
 * Vector bit-test: forwards the value and bit index unchanged to the matching
 * testbitNNN() helper.
 */
#define testbit_m128(v, bit)    (testbit128(v, bit))
#define testbit_m256(v, bit)    (testbit256(v, bit))
#define testbit_m384(v, bit)    (testbit384(v, bit))
#define testbit_m512(v, bit)    (testbit512(v, bit))
| 230 | |
| 231 | #endif |
| 232 | |