| 1 | /* |
| 2 | * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef CPU_X86_CRC32C_H |
| 26 | #define CPU_X86_CRC32C_H |
| 27 | |
| 28 | enum { |
| 29 | // "Chunk" size constants for CRC32C. CRC32C_HIGH: S. Gueron / Information Processing Letters 112 (2012) 184 |
| 30 | // shows that anything above 6K and below 32K is a good choice; |
| 31 | // 32K does not deliver any further performance gains. |
| 32 | // 6K = 8*256 * 3 (the factor 3 because we compute 3 blocks together) |
| 33 | // |
| 34 | // Thus the smallest value is selected so that it applies to the largest number |
| 35 | // of buffer sizes. |
| 36 | CRC32C_HIGH = 8 * 256, |
| 37 | |
| 38 | // CRC32C_MIDDLE: empirical, |
| 39 | // based on a ubench study using the methodology described in |
| 40 | // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8 |
| 41 | // |
| 42 | // an arbitrary value between 27 and 256 (the multipliers used by CRC32C_LOW and CRC32C_HIGH) |
| 43 | CRC32C_MIDDLE = 8 * 86, |
| 44 | |
| 45 | // CRC32C_LOW: V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9 |
| 46 | // shows that 240 and 1024 are equally good choices as 216 == 8*27. |
| 47 | // |
| 48 | // The smallest value that resulted in a significant performance improvement over |
| 49 | // the sequential version is selected. |
| 50 | CRC32C_LOW = 8 * 27, |
| 51 | |
| 52 | CRC32C_NUM_ChunkSizeInBytes = 3, |
| 53 | |
| 54 | // We need to compute powers of 64N and 128N for each "chunk" size, i.e. two constants for each of the CRC32C_NUM_ChunkSizeInBytes sizes (HIGH, MIDDLE, LOW above) |
| 55 | CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes ) |
| 56 | }; |
| 57 | // Notes: |
| 58 | // 1. Why do we need to choose a "chunk" approach? |
| 59 | // The overhead of computing the powers for an arbitrary buffer of size N is significant |
| 60 | // (the implementation approaches library performance). |
| 61 | // 2. Why only 3 "chunks"? |
| 62 | // Performance experiments showed that HIGH+LOW alone did not deliver a stable speedup |
| 63 | // curve. |
| 64 | // |
| 65 | // Disclaimer: |
| 66 | // If you ever decide to increase/decrease the number of "chunks", be sure to modify |
| 67 | // a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp) |
| 68 | // b) constant fetch from that table (macroAssembler_x86.cpp) |
| 69 | // c) unrolled for loop (macroAssembler_x86.cpp) |
| 70 | |
| 71 | #endif /* !CPU_X86_CRC32C_H */ |
| 72 | |