1 | /* ****************************************************************** |
2 | * hist : Histogram functions |
3 | * part of Finite State Entropy project |
4 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
5 | * |
6 | * You can contact the author at : |
7 | * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy |
8 | * - Public forum : https://groups.google.com/forum/#!forum/lz4c |
9 | * |
10 | * This source code is licensed under both the BSD-style license (found in the |
11 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
12 | * in the COPYING file in the root directory of this source tree). |
13 | * You may select, at your option, one of the above-listed licenses. |
14 | ****************************************************************** */ |
15 | |
16 | /* --- dependencies --- */ |
17 | #include "../common/zstd_deps.h" /* size_t */ |
18 | |
19 | |
20 | /* --- simple histogram functions --- */ |
21 | |
22 | /*! HIST_count(): |
23 | * Provides the precise count of each byte within a table 'count'. |
24 | * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). |
25 | * Updates *maxSymbolValuePtr with actual largest symbol value detected. |
26 | * @return : count of the most frequent symbol (which isn't identified). |
27 | * or an error code, which can be tested using HIST_isError(). |
28 | * note : if return == srcSize, there is only one symbol. |
29 | */ |
30 | size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, |
31 | const void* src, size_t srcSize); |
32 | |
33 | unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ |
34 | |
35 | |
36 | /* --- advanced histogram functions --- */ |
37 | |
38 | #define HIST_WKSP_SIZE_U32 1024 |
39 | #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) |
40 | /** HIST_count_wksp() : |
41 | * Same as HIST_count(), but using an externally provided scratch buffer. |
42 | * Benefit is this function will use very little stack space. |
43 | * `workSpace` is a writable buffer which must be 4-bytes aligned, |
44 | * `workSpaceSize` must be >= HIST_WKSP_SIZE |
45 | */ |
46 | size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
47 | const void* src, size_t srcSize, |
48 | void* workSpace, size_t workSpaceSize); |
49 | |
50 | /** HIST_countFast() : |
51 | * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. |
52 | * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` |
53 | */ |
54 | size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
55 | const void* src, size_t srcSize); |
56 | |
57 | /** HIST_countFast_wksp() : |
58 | * Same as HIST_countFast(), but using an externally provided scratch buffer. |
59 | * `workSpace` is a writable buffer which must be 4-bytes aligned, |
60 | * `workSpaceSize` must be >= HIST_WKSP_SIZE |
61 | */ |
62 | size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
63 | const void* src, size_t srcSize, |
64 | void* workSpace, size_t workSpaceSize); |
65 | |
66 | /*! HIST_count_simple() : |
67 | * Same as HIST_countFast(), this function is unsafe, |
68 | * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. |
69 | * It is also a bit slower for large inputs. |
70 | * However, it does not need any additional memory (not even on stack). |
71 | * @return : count of the most frequent symbol. |
72 | * Note this function doesn't produce any error (i.e. it must succeed). |
73 | */ |
74 | unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
75 | const void* src, size_t srcSize); |
76 | |