1// Copyright (c) 2018 Kenton Varda and contributors
2// Licensed under the MIT License:
3//
4// Permission is hereby granted, free of charge, to any person obtaining a copy
5// of this software and associated documentation files (the "Software"), to deal
6// in the Software without restriction, including without limitation the rights
7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8// copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20// THE SOFTWARE.
21
22#pragma once
23
24#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
25#pragma GCC system_header
26#endif
27
28#include "string.h"
29
30namespace kj {
31namespace _ { // private
32
33struct HashCoder {
34 // This is a dummy type with only one instance: HASHCODER (below). To make an arbitrary type
35 // hashable, define `operator*(HashCoder, T)` to return any other type that is already hashable.
36 // Be sure to declare the operator in the same namespace as `T` **or** in the global scope.
37 // You can use the KJ_HASHCODE() macro as syntax sugar for this.
38 //
39 // A more usual way to accomplish what we're doing here would be to require that you define
40 // a function like `hashCode(T)` and then rely on argument-dependent lookup. However, this has
41 // the problem that it pollutes other people's namespaces and even the global namespace. For
42 // example, some other project may already have functions called `hashCode` which do something
43 // different. Declaring `operator*` with `HashCoder` as the left operand cannot conflict with
44 // anything.
45
46 uint operator*(ArrayPtr<const byte> s) const;
47 inline uint operator*(ArrayPtr<byte> s) const { return operator*(s.asConst()); }
48
49 inline uint operator*(ArrayPtr<const char> s) const { return operator*(s.asBytes()); }
50 inline uint operator*(ArrayPtr<char> s) const { return operator*(s.asBytes()); }
51 inline uint operator*(const Array<const char>& s) const { return operator*(s.asBytes()); }
52 inline uint operator*(const Array<char>& s) const { return operator*(s.asBytes()); }
53 inline uint operator*(const String& s) const { return operator*(s.asBytes()); }
54 inline uint operator*(const StringPtr& s) const { return operator*(s.asBytes()); }
55
56 inline uint operator*(decltype(nullptr)) const { return 0; }
57 inline uint operator*(bool b) const { return b; }
58 inline uint operator*(char i) const { return i; }
59 inline uint operator*(signed char i) const { return i; }
60 inline uint operator*(unsigned char i) const { return i; }
61 inline uint operator*(signed short i) const { return i; }
62 inline uint operator*(unsigned short i) const { return i; }
63 inline uint operator*(signed int i) const { return i; }
64 inline uint operator*(unsigned int i) const { return i; }
65
66 inline uint operator*(signed long i) const {
67 if (sizeof(i) == sizeof(uint)) {
68 return operator*(static_cast<uint>(i));
69 } else {
70 return operator*(static_cast<unsigned long long>(i));
71 }
72 }
73 inline uint operator*(unsigned long i) const {
74 if (sizeof(i) == sizeof(uint)) {
75 return operator*(static_cast<uint>(i));
76 } else {
77 return operator*(static_cast<unsigned long long>(i));
78 }
79 }
80 inline uint operator*(signed long long i) const {
81 return operator*(static_cast<unsigned long long>(i));
82 }
83 inline uint operator*(unsigned long long i) const {
84 // Mix 64 bits to 32 bits in such a way that if our input values differ primarily in the upper
85 // 32 bits, we still get good diffusion. (I.e. we cannot just truncate!)
86 //
87 // 49123 is an arbitrarily-chosen prime that is vaguely close to 2^16.
88 //
89 // TODO(perf): I just made this up. Is it OK?
90 return static_cast<uint>(i) + static_cast<uint>(i >> 32) * 49123;
91 }
92
93 template <typename T>
94 uint operator*(T* ptr) const {
95 if (sizeof(ptr) == sizeof(uint)) {
96 // TODO(cleanup): In C++17, make the if() above be `if constexpr ()`, then change this to
97 // reinterpret_cast<uint>(ptr).
98 return reinterpret_cast<unsigned long long>(ptr);
99 } else {
100 return operator*(reinterpret_cast<unsigned long long>(ptr));
101 }
102 }
103
104 template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
105 uint operator*(ArrayPtr<T> arr) const;
106 template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
107 uint operator*(const Array<T>& arr) const;
108
109 template <typename T, typename Result = decltype(instance<T>().hashCode())>
110 inline Result operator*(T&& value) const { return kj::fwd<T>(value).hashCode(); }
111};
112static KJ_CONSTEXPR(const) HashCoder HASHCODER = HashCoder();
113
114} // namespace _ (private)
115
#define KJ_HASHCODE(...) \
    operator*(::kj::_::HashCoder, __VA_ARGS__)
// Declares the hash function of a custom type. The macro expands to the name and parameter list
// of an `operator*` whose first operand is `::kj::_::HashCoder`; you supply the return type in
// front of it and the body after it. Example:
//
//     class Foo {...};
//     inline uint KJ_HASHCODE(const Foo& foo) { return kj::hashCode(foo.x, foo.y); }
//
// With this in place, a Foo can be passed to hashCode().
//
// Declare the function either in the same namespace as the target type or in the global
// namespace. Its return type may be any type that is itself hashable -- the returned value is
// hashed again, repeatedly, until a `uint` comes out.
127
inline uint hashCode(uint value) {
  // A `uint` already is a hash code, so it is returned unchanged. The generic hashCode()
  // overloads keep re-hashing intermediate results until they end up here.
  return value;
}
129template <typename T>
130inline uint hashCode(T&& value) { return hashCode(_::HASHCODER * kj::fwd<T>(value)); }
131template <typename... T>
132inline uint hashCode(T&&... values) {
133 uint hashes[] = { hashCode(kj::fwd<T>(values))... };
134 return hashCode(kj::ArrayPtr<uint>(hashes).asBytes());
135}
136// kj::hashCode() is a universal hashing function, like kj::str() is a universal stringification
137// function. Throw stuff in, get a hash code.
138//
139// Hash codes may differ between different processes, even running exactly the same code.
140//
141// NOT SUITABLE FOR CRYPTOGRAPHY. This is for hash tables, not crypto.
142
143// =======================================================================================
144// inline implementation details
145
146namespace _ { // private
147
148template <typename T, typename>
149inline uint HashCoder::operator*(ArrayPtr<T> arr) const {
150 // Hash each array element to create a string of hashes, then murmur2 over those.
151 //
152 // TODO(perf): Choose a more-modern hash. (See hash.c++.)
153
154 constexpr uint m = 0x5bd1e995;
155 constexpr uint r = 24;
156 uint h = arr.size() * sizeof(uint);
157
158 for (auto& e: arr) {
159 uint k = kj::hashCode(e);
160 k *= m;
161 k ^= k >> r;
162 k *= m;
163 h *= m;
164 h ^= k;
165 }
166
167 h ^= h >> 13;
168 h *= m;
169 h ^= h >> 15;
170 return h;
171}
172template <typename T, typename>
173inline uint HashCoder::operator*(const Array<T>& arr) const {
174 return operator*(arr.asPtr());
175}
176
177} // namespace _ (private)
178} // namespace kj
179