tbprobe.cpp source code [Stockfish/syzygy/tbprobe.cpp]

1	/*
2	Stockfish, a UCI chess playing engine derived from Glaurung 2.1
3	Copyright (c) 2013 Ronald de Man
4	Copyright (C) 2016-2019 Marco Costalba, Lucas Braesch
5
6	Stockfish is free software: you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation, either version 3 of the License, or
9	(at your option) any later version.
10
11	Stockfish is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	*/
19
20	#include <algorithm>
21	#include <atomic>
22	#include <cstdint>
23	#include <cstring> // For std::memset and std::memcpy
24	#include <deque>
25	#include <fstream>
26	#include <iostream>
27	#include <list>
28	#include <sstream>
29	#include <type_traits>
30
31	#include "../bitboard.h"
32	#include "../movegen.h"
33	#include "../position.h"
34	#include "../search.h"
35	#include "../thread_win32_osx.h"
36	#include "../types.h"
37	#include "../uci.h"
38
39	#include "tbprobe.h"
40
41	#ifndef _WIN32
42	#include <fcntl.h>
43	#include <unistd.h>
44	#include <sys/mman.h>
45	#include <sys/stat.h>
46	#else
47	#define WIN32_LEAN_AND_MEAN
48	#define NOMINMAX
49	#include <windows.h>
50	#endif
51
52	using namespace Tablebases;
53
54	int Tablebases::MaxCardinality;
55
56	namespace {
57
constexpr int TBPIECES = 7; // Max number of supported pieces

// Byte order selectors, passed as the LE template argument of number<>()
enum { BigEndian, LittleEndian };

// Used as template parameter to select the table flavour; the same values are
// also used as std::get<> indices into the hash table entries.
enum TBType { KEY, WDL, DTZ };

// Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables
enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, SingleValue = 128 };
65
66	inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); }
67	inline Square operator^=(Square& s, int i) { return s = Square(int(s) ^ i); }
68	inline Square operator^(Square s, int i) { return Square(int(s) ^ i); }
69
70	const std::string PieceToChar = " PNBRQK pnbrqk";
71
72	int MapPawns[SQUARE_NB];
73	int MapB1H1H7[SQUARE_NB];
74	int MapA1D1D4[SQUARE_NB];
75	int MapKK[`10`][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB]
76
77	int Binomial[`6`][SQUARE_NB]; // [k][n] k elements from a set of n elements
78	int LeadPawnIdx[`6`][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB]
79	int LeadPawnsSize[`6`][`4`]; // [leadPawnsCnt][FILE_A..FILE_D]
80
81	// Comparison function to sort leading pawns in ascending MapPawns[] order
82	bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; }
83	int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); }
84
85	constexpr Value WDL_to_value[] = {
86	-VALUE_MATE + MAX_PLY + `1`,
87	VALUE_DRAW - `2`,
88	VALUE_DRAW,
89	VALUE_DRAW + `2`,
90	VALUE_MATE - MAX_PLY - `1`
91	};
92
// Reverse the byte order of the unsigned integer x in place. Half and End are
// derived from sizeof(T) through their defaults; callers only supply T.
template<typename T, int Half = sizeof(T) / 2, int End = sizeof(T) - 1>
inline void swap_endian(T& x)
{
    static_assert(std::is_unsigned<T>::value, "Argument of swap_endian not unsigned");

    // View the object as raw bytes and swap them pairwise from both ends
    uint8_t tmp, *c = (uint8_t*)&x;
    for (int i = 0; i < Half; ++i)
        tmp = c[i], c[i] = c[End - i], c[End - i] = tmp;
}
// A single byte has no byte order, so there is nothing to swap
template<> inline void swap_endian<uint8_t>(uint8_t&) {}
103
// Read a value of type T stored at addr in the byte order selected by LE
// (LittleEndian or BigEndian) and return it in host byte order.
template<typename T, int LE> T number(void* addr)
{
    // Detect the host byte order once by inspecting a known 32-bit pattern
    static const union { uint32_t i; char c[4]; } Le = { 0x01020304 };
    static const bool IsLittleEndian = (Le.c[0] == 4);

    T v;

    if ((uintptr_t)addr & (alignof(T) - 1)) // Unaligned pointer (very rare)
        std::memcpy(&v, addr, sizeof(T));
    else
        v = *((T*)addr);

    // Stored order differs from host order: swap the bytes
    if (LE != IsLittleEndian)
        swap_endian(v);
    return v;
}
120
121	// DTZ tables don't store valid scores for moves that reset the rule50 counter
122	// like captures and pawn moves but we can easily recover the correct dtz of the
123	// previous move if we know the position's WDL score.
124	int dtz_before_zeroing(WDLScore wdl) {
125	return wdl == WDLWin ? `1` :
126	wdl == WDLCursedWin ? `101` :
127	wdl == WDLBlessedLoss ? -`101` :
128	wdl == WDLLoss ? -`1` : `0`;
129	}
130
// Return the sign of a number (-1, 0, 1)
template <typename T> int sign_of(T val) {
    // Each comparison yields 0 or 1, so the difference is -1, 0 or 1
    return (T(0) < val) - (val < T(0));
}
135
// Numbers in little endian used by sparseIndex[] to point into blockLength[].
// Fields are raw byte arrays, so the struct has no padding and mirrors the
// 6-byte on-disk record; decode them with number<>().
struct SparseEntry {
    char block[4];   // Number of block
    char offset[2];  // Offset within the block
};

static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes");
143
typedef uint16_t Sym; // Huffman symbol

// One node of the symbol tree: two 12-bit child symbols packed into 3 bytes
struct LR {
    enum Side { Left, Right };

    uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12
                   // bits is the right-hand symbol. If symbol has length 1,
                   // then the left-hand symbol is the stored value.

    // Extract the 12-bit symbol stored on side S
    template<Side S>
    Sym get() const {
        return S == Left  ? ((lr[1] & 0xF) << 8) | lr[0] :
               S == Right ?  (lr[2] << 4) | (lr[1] >> 4) : (assert(false), Sym(-1));
    }
};

static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes");
160
// Tablebases data layout is structured as follows:
//
//  TBFile: memory maps/unmaps the physical .rtbw and .rtbz files
//  TBTable: one object for each file with corresponding indexing information
//  TBTables: has ownership of TBTable objects, keeping a list and a hash
166
167	// class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are
168	// memory mapped for best performance. Files are mapped at first access: at init
169	// time only existence of the file is checked.
170	class TBFile : public std::ifstream {
171
172	std::string fname;
173
174	public:
175	// Look for and open the file among the Paths directories where the .rtbw
176	// and .rtbz files can be found. Multiple directories are separated by ";"
177	// on Windows and by ":" on Unix-based operating systems.
178	//
179	// Example:
180	// C:\tb\wdl345;C:\tb\wdl6;D:\tb\dtz345;D:\tb\dtz6
181	static std::string Paths;
182
183	TBFile(const std::string& f) {
184
185	#ifndef _WIN32
186	constexpr char SepChar = `':'`;
187	#else
188	constexpr char SepChar = `';'`;
189	#endif
190	std::stringstream ss(Paths);
191	std::string path;
192
193	while (std::getline(ss, path, SepChar)) {
194	fname = path + "/" + f;
195	std::ifstream::open(fname);
196	if (is_open())
197	return;
198	}
199	}
200
201	// Memory map the file and check it. File should be already open and will be
202	// closed after mapping.
203	uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) {
204
205	assert(is_open());
206
207	close(); // Need to re-open to get native file descriptor
208
209	#ifndef _WIN32
210	struct stat statbuf;
211	int fd = ::open(fname.c_str(), O_RDONLY);
212
213	if (fd == -`1`)
214	return baseAddress = nullptr, nullptr*;
215
216	fstat(fd, &statbuf);
217
218	if (statbuf.st_size % `64` != `16`)
219	{
220	std::cerr << "Corrupt tablebase file " << fname << std::endl;
221	exit(EXIT_FAILURE);
222	}
223
224	*mapping = statbuf.st_size;
225	baseAddress = mmap(nullptr*, statbuf.st_size, PROT_READ, MAP_SHARED, fd, `0`);
226	madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
227	::close(fd);
228
229	if (*baseAddress == MAP_FAILED)
230	{
231	std::cerr << "Could not mmap() " << fname << std::endl;
232	exit(EXIT_FAILURE);
233	}
234	#else
235	// Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored.
236	HANDLE fd = CreateFile(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
237	OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr);
238
239	if (fd == INVALID_HANDLE_VALUE)
240	return baseAddress = nullptr, nullptr*;
241
242	DWORD size_high;
243	DWORD size_low = GetFileSize(fd, &size_high);
244
245	if (size_low % `64` != `16`)
246	{
247	std::cerr << "Corrupt tablebase file " << fname << std::endl;
248	exit(EXIT_FAILURE);
249	}
250
251	HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr);
252	CloseHandle(fd);
253
254	if (!mmap)
255	{
256	std::cerr << "CreateFileMapping() failed" << std::endl;
257	exit(EXIT_FAILURE);
258	}
259
260	*mapping = (uint64_t)mmap;
261	*baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, `0`, `0`, `0`);
262
263	if (!*baseAddress)
264	{
265	std::cerr << "MapViewOfFile() failed, name = " << fname
266	<< ", error = " << GetLastError() << std::endl;
267	exit(EXIT_FAILURE);
268	}
269	#endif
270	uint8_t* data = (uint8_t)baseAddress;
271
272	constexpr uint8_t Magics[][`4`] = { { `0xD7`, `0x66`, `0x0C`, `0xA5` },
273	{ `0x71`, `0xE8`, `0x23`, `0x5D` } };
274
275	if (memcmp(data, Magics[type == WDL], `4`))
276	{
277	std::cerr << "Corrupted table in file " << fname << std::endl;
278	unmap(baseAddress, mapping);
279	return baseAddress = nullptr, nullptr*;
280	}
281
282	return data + `4`; // Skip Magics's header
283	}
284
285	static void unmap(void* baseAddress, uint64_t mapping) {
286
287	#ifndef _WIN32
288	munmap(baseAddress, mapping);
289	#else
290	UnmapViewOfFile(baseAddress);
291	CloseHandle((HANDLE)mapping);
292	#endif
293	}
294	};
295
296	std::string TBFile::Paths;
297
298	// struct PairsData contains low level indexing information to access TB data.
299	// There are 8, 4 or 2 PairsData records for each TBTable, according to type of
300	// table and if positions have pawns or not. It is populated at first access.
301	struct PairsData {
302	uint8_t flags; // Table flags, see enum TBFlag
303	uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols
304	uint8_t minSymLen; // Minimum length in bits of the Huffman symbols
305	uint32_t blocksNum; // Number of blocks in the TB file
306	size_t sizeofBlock; // Block size in bytes
307	size_t span; // About every span values there is a SparseIndex[] entry
308	Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value
309	LR* btree; // btree[sym] stores the left and right symbols that expand sym
310	uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536
311	uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum
312	SparseEntry* sparseIndex; // Partial indices into blockLength[]
313	size_t sparseIndexSize; // Size of SparseIndex[] table
314	uint8_t* data; // Start of Huffman compressed data
315	std::vector<uint64_t> base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l
316	std::vector<uint8_t> symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256
317	Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups
318	uint64_t groupIdx[TBPIECES+`1`]; // Start index used for the encoding of the group's pieces
319	int groupLen[TBPIECES+`1`]; // Number of pieces in a given group: KRKN -> (3, 1)
320	uint16_t map_idx[`4`]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ)
321	};
322
323	// struct TBTable contains indexing information to access the corresponding TBFile.
324	// There are 2 types of TBTable, corresponding to a WDL or a DTZ file. TBTable
325	// is populated at init time but the nested PairsData records are populated at
326	// first access, when the corresponding file is memory mapped.
327	template<TBType Type>
328	struct TBTable {
329	typedef typename std::conditional<Type == WDL, WDLScore, int>::type Ret;
330
331	static constexpr int Sides = Type == WDL ? `2` : `1`;
332
333	std::atomic_bool ready;
334	void* baseAddress;
335	uint8_t* map;
336	uint64_t mapping;
337	Key key;
338	Key key2;
339	int pieceCount;
340	bool hasPawns;
341	bool hasUniquePieces;
342	uint8_t pawnCount[`2`]; // [Lead color / other color]
343	PairsData items[Sides][`4`]; // [wtm / btm][FILE_A..FILE_D or 0]
344
345	PairsData* get(int stm, int f) {
346	return &items[stm % Sides][hasPawns ? f : `0`];
347	}
348
349	TBTable() : ready (false), baseAddress(nullptr) {}
350	explicit TBTable(const std::string& code);
351	explicit TBTable(const TBTable<WDL>& wdl);
352
353	~TBTable() {
354	if (baseAddress)
355	TBFile::unmap(baseAddress, mapping);
356	}
357	};
358
359	template<>
360	TBTable<WDL>::TBTable(const std::string& code) : TBTable () {
361
362	StateInfo st;
363	Position pos;
364
365	key = pos.set(code, WHITE, &st).material_key();
366	pieceCount = pos.count<ALL_PIECES>();
367	hasPawns = pos.pieces(PAWN);
368
369	hasUniquePieces = false;
370	for (Color c : { WHITE, BLACK })
371	for (PieceType pt = PAWN; pt < KING; ++pt)
372	if (popcount(pos.pieces(c, pt)) == `1`)
373	hasUniquePieces = true;
374
375	// Set the leading color. In case both sides have pawns the leading color
376	// is the side with less pawns because this leads to better compression.
377	bool c = !pos.count<PAWN>(BLACK)
378	\|\| ( pos.count<PAWN>(WHITE)
379	&& pos.count<PAWN>(BLACK) >= pos.count<PAWN>(WHITE));
380
381	pawnCount[`0`] = pos.count<PAWN>(c ? WHITE : BLACK);
382	pawnCount[`1`] = pos.count<PAWN>(c ? BLACK : WHITE);
383
384	key2 = pos.set(code, BLACK, &st).material_key();
385	}
386
387	template<>
388	TBTable<DTZ>::TBTable(const TBTable<WDL>& wdl) : TBTable () {
389
390	// Use the corresponding WDL table to avoid recalculating all from scratch
391	key = wdl.key;
392	key2 = wdl.key2;
393	pieceCount = wdl.pieceCount;
394	hasPawns = wdl.hasPawns;
395	hasUniquePieces = wdl.hasUniquePieces;
396	pawnCount[`0`] = wdl.pawnCount[`0`];
397	pawnCount[`1`] = wdl.pawnCount[`1`];
398	}
399
400	// class TBTables creates and keeps ownership of the TBTable objects, one for
401	// each TB file found. It supports a fast, hash based, table lookup. Populated
402	// at init time, accessed at probe time.
403	class TBTables {
404
405	typedef std::tuple<Key, TBTable<WDL>, TBTable<DTZ>> Entry;
406
407	static constexpr int Size = `1` << `12`; // 4K table, indexed by key's 12 lsb
408	static constexpr int Overflow = `1`; // Number of elements allowed to map to the last bucket
409
410	Entry hashTable[Size + Overflow];
411
412	std::deque<TBTable<WDL>> wdlTable;
413	std::deque<TBTable<DTZ>> dtzTable;
414
415	void insert(Key key, TBTable<WDL>* wdl, TBTable<DTZ>* dtz) {
416	uint32_t homeBucket = (uint32_t)key & (Size - `1`);
417	Entry entry = std::make_tuple(key, wdl, dtz);
418
419	// Ensure last element is empty to avoid overflow when looking up
420	for (uint32_t bucket = homeBucket; bucket < Size + Overflow - `1`; ++bucket) {
421	Key otherKey = std::get<KEY>(hashTable[bucket]);
422	if (otherKey == key \|\| !std::get<WDL>(hashTable[bucket])) {
423	hashTable[bucket] = entry;
424	return;
425	}
426
427	// Robin Hood hashing: If we've probed for longer than this element,
428	// insert here and search for a new spot for the other element instead.
429	uint32_t otherHomeBucket = (uint32_t)otherKey & (Size - `1`);
430	if (otherHomeBucket > homeBucket) {
431	swap(entry, hashTable[bucket]);
432	key = otherKey;
433	homeBucket = otherHomeBucket;
434	}
435	}
436	std::cerr << "TB hash table size too low!" << std::endl;
437	exit(EXIT_FAILURE);
438	}
439
440	public:
441	template<TBType Type>
442	TBTable<Type>* get(Key key) {
443	for (const Entry* entry = &hashTable[(uint32_t)key & (Size - `1`)]; ; ++entry) {
444	if (std::get<KEY>(entry) == key \|\| !std::get<Type>(entry))
445	return std::get<Type>(*entry);
446	}
447	}
448
449	void clear() {
450	memset(hashTable, `0`, sizeof(hashTable));
451	wdlTable.clear();
452	dtzTable.clear();
453	}
454	size_t size() const { return wdlTable.size(); }
455	void add(const std::vector<PieceType>& pieces);
456	};
457
458	TBTables TBTables;
459
460	// If the corresponding file exists two new objects TBTable<WDL> and TBTable<DTZ>
461	// are created and added to the lists and hash table. Called at init time.
462	void TBTables::add(const std::vector<PieceType>& pieces) {
463
464	std::string code;
465
466	for (PieceType pt : pieces)
467	code += PieceToChar [pt];
468
469	TBFile file(code.insert(code.find(`'K'`, `1`), "v") + ".rtbw"); // KRK -> KRvK
470
471	if (!file.is_open()) // Only WDL file is checked
472	return;
473
474	file.close();
475
476	MaxCardinality = std::max((int)pieces.size(), MaxCardinality);
477
478	wdlTable.emplace_back(code);
479	dtzTable.emplace_back(wdlTable.back());
480
481	// Insert into the hash keys for both colors: KRvK with KR white and black
482	insert(wdlTable.back().key , &wdlTable.back(), &dtzTable.back());
483	insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back());
484	}
485
486	// TB tables are compressed with canonical Huffman code. The compressed data is divided into
487	// blocks of size d->sizeofBlock, and each block stores a variable number of symbols.
488	// Each symbol represents either a WDL or a (remapped) DTZ value, or a pair of other symbols
489	// (recursively). If you keep expanding the symbols in a block, you end up with up to 65536
490	// WDL or DTZ values. Each symbol represents up to 256 values and will correspond after
491	// Huffman coding to at least 1 bit. So a block of 32 bytes corresponds to at most
492	// 32 x 8 x 256 = 65536 values. This maximum is only reached for tables that consist mostly
493	// of draws or mostly of wins, but such tables are actually quite common. In principle, the
494	// blocks in WDL tables are 64 bytes long (and will be aligned on cache lines). But for
495	// mostly-draw or mostly-win tables this can leave many 64-byte blocks only half-filled, so
496	// in such cases blocks are 32 bytes long. The blocks of DTZ tables are up to 1024 bytes long.
497	// The generator picks the size that leads to the smallest table. The "book" of symbols and
498	// Huffman codes is the same for all blocks in the table. A non-symmetric pawnless TB file
499	// will have one table for wtm and one for btm, a TB file with pawns will have tables per
500	// file a,b,c,d also in this case one set for wtm and one for btm.
501	int decompress_pairs(PairsData* d, uint64_t idx) {
502
503	// Special case where all table positions store the same value
504	if (d->flags & TBFlag::SingleValue)
505	return d->minSymLen;
506
507	// First we need to locate the right block that stores the value at index "idx".
508	// Because each block n stores blockLength[n] + 1 values, the index i of the block
509	// that contains the value at position idx is:
510	//
511	// for (i = -1, sum = 0; sum <= idx; i++)
512	// sum += blockLength[i + 1] + 1;
513	//
514	// This can be slow, so we use SparseIndex[] populated with a set of SparseEntry that
515	// point to known indices into blockLength[]. Namely SparseIndex[k] is a SparseEntry
516	// that stores the blockLength[] index and the offset within that block of the value
517	// with index I(k), where:
518	//
519	// I(k) = k d->span + d->span / 2 (1)*
520
521	// First step is to get the 'k' of the I(k) nearest to our idx, using definition (1)
522	uint32_t k = idx / d->span;
523
524	// Then we read the corresponding SparseIndex[] entry
525	uint32_t block = number<uint32_t, LittleEndian>(&d->sparseIndex[k].block);
526	int offset = number<uint16_t, LittleEndian>(&d->sparseIndex[k].offset);
527
528	// Now compute the difference idx - I(k). From definition of k we know that
529	//
530	// idx = k d->span + idx % d->span (2)*
531	//
532	// So from (1) and (2) we can compute idx - I(K):
533	int diff = idx % d->span - d->span / `2`;
534
535	// Sum the above to offset to find the offset corresponding to our idx
536	offset += diff;
537
538	// Move to previous/next block, until we reach the correct block that contains idx,
539	// that is when 0 <= offset <= d->blockLength[block]
540	while (offset < `0`)
541	offset += d->blockLength[--block] + `1`;
542
543	while (offset > d->blockLength[block])
544	offset -= d->blockLength[block++] + `1`;
545
546	// Finally, we find the start address of our block of canonical Huffman symbols
547	uint32_t* ptr = (uint32_t)(d->data + ((uint64_t)block d->sizeofBlock));
548
549	// Read the first 64 bits in our block, this is a (truncated) sequence of
550	// unknown number of symbols of unknown length but we know the first one
551	// is at the beginning of this 64 bits sequence.
552	uint64_t buf64 = number<uint64_t, BigEndian>(ptr); ptr += `2`;
553	int buf64Size = `64`;
554	Sym sym;
555
556	while (true) {
557	int len = `0`; // This is the symbol length - d->min_sym_len
558
559	// Now get the symbol length. For any symbol s64 of length l right-padded
560	// to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we
561	// can find the symbol length iterating through base64[].
562	while (buf64 < d->base64 [len])
563	++len;
564
565	// All the symbols of a given length are consecutive integers (numerical
566	// sequence property), so we can compute the offset of our symbol of
567	// length len, stored at the beginning of buf64.
568	sym = (buf64 - d->base64 [len]) >> (`64` - len - d->minSymLen);
569
570	// Now add the value of the lowest symbol of length len to get our symbol
571	sym += number<Sym, LittleEndian>(&d->lowestSym[len]);
572
573	// If our offset is within the number of values represented by symbol sym
574	// we are done...
575	if (offset < d->symlen [sym] + `1`)
576	break;
577
578	// ...otherwise update the offset and continue to iterate
579	offset -= d->symlen [sym] + `1`;
580	len += d->minSymLen; // Get the real length
581	buf64 <<= len; // Consume the just processed symbol
582	buf64Size -= len;
583
584	if (buf64Size <= `32`) { // Refill the buffer
585	buf64Size += `32`;
586	buf64 \|= (uint64_t)number<uint32_t, BigEndian>(ptr++) << (`64` - buf64Size);
587	}
588	}
589
590	// Ok, now we have our symbol that expands into d->symlen[sym] + 1 symbols.
591	// We binary-search for our value recursively expanding into the left and
592	// right child symbols until we reach a leaf node where symlen[sym] + 1 == 1
593	// that will store the value we need.
594	while (d->symlen [sym]) {
595
596	Sym left = d->btree[sym].get<LR::Left>();
597
598	// If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and
599	// expands in a pair (d->symlen[left] = 23, d->symlen[right] = 11), then
600	// we know that, for instance the ten-th value (offset = 10) will be on
601	// the left side because in Recursive Pairing child symbols are adjacent.
602	if (offset < d->symlen [left] + `1`)
603	sym = left;
604	else {
605	offset -= d->symlen [left] + `1`;
606	sym = d->btree[sym].get<LR::Right>();
607	}
608	}
609
610	return d->btree[sym].get<LR::Left>();
611	}
612
613	bool check_dtz_stm(TBTable<WDL>, int, File) { return* true; }
614
615	bool check_dtz_stm(TBTable<DTZ>* entry, int stm, File f) {
616
617	auto flags = entry->get(stm, f)->flags;
618	return (flags & TBFlag::STM) == stm
619	\|\| ((entry->key == entry->key2) && !entry->hasPawns);
620	}
621
622	// DTZ scores are sorted by frequency of occurrence and then assigned the
623	// values 0, 1, 2, ... in order of decreasing frequency. This is done for each
624	// of the four WDLScore values. The mapping information necessary to reconstruct
625	// the original values is stored in the TB file and read during map[] init.
626	WDLScore map_score(TBTable<WDL>, File, int* value, WDLScore) { return WDLScore(value - `2`); }
627
628	int map_score(TBTable<DTZ>* entry, File f, int value, WDLScore wdl) {
629
630	constexpr int WDLMap[] = { `1`, `3`, `0`, `2`, `0` };
631
632	auto flags = entry->get(`0`, f)->flags;
633
634	uint8_t* map = entry->map;
635	uint16_t* idx = entry->get(`0`, f)->map_idx;
636	if (flags & TBFlag::Mapped) {
637	if (flags & TBFlag::Wide)
638	value = ((uint16_t *)map)[idx[WDLMap[wdl + `2`]] + value];
639	else
640	value = map[idx[WDLMap[wdl + `2`]] + value];
641	}
642
643	// DTZ tables store distance to zero in number of moves or plies. We
644	// want to return plies, so we have convert to plies when needed.
645	if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies))
646	\|\| (wdl == WDLLoss && !(flags & TBFlag::LossPlies))
647	\|\| wdl == WDLCursedWin
648	\|\| wdl == WDLBlessedLoss)
649	value *= `2`;
650
651	return value + `1`;
652	}
653
654	// Compute a unique index out of a position and use it to probe the TB file. To
655	// encode k pieces of same type and color, first sort the pieces by square in
656	// ascending order s1 <= s2 <= ... <= sk then compute the unique index as:
657	//
658	// idx = Binomial[1][s1] + Binomial[2][s2] + ... + Binomial[k][sk]
659	//
660	template<typename T, typename Ret = typename T::Ret>
661	Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) {
662
663	Square squares[TBPIECES];
664	Piece pieces[TBPIECES];
665	uint64_t idx;
666	int next = `0`, size = `0`, leadPawnsCnt = `0`;
667	PairsData* d;
668	Bitboard b, leadPawns = `0`;
669	File tbFile = FILE_A;
670
671	// A given TB entry like KRK has associated two material keys: KRvk and Kvkr.
672	// If both sides have the same pieces keys are equal. In this case TB tables
673	// only store the 'white to move' case, so if the position to lookup has black
674	// to move, we need to switch the color and flip the squares before to lookup.
675	bool symmetricBlackToMove = (entry->key == entry->key2 && pos.side_to_move());
676
677	// TB files are calculated for white as stronger side. For instance we have
678	// KRvK, not KvKR. A position where stronger side is white will have its
679	// material key == entry->key, otherwise we have to switch the color and
680	// flip the squares before to lookup.
681	bool blackStronger = (pos.material_key() != entry->key);
682
683	int flipColor = (symmetricBlackToMove \|\| blackStronger) * `8`;
684	int flipSquares = (symmetricBlackToMove \|\| blackStronger) * `070`;
685	int stm = (symmetricBlackToMove \|\| blackStronger) ^ pos.side_to_move();
686
687	// For pawns, TB files store 4 separate tables according if leading pawn is on
688	// file a, b, c or d after reordering. The leading pawn is the one with maximum
689	// MapPawns[] value, that is the one most toward the edges and with lowest rank.
690	if (entry->hasPawns) {
691
692	// In all the 4 tables, pawns are at the beginning of the piece sequence and
693	// their color is the reference one. So we just pick the first one.
694	Piece pc = Piece(entry->get(`0`, `0`)->pieces[`0`] ^ flipColor);
695
696	assert(type_of(pc) == PAWN);
697
698	leadPawns = b = pos.pieces(color_of(pc), PAWN);
699	do
700	squares[size++] = pop_lsb(&b) ^ flipSquares;
701	while (b);
702
703	leadPawnsCnt = size;
704
705	std::swap(squares[`0`], *std::max_element(squares, squares + leadPawnsCnt, pawns_comp));
706
707	tbFile = file_of(squares[`0`]);
708	if (tbFile > FILE_D)
709	tbFile = file_of(squares[`0`] ^ `7`); // Horizontal flip: SQ_H1 -> SQ_A1
710	}
711
712	// DTZ tables are one-sided, i.e. they store positions only for white to
713	// move or only for black to move, so check for side to move to be stm,
714	// early exit otherwise.
715	if (!check_dtz_stm(entry, stm, tbFile))
716	return *result = CHANGE_STM, Ret();
717
718	// Now we are ready to get all the position pieces (but the lead pawns) and
719	// directly map them to the correct color and square.
720	b = pos.pieces() ^ leadPawns;
721	do {
722	Square s = pop_lsb(&b);
723	squares[size] = s ^ flipSquares;
724	pieces[size++] = Piece(pos.piece_on(s) ^ flipColor);
725	} while (b);
726
727	assert(size >= `2`);
728
729	d = entry->get(stm, tbFile);
730
731	// Then we reorder the pieces to have the same sequence as the one stored
732	// in pieces[i]: the sequence that ensures the best compression.
733	for (int i = leadPawnsCnt; i < size; ++i)
734	for (int j = i; j < size; ++j)
735	if (d->pieces[i] == pieces[j])
736	{
737	std::swap(pieces[i], pieces[j]);
738	std::swap(squares[i], squares[j]);
739	break;
740	}
741
742	// Now we map again the squares so that the square of the lead piece is in
743	// the triangle A1-D1-D4.
744	if (file_of(squares[`0`]) > FILE_D)
745	for (int i = `0`; i < size; ++i)
746	squares[i] ^= `7`; // Horizontal flip: SQ_H1 -> SQ_A1
747
748	// Encode leading pawns starting with the one with minimum MapPawns[] and
749	// proceeding in ascending order.
750	if (entry->hasPawns) {
751	idx = LeadPawnIdx[leadPawnsCnt][squares[`0`]];
752
753	std::sort(squares + `1`, squares + leadPawnsCnt, pawns_comp);
754
755	for (int i = `1`; i < leadPawnsCnt; ++i)
756	idx += Binomial[i][MapPawns[squares[i]]];
757
758	goto encode_remaining; // With pawns we have finished special treatments
759	}
760
761	// In positions withouth pawns, we further flip the squares to ensure leading
762	// piece is below RANK_5.
763	if (rank_of(squares[`0`]) > RANK_4)
764	for (int i = `0`; i < size; ++i)
765	squares[i] ^= `070`; // Vertical flip: SQ_A8 -> SQ_A1
766
767	// Look for the first piece of the leading group not on the A1-D4 diagonal
768	// and ensure it is mapped below the diagonal.
769	for (int i = `0`; i < d->groupLen[`0`]; ++i) {
770	if (!off_A1H8(squares[i]))
771	continue;
772
773	if (off_A1H8(squares[i]) > `0`) // A1-H8 diagonal flip: SQ_A3 -> SQ_C3
774	for (int j = i; j < size; ++j)
775	squares[j] = Square(((squares[j] >> `3`) \| (squares[j] << `3`)) & `63`);
776	break;
777	}
778
779	// Encode the leading group.
780	//
781	// Suppose we have KRvK. Let's say the pieces are on square numbers wK, wR
782	// and bK (each 0...63). The simplest way to map this position to an index
783	// is like this:
784	//
785	// index = wK 64 * 64 + wR * 64 + bK;*
786	//
787	// But this way the TB is going to have 646464 = 262144 positions, with
788	// lots of positions being equivalent (because they are mirrors of each
789	// other) and lots of positions being invalid (two pieces on one square,
790	// adjacent kings, etc.).
791	// Usually the first step is to take the wK and bK together. There are just
792	// 462 ways legal and not-mirrored ways to place the wK and bK on the board.
793	// Once we have placed the wK and bK, there are 62 squares left for the wR
794	// Mapping its square from 0..63 to available squares 0..61 can be done like:
795	//
796	// wR -= (wR > wK) + (wR > bK);
797	//
798	// In words: if wR "comes later" than wK, we deduct 1, and the same if wR
799	// "comes later" than bK. In case of two same pieces like KRRvK we want to
800	// place the two Rs "together". If we have 62 squares left, we can place two
801	// Rs "together" in 62 61 / 2 ways (we divide by 2 because rooks can be*
802	// swapped and still get the same position.)
803	//
804	// In case we have at least 3 unique pieces (inlcuded kings) we encode them
805	// together.
806	if (entry->hasUniquePieces) {
807
808	int adjust1 = squares[`1`] > squares[`0`];
809	int adjust2 = (squares[`2`] > squares[`0`]) + (squares[`2`] > squares[`1`]);
810
811	// First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3
812	// triangle to 0...5. There are 63 squares for second piece and and 62
813	// (mapped to 0...61) for the third.
814	if (off_A1H8(squares[`0`]))
815	idx = ( MapA1D1D4[squares[`0`]] * `63`
816	+ (squares[`1`] - adjust1)) * `62`
817	+ squares[`2`] - adjust2;
818
819	// First piece is on a1-h8 diagonal, second below: map this occurence to
820	// 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal
821	// to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27.
822	else if (off_A1H8(squares[`1`]))
823	idx = ( `6` * `63` + rank_of(squares[`0`]) * `28`
824	+ MapB1H1H7[squares[`1`]]) * `62`
825	+ squares[`2`] - adjust2;
826
827	// First two pieces are on a1-h8 diagonal, third below
828	else if (off_A1H8(squares[`2`]))
829	idx = `6` * `63` * `62` + `4` * `28` * `62`
830	+ rank_of(squares[`0`]) * `7` * `28`
831	+ (rank_of(squares[`1`]) - adjust1) * `28`
832	+ MapB1H1H7[squares[`2`]];
833
834	// All 3 pieces on the diagonal a1-h8
835	else
836	idx = `6` * `63` * `62` + `4` * `28` * `62` + `4` * `7` * `28`
837	+ rank_of(squares[`0`]) * `7` * `6`
838	+ (rank_of(squares[`1`]) - adjust1) * `6`
839	+ (rank_of(squares[`2`]) - adjust2);
840	} else
841	// We don't have at least 3 unique pieces, like in KRRvKBB, just map
842	// the kings.
843	idx = MapKK[MapA1D1D4[squares[`0`]]][squares[`1`]];
844
845	encode_remaining:
846	idx *= d->groupIdx[`0`];
847	Square* groupSq = squares + d->groupLen[`0`];
848
849	// Encode remaining pawns then pieces according to square, in ascending order
850	bool remainingPawns = entry->hasPawns && entry->pawnCount[`1`];
851
852	while (d->groupLen[++next])
853	{
854	std::sort(groupSq, groupSq + d->groupLen[next]);
855	uint64_t n = `0`;
856
857	// Map down a square if "comes later" than a square in the previous
858	// groups (similar to what done earlier for leading group pieces).
859	for (int i = `0`; i < d->groupLen[next]; ++i)
860	{
861	auto f = [&](Square s) { return groupSq[i] > s; };
862	auto adjust = std::count_if(squares, groupSq, f);
863	n += Binomial[i + `1`][groupSq[i] - adjust - `8` * remainingPawns];
864	}
865
866	remainingPawns = false;
867	idx += n * d->groupIdx[next];
868	groupSq += d->groupLen[next];
869	}
870
871	// Now that we have the index, decompress the pair and get the score
872	return map_score(entry, tbFile, decompress_pairs(d, idx), wdl);
873	}
874
875	// Group together pieces that will be encoded together. The general rule is that
876	// a group contains pieces of same type and color. The exception is the leading
877	// group that, in case of positions withouth pawns, can be formed by 3 different
878	// pieces (default) or by the king pair when there is not a unique piece apart
879	// from the kings. When there are pawns, pawns are always first in pieces[].
880	//
881	// As example KRKN -> KRK + N, KNNK -> KK + NN, KPPKP -> P + PP + K + K
882	//
883	// The actual grouping depends on the TB generator and can be inferred from the
884	// sequence of pieces in piece[] array.
885	template<typename T>
886	void set_groups(T& e, PairsData* d, int order[], File f) {
887
888	int n = `0`, firstLen = e.hasPawns ? `0` : e.hasUniquePieces ? `3` : `2`;
889	d->groupLen[n] = `1`;
890
891	// Number of pieces per group is stored in groupLen[], for instance in KRKN
892	// the encoder will default on '111', so groupLen[] will be (3, 1).
893	for (int i = `1`; i < e.pieceCount; ++i)
894	if (--firstLen > `0` \|\| d->pieces[i] == d->pieces[i - `1`])
895	d->groupLen[n]++;
896	else
897	d->groupLen[++n] = `1`;
898
899	d->groupLen[++n] = `0`; // Zero-terminated
900
901	// The sequence in pieces[] defines the groups, but not the order in which
902	// they are encoded. If the pieces in a group g can be combined on the board
903	// in N(g) different ways, then the position encoding will be of the form:
904	//
905	// g1 N(g2) * N(g3) + g2 * N(g3) + g3*
906	//
907	// This ensures unique encoding for the whole position. The order of the
908	// groups is a per-table parameter and could not follow the canonical leading
909	// pawns/pieces -> remainig pawns -> remaining pieces. In particular the
910	// first group is at order[0] position and the remaining pawns, when present,
911	// are at order[1] position.
912	bool pp = e.hasPawns && e.pawnCount[`1`]; // Pawns on both sides
913	int next = pp ? `2` : `1`;
914	int freeSquares = `64` - d->groupLen[`0`] - (pp ? d->groupLen[`1`] : `0`);
915	uint64_t idx = `1`;
916
917	for (int k = `0`; next < n \|\| k == order[`0`] \|\| k == order[`1`]; ++k)
918	if (k == order[`0`]) // Leading pawns or pieces
919	{
920	d->groupIdx[`0`] = idx;
921	idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[`0`]][f]
922	: e.hasUniquePieces ? `31332` : `462`;
923	}
924	else if (k == order[`1`]) // Remaining pawns
925	{
926	d->groupIdx[`1`] = idx;
927	idx *= Binomial[d->groupLen[`1`]][`48` - d->groupLen[`0`]];
928	}
929	else // Remainig pieces
930	{
931	d->groupIdx[next] = idx;
932	idx *= Binomial[d->groupLen[next]][freeSquares];
933	freeSquares -= d->groupLen[next++];
934	}
935
936	d->groupIdx[n] = idx;
937	}
938
939	// In Recursive Pairing each symbol represents a pair of childern symbols. So
940	// read d->btree[] symbols data and expand each one in his left and right child
941	// symbol until reaching the leafs that represent the symbol value.
942	uint8_t set_symlen(PairsData* d, Sym s, std::vector<bool>& visited) {
943
944	visited [s] = true; // We can set it now because tree is acyclic
945	Sym sr = d->btree[s].get<LR::Right>();
946
947	if (sr == `0xFFF`)
948	return `0`;
949
950	Sym sl = d->btree[s].get<LR::Left>();
951
952	if (!visited [sl])
953	d->symlen [sl] = set_symlen(d, sl, visited);
954
955	if (!visited [sr])
956	d->symlen [sr] = set_symlen(d, sr, visited);
957
958	return d->symlen [sl] + d->symlen [sr] + `1`;
959	}
960
961	uint8_t* set_sizes(PairsData* d, uint8_t* data) {
962
963	d->flags = *data++;
964
965	if (d->flags & TBFlag::SingleValue) {
966	d->blocksNum = d->blockLengthSize = `0`;
967	d->span = d->sparseIndexSize = `0`; // Broken MSVC zero-init
968	d->minSymLen = data++; // Here we store the single value*
969	return data;
970	}
971
972	// groupLen[] is a zero-terminated list of group lengths, the last groupIdx[]
973	// element stores the biggest index that is the tb size.
974	uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + `7`, `0`) - d->groupLen];
975
976	d->sizeofBlock = `1ULL` << *data++;
977	d->span = `1ULL` << *data++;
978	d->sparseIndexSize = (tbSize + d->span - `1`) / d->span; // Round up
979	auto padding = number<uint8_t, LittleEndian>(data++);
980	d->blocksNum = number<uint32_t, LittleEndian>(data); data += sizeof(uint32_t);
981	d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[]
982	// does not point out of range.
983	d->maxSymLen = *data++;
984	d->minSymLen = *data++;
985	d->lowestSym = (Sym*)data;
986	d->base64.resize(d->maxSymLen - d->minSymLen + `1`);
987
988	// The canonical code is ordered such that longer symbols (in terms of
989	// the number of bits of their Huffman code) have lower numeric value,
990	// so that d->lowestSym[i] >= d->lowestSym[i+1] (when read as LittleEndian).
991	// Starting from this we compute a base64[] table indexed by symbol length
992	// and containing 64 bit values so that d->base64[i] >= d->base64[i+1].
993	// See http://www.eecs.harvard.edu/~michaelm/E210/huffman.pdf
994	for (int i = d->base64.size() - `2`; i >= `0`; --i) {
995	d->base64 [i] = (d->base64 [i + `1`] + number<Sym, LittleEndian>(&d->lowestSym[i])
996	- number<Sym, LittleEndian>(&d->lowestSym[i + `1`])) / `2`;
997
998	assert(d->base64[i] * `2` >= d->base64[i+`1`]);
999	}
1000
1001	// Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more
1002	// than d->base64[i+1] and given the above assert condition, we ensure that
1003	// d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i
1004	// and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i].
1005	for (size_t i = `0`; i < d->base64.size(); ++i)
1006	d->base64 [i] <<= `64` - i - d->minSymLen; // Right-padding to 64 bits
1007
1008	data += d->base64.size() * sizeof(Sym);
1009	d->symlen.resize(number<uint16_t, LittleEndian>(data)); data += sizeof(uint16_t);
1010	d->btree = (LR*)data;
1011
1012	// The compression scheme used is "Recursive Pairing", that replaces the most
1013	// frequent adjacent pair of symbols in the source message by a new symbol,
1014	// reevaluating the frequencies of all of the symbol pairs with respect to
1015	// the extended alphabet, and then repeating the process.
1016	// See http://www.larsson.dogma.net/dcc99.pdf
1017	std::vector<bool> visited(d->symlen.size());
1018
1019	for (Sym sym = `0`; sym < d->symlen.size(); ++sym)
1020	if (!visited [sym])
1021	d->symlen [sym] = set_symlen(d, sym, visited);
1022
1023	return data + d->symlen.size() * sizeof(LR) + (d->symlen.size() & `1`);
1024	}
1025
1026	uint8_t* set_dtz_map(TBTable<WDL>&, uint8_t* data, File) { return data; }
1027
1028	uint8_t* set_dtz_map(TBTable<DTZ>& e, uint8_t* data, File maxFile) {
1029
1030	e.map = data;
1031
1032	for (File f = FILE_A; f <= maxFile; ++f) {
1033	auto flags = e.get(`0`, f)->flags;
1034	if (flags & TBFlag::Mapped) {
1035	if (flags & TBFlag::Wide) {
1036	data += (uintptr_t)data & `1`; // Word alignment, we may have a mixed table
1037	for (int i = `0`; i < `4`; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x
1038	e.get(`0`, f)->map_idx[i] = (uint16_t)((uint16_t )data - (uint16_t )e.map + `1`);
1039	data += `2` * number<uint16_t, LittleEndian>(data) + `2`;
1040	}
1041	}
1042	else {
1043	for (int i = `0`; i < `4`; ++i) {
1044	e.get(`0`, f)->map_idx[i] = (uint16_t)(data - e.map + `1`);
1045	data += *data + `1`;
1046	}
1047	}
1048	}
1049	}
1050
1051	return data += (uintptr_t)data & `1`; // Word alignment
1052	}
1053
1054	// Populate entry's PairsData records with data from the just memory mapped file.
1055	// Called at first access.
1056	template<typename T>
1057	void set(T& e, uint8_t* data) {
1058
1059	PairsData* d;
1060
1061	enum { Split = `1`, HasPawns = `2` };
1062
1063	assert(e.hasPawns == !!(*data & HasPawns));
1064	assert((e.key != e.key2) == !!(*data & Split));
1065
1066	data++; // First byte stores flags
1067
1068	const int sides = T::Sides == `2` && (e.key != e.key2) ? `2` : `1`;
1069	const File maxFile = e.hasPawns ? FILE_D : FILE_A;
1070
1071	bool pp = e.hasPawns && e.pawnCount[`1`]; // Pawns on both sides
1072
1073	assert(!pp \|\| e.pawnCount[`0`]);
1074
1075	for (File f = FILE_A; f <= maxFile; ++f) {
1076
1077	for (int i = `0`; i < sides; i++)
1078	*e.get(i, f) = PairsData ();
1079
1080	int order[][`2`] = { { data & `0xF`, pp ? (data + `1`) & `0xF` : `0xF` },
1081	{ data >> `4`, pp ? (data + `1`) >> `4` : `0xF` } };
1082	data += `1` + pp;
1083
1084	for (int k = `0`; k < e.pieceCount; ++k, ++data)
1085	for (int i = `0`; i < sides; i++)
1086	e.get(i, f)->pieces[k] = Piece(i ? data >> `4` : data & `0xF`);
1087
1088	for (int i = `0`; i < sides; ++i)
1089	set_groups(e, e.get(i, f), order[i], f);
1090	}
1091
1092	data += (uintptr_t)data & `1`; // Word alignment
1093
1094	for (File f = FILE_A; f <= maxFile; ++f)
1095	for (int i = `0`; i < sides; i++)
1096	data = set_sizes(e.get(i, f), data);
1097
1098	data = set_dtz_map(e, data, maxFile);
1099
1100	for (File f = FILE_A; f <= maxFile; ++f)
1101	for (int i = `0`; i < sides; i++) {
1102	(d = e.get(i, f))->sparseIndex = (SparseEntry*)data;
1103	data += d->sparseIndexSize * sizeof(SparseEntry);
1104	}
1105
1106	for (File f = FILE_A; f <= maxFile; ++f)
1107	for (int i = `0`; i < sides; i++) {
1108	(d = e.get(i, f))->blockLength = (uint16_t*)data;
1109	data += d->blockLengthSize * sizeof(uint16_t);
1110	}
1111
1112	for (File f = FILE_A; f <= maxFile; ++f)
1113	for (int i = `0`; i < sides; i++) {
1114	data = (uint8_t)(((uintptr_t)data + `0x3F`) & ~`0x3F`); // 64 byte alignment*
1115	(d = e.get(i, f))->data = data;
1116	data += d->blocksNum * d->sizeofBlock;
1117	}
1118	}
1119
1120	// If the TB file corresponding to the given position is already memory mapped
1121	// then return its base address, otherwise try to memory map and init it. Called
1122	// at every probe, memory map and init only at first access. Function is thread
1123	// safe and can be called concurrently.
1124	template<TBType Type>
1125	void* mapped(TBTable<Type>& e, const Position& pos) {
1126
1127	static Mutex mutex;
1128
1129	// Use 'acquire' to avoid a thread reading 'ready' == true while
1130	// another is still working. (compiler reordering may cause this).
1131	if (e.ready.load(std::memory_order_acquire))
1132	return e.baseAddress; // Could be nullptr if file does not exist
1133
1134	std::unique_lock<Mutex> lk(mutex);
1135
1136	if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock
1137	return e.baseAddress;
1138
1139	// Pieces strings in decreasing order for each color, like ("KPP","KR")
1140	std::string fname, w, b;
1141	for (PieceType pt = KING; pt >= PAWN; --pt) {
1142	w += std::string (popcount(pos.pieces(WHITE, pt)), PieceToChar [pt]);
1143	b += std::string (popcount(pos.pieces(BLACK, pt)), PieceToChar [pt]);
1144	}
1145
1146	fname = (e.key == pos.material_key() ? w + `'v'` + b : b + `'v'` + w)
1147	+ (Type == WDL ? ".rtbw" : ".rtbz");
1148
1149	uint8_t* data = TBFile (fname).map(&e.baseAddress, &e.mapping, Type);
1150
1151	if (data)
1152	set(e, data);
1153
1154	e.ready.store(true, std::memory_order_release);
1155	return e.baseAddress;
1156	}
1157
1158	template<TBType Type, typename Ret = typename TBTable<Type>::Ret>
1159	Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) {
1160
1161	if (pos.count<ALL_PIECES>() == `2`) // KvK
1162	return Ret(WDLDraw);
1163
1164	TBTable<Type>* entry = TBTables.get<Type>(pos.material_key());
1165
1166	if (!entry \|\| !mapped(*entry, pos))
1167	return *result = FAIL, Ret();
1168
1169	return do_probe_table(pos, entry, wdl, result);
1170	}
1171
1172	// For a position where the side to move has a winning capture it is not necessary
1173	// to store a winning value so the generator treats such positions as "don't cares"
1174	// and tries to assign to it a value that improves the compression ratio. Similarly,
1175	// if the side to move has a drawing capture, then the position is at least drawn.
1176	// If the position is won, then the TB needs to store a win value. But if the
1177	// position is drawn, the TB may store a loss value if that is better for compression.
1178	// All of this means that during probing, the engine must look at captures and probe
1179	// their results and must probe the position itself. The "best" result of these
1180	// probes is the correct result for the position.
1181	// DTZ tables do not store values when a following move is a zeroing winning move
1182	// (winning capture or winning pawn move). Also DTZ store wrong values for positions
1183	// where the best move is an ep-move (even if losing). So in all these cases set
1184	// the state to ZEROING_BEST_MOVE.
1185	template<bool CheckZeroingMoves>
1186	WDLScore search(Position& pos, ProbeState* result) {
1187
1188	WDLScore value, bestValue = WDLLoss;
1189	StateInfo st;
1190
1191	auto moveList = MoveList<LEGAL>(pos);
1192	size_t totalCount = moveList.size(), moveCount = `0`;
1193
1194	for (const Move& move : moveList)
1195	{
1196	if ( !pos.capture(move)
1197	&& (!CheckZeroingMoves \|\| type_of(pos.moved_piece(move)) != PAWN))
1198	continue;
1199
1200	moveCount++;
1201
1202	pos.do_move(move, st);
1203	value = -search<false>(pos, result);
1204	pos.undo_move(move);
1205
1206	if (*result == FAIL)
1207	return WDLDraw;
1208
1209	if (value > bestValue)
1210	{
1211	bestValue = value;
1212
1213	if (value >= WDLWin)
1214	{
1215	result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move*
1216	return value;
1217	}
1218	}
1219	}
1220
1221	// In case we have already searched all the legal moves we don't have to probe
1222	// the TB because the stored score could be wrong. For instance TB tables
1223	// do not contain information on position with ep rights, so in this case
1224	// the result of probe_wdl_table is wrong. Also in case of only capture
1225	// moves, for instance here 4K3/4q3/6p1/2k5/6p1/8/8/8 w - - 0 7, we have to
1226	// return with ZEROING_BEST_MOVE set.
1227	bool noMoreMoves = (moveCount && moveCount == totalCount);
1228
1229	if (noMoreMoves)
1230	value = bestValue;
1231	else
1232	{
1233	value = probe_table<WDL>(pos, result);
1234
1235	if (*result == FAIL)
1236	return WDLDraw;
1237	}
1238
1239	// DTZ stores a "don't care" value if bestValue is a win
1240	if (bestValue >= value)
1241	return *result = ( bestValue > WDLDraw
1242	\|\| noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue;
1243
1244	return *result = OK, value;
1245	}
1246
1247	} // namespace
1248
1249
1250	/// Tablebases::init() is called at startup and after every change to
1251	/// "SyzygyPath" UCI option to (re)create the various tables. It is not thread
1252	/// safe, nor it needs to be.
1253	void Tablebases::init(const std::string& paths) {
1254
1255	TBTables.clear();
1256	MaxCardinality = `0`;
1257	TBFile::Paths = paths;
1258
1259	if (paths.empty() \|\| paths == "<empty>")
1260	return;
1261
1262	// MapB1H1H7[] encodes a square below a1-h8 diagonal to 0..27
1263	int code = `0`;
1264	for (Square s = SQ_A1; s <= SQ_H8; ++s)
1265	if (off_A1H8(s) < `0`)
1266	MapB1H1H7[s] = code++;
1267
1268	// MapA1D1D4[] encodes a square in the a1-d1-d4 triangle to 0..9
1269	std::vector<Square> diagonal;
1270	code = `0`;
1271	for (Square s = SQ_A1; s <= SQ_D4; ++s)
1272	if (off_A1H8(s) < `0` && file_of(s) <= FILE_D)
1273	MapA1D1D4[s] = code++;
1274
1275	else if (!off_A1H8(s) && file_of(s) <= FILE_D)
1276	diagonal.push_back(s);
1277
1278	// Diagonal squares are encoded as last ones
1279	for (auto s : diagonal)
1280	MapA1D1D4[s] = code++;
1281
1282	// MapKK[] encodes all the 461 possible legal positions of two kings where
1283	// the first is in the a1-d1-d4 triangle. If the first king is on the a1-d4
1284	// diagonal, the other one shall not to be above the a1-h8 diagonal.
1285	std::vector<std::pair<int, Square>> bothOnDiagonal;
1286	code = `0`;
1287	for (int idx = `0`; idx < `10`; idx++)
1288	for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1)
1289	if (MapA1D1D4[s1] == idx && (idx \|\| s1 == SQ_B1)) // SQ_B1 is mapped to 0
1290	{
1291	for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
1292	if ((PseudoAttacks[KING][s1] \| s1) & s2)
1293	continue; // Illegal position
1294
1295	else if (!off_A1H8(s1) && off_A1H8(s2) > `0`)
1296	continue; // First on diagonal, second above
1297
1298	else if (!off_A1H8(s1) && !off_A1H8(s2))
1299	bothOnDiagonal.emplace_back(idx, s2);
1300
1301	else
1302	MapKK[idx][s2] = code++;
1303	}
1304
1305	// Legal positions with both kings on diagonal are encoded as last ones
1306	for (auto p : bothOnDiagonal)
1307	MapKK[p.first][p.second] = code++;
1308
1309	// Binomial[] stores the Binomial Coefficents using Pascal rule. There
1310	// are Binomial[k][n] ways to choose k elements from a set of n elements.
1311	Binomial[`0`][`0`] = `1`;
1312
1313	for (int n = `1`; n < `64`; n++) // Squares
1314	for (int k = `0`; k < `6` && k <= n; ++k) // Pieces
1315	Binomial[k][n] = (k > `0` ? Binomial[k - `1`][n - `1`] : `0`)
1316	+ (k < n ? Binomial[k ][n - `1`] : `0`);
1317
1318	// MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible
1319	// available squares when the leading one is in 's'. Moreover the pawn with
1320	// highest MapPawns[] is the leading pawn, the one nearest the edge and,
1321	// among pawns with same file, the one with lowest rank.
1322	int availableSquares = `47`; // Available squares when lead pawn is in a2
1323
1324	// Init the tables for the encoding of leading pawns group: with 7-men TB we
1325	// can have up to 5 leading pawns (KPPPPPK).
1326	for (int leadPawnsCnt = `1`; leadPawnsCnt <= `5`; ++leadPawnsCnt)
1327	for (File f = FILE_A; f <= FILE_D; ++f)
1328	{
1329	// Restart the index at every file because TB table is splitted
1330	// by file, so we can reuse the same index for different files.
1331	int idx = `0`;
1332
1333	// Sum all possible combinations for a given file, starting with
1334	// the leading pawn on rank 2 and increasing the rank.
1335	for (Rank r = RANK_2; r <= RANK_7; ++r)
1336	{
1337	Square sq = make_square(f, r);
1338
1339	// Compute MapPawns[] at first pass.
1340	// If sq is the leading pawn square, any other pawn cannot be
1341	// below or more toward the edge of sq. There are 47 available
1342	// squares when sq = a2 and reduced by 2 for any rank increase
1343	// due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45
1344	if (leadPawnsCnt == `1`)
1345	{
1346	MapPawns[sq] = availableSquares--;
1347	MapPawns[sq ^ `7`] = availableSquares--; // Horizontal flip
1348	}
1349	LeadPawnIdx[leadPawnsCnt][sq] = idx;
1350	idx += Binomial[leadPawnsCnt - `1`][MapPawns[sq]];
1351	}
1352	// After a file is traversed, store the cumulated per-file index
1353	LeadPawnsSize[leadPawnsCnt][f] = idx;
1354	}
1355
1356	// Add entries in TB tables if the corresponding ".rtbw" file exsists
1357	for (PieceType p1 = PAWN; p1 < KING; ++p1) {
1358	TBTables.add({KING, p1, KING});
1359
1360	for (PieceType p2 = PAWN; p2 <= p1; ++p2) {
1361	TBTables.add({KING, p1, p2, KING});
1362	TBTables.add({KING, p1, KING, p2});
1363
1364	for (PieceType p3 = PAWN; p3 < KING; ++p3)
1365	TBTables.add({KING, p1, p2, KING, p3});
1366
1367	for (PieceType p3 = PAWN; p3 <= p2; ++p3) {
1368	TBTables.add({KING, p1, p2, p3, KING});
1369
1370	for (PieceType p4 = PAWN; p4 <= p3; ++p4) {
1371	TBTables.add({KING, p1, p2, p3, p4, KING});
1372
1373	for (PieceType p5 = PAWN; p5 <= p4; ++p5)
1374	TBTables.add({KING, p1, p2, p3, p4, p5, KING});
1375
1376	for (PieceType p5 = PAWN; p5 < KING; ++p5)
1377	TBTables.add({KING, p1, p2, p3, p4, KING, p5});
1378	}
1379
1380	for (PieceType p4 = PAWN; p4 < KING; ++p4) {
1381	TBTables.add({KING, p1, p2, p3, KING, p4});
1382
1383	for (PieceType p5 = PAWN; p5 <= p4; ++p5)
1384	TBTables.add({KING, p1, p2, p3, KING, p4, p5});
1385	}
1386	}
1387
1388	for (PieceType p3 = PAWN; p3 <= p1; ++p3)
1389	for (PieceType p4 = PAWN; p4 <= (p1 == p3 ? p2 : p3); ++p4)
1390	TBTables.add({KING, p1, p2, KING, p3, p4});
1391	}
1392	}
1393
1394	sync_cout << "info string Found " << TBTables.size() << " tablebases" << sync_endl;
1395	}
1396
1397	// Probe the WDL table for a particular position.
1398	// If result != FAIL, the probe was successful.*
1399	// The return value is from the point of view of the side to move:
1400	// -2 : loss
1401	// -1 : loss, but draw under 50-move rule
1402	// 0 : draw
1403	// 1 : win, but draw under 50-move rule
1404	// 2 : win
1405	WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) {
1406
1407	*result = OK;
1408	return search<false>(pos, result);
1409	}
1410
1411	// Probe the DTZ table for a particular position.
1412	// If result != FAIL, the probe was successful.*
1413	// The return value is from the point of view of the side to move:
1414	// n < -100 : loss, but draw under 50-move rule
1415	// -100 <= n < -1 : loss in n ply (assuming 50-move counter == 0)
1416	// -1 : loss, the side to move is mated
1417	// 0 : draw
1418	// 1 < n <= 100 : win in n ply (assuming 50-move counter == 0)
1419	// 100 < n : win, but draw under 50-move rule
1420	//
1421	// The return value n can be off by 1: a return value -n can mean a loss
1422	// in n+1 ply and a return value +n can mean a win in n+1 ply. This
1423	// cannot happen for tables with positions exactly on the "edge" of
1424	// the 50-move rule.
1425	//
1426	// This implies that if dtz > 0 is returned, the position is certainly
1427	// a win if dtz + 50-move-counter <= 99. Care must be taken that the engine
1428	// picks moves that preserve dtz + 50-move-counter <= 99.
1429	//
1430	// If n = 100 immediately after a capture or pawn move, then the position
1431	// is also certainly a win, and during the whole phase until the next
1432	// capture or pawn move, the inequality to be preserved is
1433	// dtz + 50-movecounter <= 100.
1434	//
1435	// In short, if a move is available resulting in dtz + 50-move-counter <= 99,
1436	// then do not accept moves leading to dtz + 50-move-counter == 100.
1437	int Tablebases::probe_dtz(Position& pos, ProbeState* result) {
1438
1439	*result = OK;
1440	WDLScore wdl = search<true>(pos, result);
1441
1442	if (result == FAIL \|\| wdl == WDLDraw) // DTZ tables don't store draws*
1443	return `0`;
1444
1445	// DTZ stores a 'don't care' value in this case, or even a plain wrong
1446	// one as in case the best move is a losing ep, so it cannot be probed.
1447	if (*result == ZEROING_BEST_MOVE)
1448	return dtz_before_zeroing(wdl);
1449
1450	int dtz = probe_table<DTZ>(pos, result, wdl);
1451
1452	if (*result == FAIL)
1453	return `0`;
1454
1455	if (*result != CHANGE_STM)
1456	return (dtz + `100` * (wdl == WDLBlessedLoss \|\| wdl == WDLCursedWin)) * sign_of(wdl);
1457
1458	// DTZ stores results for the other side, so we need to do a 1-ply search and
1459	// find the winning move that minimizes DTZ.
1460	StateInfo st;
1461	int minDTZ = `0xFFFF`;
1462
1463	for (const Move& move : MoveList<LEGAL>(pos))
1464	{
1465	bool zeroing = pos.capture(move) \|\| type_of(pos.moved_piece(move)) == PAWN;
1466
1467	pos.do_move(move, st);
1468
1469	// For zeroing moves we want the dtz of the move _before_ doing it,
1470	// otherwise we will get the dtz of the next move sequence. Search the
1471	// position after the move to get the score sign (because even in a
1472	// winning position we could make a losing capture or going for a draw).
1473	dtz = zeroing ? -dtz_before_zeroing(search<false>(pos, result))
1474	: -probe_dtz(pos, result);
1475
1476	// If the move mates, force minDTZ to 1
1477	if (dtz == `1` && pos.checkers() && MoveList<LEGAL>(pos).size() == `0`)
1478	minDTZ = `1`;
1479
1480	// Convert result from 1-ply search. Zeroing moves are already accounted
1481	// by dtz_before_zeroing() that returns the DTZ of the previous move.
1482	if (!zeroing)
1483	dtz += sign_of(dtz);
1484
1485	// Skip the draws and if we are winning only pick positive dtz
1486	if (dtz < minDTZ && sign_of(dtz) == sign_of(wdl))
1487	minDTZ = dtz;
1488
1489	pos.undo_move(move);
1490
1491	if (*result == FAIL)
1492	return `0`;
1493	}
1494
1495	// When there are no legal moves, the position is mate: we return -1
1496	return minDTZ == `0xFFFF` ? -`1` : minDTZ;
1497	}
1498
1499
1500	// Use the DTZ tables to rank root moves.
1501	//
1502	// A return value false indicates that not all probes were successful.
1503	bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) {
1504
1505	ProbeState result;
1506	StateInfo st;
1507
1508	// Obtain 50-move counter for the root position
1509	int cnt50 = pos.rule50_count();
1510
1511	// Check whether a position was repeated since the last zeroing move.
1512	bool rep = pos.has_repeated();
1513
1514	int dtz, bound = Options ["Syzygy50MoveRule"] ? `900` : `1`;
1515
1516	// Probe and rank each move
1517	for (auto& m : rootMoves)
1518	{
1519	pos.do_move(m.pv [`0`], st);
1520
1521	// Calculate dtz for the current move counting from the root position
1522	if (pos.rule50_count() == `0`)
1523	{
1524	// In case of a zeroing move, dtz is one of -101/-1/0/1/101
1525	WDLScore wdl = -probe_wdl(pos, &result);
1526	dtz = dtz_before_zeroing(wdl);
1527	}
1528	else
1529	{
1530	// Otherwise, take dtz for the new position and correct by 1 ply
1531	dtz = -probe_dtz(pos, &result);
1532	dtz = dtz > `0` ? dtz + `1`
1533	: dtz < `0` ? dtz - `1` : dtz;
1534	}
1535
1536	// Make sure that a mating move is assigned a dtz value of 1
1537	if ( pos.checkers()
1538	&& dtz == `2`
1539	&& MoveList<LEGAL>(pos).size() == `0`)
1540	dtz = `1`;
1541
1542	pos.undo_move(m.pv [`0`]);
1543
1544	if (result == FAIL)
1545	return false;
1546
1547	// Better moves are ranked higher. Certain wins are ranked equally.
1548	// Losing moves are ranked equally unless a 50-move draw is in sight.
1549	int r = dtz > `0` ? (dtz + cnt50 <= `99` && !rep ? `1000` : `1000` - (dtz + cnt50))
1550	: dtz < `0` ? (-dtz * `2` + cnt50 < `100` ? -`1000` : -`1000` + (-dtz + cnt50))
1551	: `0`;
1552	m.tbRank = r;
1553
1554	// Determine the score to be displayed for this move. Assign at least
1555	// 1 cp to cursed wins and let it grow to 49 cp as the positions gets
1556	// closer to a real win.
1557	m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - `1`
1558	: r > `0` ? Value((std::max( `3`, r - `800`) * int(PawnValueEg)) / `200`)
1559	: r == `0` ? VALUE_DRAW
1560	: r > -bound ? Value((std::min(-`3`, r + `800`) * int(PawnValueEg)) / `200`)
1561	: -VALUE_MATE + MAX_PLY + `1`;
1562	}
1563
1564	return true;
1565	}
1566
1567
1568	// Use the WDL tables to rank root moves.
1569	// This is a fallback for the case that some or all DTZ tables are missing.
1570	//
1571	// A return value false indicates that not all probes were successful.
1572	bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) {
1573
1574	static const int WDL_to_rank[] = { -`1000`, -`899`, `0`, `899`, `1000` };
1575
1576	ProbeState result;
1577	StateInfo st;
1578
1579	bool rule50 = Options ["Syzygy50MoveRule"];
1580
1581	// Probe and rank each move
1582	for (auto& m : rootMoves)
1583	{
1584	pos.do_move(m.pv [`0`], st);
1585
1586	WDLScore wdl = -probe_wdl(pos, &result);
1587
1588	pos.undo_move(m.pv [`0`]);
1589
1590	if (result == FAIL)
1591	return false;
1592
1593	m.tbRank = WDL_to_rank[wdl + `2`];
1594
1595	if (!rule50)
1596	wdl = wdl > WDLDraw ? WDLWin
1597	: wdl < WDLDraw ? WDLLoss : WDLDraw;
1598	m.tbScore = WDL_to_value[wdl + `2`];
1599	}
1600
1601	return true;
1602	}
1603

Browse the source code of Stockfish/syzygy/tbprobe.cpp