zstdmt_compress.c source code [ClickHouse/contrib/zstd/lib/compress/zstdmt_compress.c]

1	/*
2	* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3	* All rights reserved.
4	*
5	* This source code is licensed under both the BSD-style license (found in the
6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7	* in the COPYING file in the root directory of this source tree).
8	* You may select, at your option, one of the above-listed licenses.
9	*/
10
11
/* ======   Tuning parameters   ====== */
/* Upper bound applied to any requested number of workers. */
#define ZSTDMT_NBWORKERS_MAX 200
#define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (2 GB))  /* note : limited by `jobSize` type, which is `unsigned` */
/* Value stored into params->overlapSizeLog when the worker count is set. */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
16
17
/* ======   Compiler specifics   ====== */
#if defined(_MSC_VER)
#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
#endif
22
23
/* ======   Dependencies   ====== */
25	#include <string.h> /* memcpy, memset */
26	#include <limits.h> /* INT_MAX */
27	#include "pool.h" /* threadpool */
28	#include "threading.h" /* mutex */
#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
30	#include "zstd_ldm.h"
31	#include "zstdmt_compress.h"
32
/* Guards code to support resizing the SeqPool.
 * We will want to resize the SeqPool to save memory in the future.
 * Until then, comment the code out since it is unused.
 */
#define ZSTD_RESIZE_SEQPOOL 0
38
/* ======   Debug   ====== */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)

#  include <stdio.h>
#  include <unistd.h>
#  include <sys/times.h>
/* Print to stderr when level `l` is enabled by ZSTD_DEBUG. */
#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }

/* Dump `n` bytes starting at `p` as hexadecimal, at debug level `l`. */
#  define DEBUG_PRINTHEX(l,p,n) {                                       \
    unsigned debug_u;                                                   \
    for (debug_u=0; debug_u<(n); debug_u++)                             \
        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]);  \
    DEBUGLOGRAW(l, " \n");                                              \
}

/* Current value of times(), converted from clock ticks to microseconds.
 * The ticks-per-second value is queried once via sysconf() and cached. */
static unsigned long long GetCurrentClockTimeMicroseconds(void)
{
    static clock_t ticksPerSecond = 0;
    if (ticksPerSecond <= 0) ticksPerSecond = sysconf(_SC_CLK_TCK);

    {   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
        return ((((unsigned long long)newTicks)*(1000000))/ticksPerSecond); }
}

#define MUTEX_WAIT_TIME_DLEVEL 6
/* Lock `mutex`; when the debug level is high enough, also measure how long
 * the acquisition took and log it if it exceeded the threshold. */
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
    if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
        ZSTD_pthread_mutex_lock(mutex);           \
        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
            unsigned long long const elapsedTime = (afterTime-beforeTime); \
            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
                   elapsedTime, #mutex);          \
        }   }                                     \
    } else {                                      \
        ZSTD_pthread_mutex_lock(mutex);           \
    }                                             \
}

#else

/* Non-debug build : plain lock, and hex dump compiles to nothing. */
#  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
#  define DEBUG_PRINTHEX(l,p,n) {}

#endif
85
86
/* =====   Buffer Pool   ===== */
/* a single Buffer Pool can be invoked from multiple threads in parallel */

/* A memory region : start address and capacity in bytes. */
typedef struct buffer_s {
    void* start;
    size_t capacity;
} buffer_t;

/* The empty buffer : no memory attached. */
static const buffer_t g_nullBuffer = { NULL, 0 };
96
97	typedef struct ZSTDMT_bufferPool_s {
98	ZSTD_pthread_mutex_t poolMutex;
99	size_t bufferSize;
100	unsigned totalBuffers;
101	unsigned nbBuffers;
102	ZSTD_customMem cMem;
103	buffer_t bTable[`1`]; / variable size /
104	} ZSTDMT_bufferPool;
105
106	static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
107	{
108	unsigned const maxNbBuffers = `2`*nbWorkers + `3`;
109	ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
110	sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-`1`) * sizeof(buffer_t), cMem);
111	if (bufPool==NULL) return NULL;
112	if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
113	ZSTD_free(bufPool, cMem);
114	return NULL;
115	}
116	bufPool->bufferSize = `64` KB;
117	bufPool->totalBuffers = maxNbBuffers;
118	bufPool->nbBuffers = `0`;
119	bufPool->cMem = cMem;
120	return bufPool;
121	}
122
123	static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
124	{
125	unsigned u;
126	DEBUGLOG(`3`, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
127	if (!bufPool) return; / compatibility with free on NULL /
128	for (u=`0`; u<bufPool->totalBuffers; u++) {
129	DEBUGLOG(`4`, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
130	ZSTD_free(bufPool->bTable[u].start, bufPool->cMem);
131	}
132	ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
133	ZSTD_free(bufPool, bufPool->cMem);
134	}
135
136	/ only works at initialization, not during compression /
137	static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
138	{
139	size_t const poolSize = sizeof(*bufPool)
140	+ (bufPool->totalBuffers - `1`) * sizeof(buffer_t);
141	unsigned u;
142	size_t totalBufferSize = `0`;
143	ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
144	for (u=`0`; u<bufPool->totalBuffers; u++)
145	totalBufferSize += bufPool->bTable[u].capacity;
146	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
147
148	return poolSize + totalBufferSize;
149	}
150
151	/ ZSTDMT_setBufferSize() :*
152	* all future buffers provided by this buffer pool will have _at least_ this size
153	* note : it's better for all buffers to have same size,
154	* as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
155	static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
156	{
157	ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
158	DEBUGLOG(`4`, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize);
159	bufPool->bufferSize = bSize;
160	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
161	}
162
163	/* ZSTDMT_getBuffer() :*
164	* assumption : bufPool must be valid
165	* @return : a buffer, with start pointer and size
166	* note: allocation may fail, in this case, start==NULL and size==0 */
167	static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
168	{
169	size_t const bSize = bufPool->bufferSize;
170	DEBUGLOG(`5`, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
171	ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
172	if (bufPool->nbBuffers) { / try to use an existing buffer /
173	buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
174	size_t const availBufferSize = buf.capacity;
175	bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
176	if ((availBufferSize >= bSize) & ((availBufferSize>>`3`) <= bSize)) {
177	/ large enough, but not too much /
178	DEBUGLOG(`5`, "ZSTDMT_getBuffer: provide buffer %u of size %u",
179	bufPool->nbBuffers, (U32)buf.capacity);
180	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
181	return buf;
182	}
183	/ size conditions not respected : scratch this buffer, create new one /
184	DEBUGLOG(`5`, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
185	ZSTD_free(buf.start, bufPool->cMem);
186	}
187	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
188	/ create new buffer /
189	DEBUGLOG(`5`, "ZSTDMT_getBuffer: create a new buffer");
190	{ buffer_t buffer;
191	void* const start = ZSTD_malloc(bSize, bufPool->cMem);
192	buffer.start = start; / note : start can be NULL if malloc fails ! /
193	buffer.capacity = (start==NULL) ? `0` : bSize;
194	if (start==NULL) {
195	DEBUGLOG(`5`, "ZSTDMT_getBuffer: buffer allocation failure !!");
196	} else {
197	DEBUGLOG(`5`, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
198	}
199	return buffer;
200	}
201	}
202
#if ZSTD_RESIZE_SEQPOOL
/** ZSTDMT_resizeBuffer() :
 *  assumption : bufPool must be valid
 * @return : a buffer that is at least the buffer pool buffer size.
 *           If a reallocation happens, the data in the input buffer is copied.
 *  note : when the new allocation fails, the input buffer is returned unchanged.
 */
static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
{
    size_t const bSize = bufPool->bufferSize;
    if (buffer.capacity < bSize) {
        void* const start = ZSTD_malloc(bSize, bufPool->cMem);
        buffer_t newBuffer;
        newBuffer.start = start;
        newBuffer.capacity = start == NULL ? 0 : bSize;
        if (start != NULL) {
            assert(newBuffer.capacity >= buffer.capacity);
            memcpy(newBuffer.start, buffer.start, buffer.capacity);
            DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
            return newBuffer;
        }
        DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
    }
    return buffer;
}
#endif
228
229	/ store buffer for later re-use, up to pool capacity /
230	static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
231	{
232	if (buf.start == NULL) return; / compatible with release on NULL /
233	DEBUGLOG(`5`, "ZSTDMT_releaseBuffer");
234	ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
235	if (bufPool->nbBuffers < bufPool->totalBuffers) {
236	bufPool->bTable[bufPool->nbBuffers++] = buf; / stored for later use /
237	DEBUGLOG(`5`, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
238	(U32)buf.capacity, (U32)(bufPool->nbBuffers-`1`));
239	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
240	return;
241	}
242	ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
243	/ Reached bufferPool capacity (should not happen) /
244	DEBUGLOG(`5`, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
245	ZSTD_free(buf.start, bufPool->cMem);
246	}
247
248
249	/ ===== Seq Pool Wrapper ====== /
250
251	static rawSeqStore_t kNullRawSeqStore = {NULL, `0`, `0`, `0`};
252
253	typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
254
255	static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
256	{
257	return ZSTDMT_sizeof_bufferPool(seqPool);
258	}
259
260	static rawSeqStore_t bufferToSeq(buffer_t buffer)
261	{
262	rawSeqStore_t seq = {NULL, `0`, `0`, `0`};
263	seq.seq = (rawSeq*)buffer.start;
264	seq.capacity = buffer.capacity / sizeof(rawSeq);
265	return seq;
266	}
267
268	static buffer_t seqToBuffer(rawSeqStore_t seq)
269	{
270	buffer_t buffer;
271	buffer.start = seq.seq;
272	buffer.capacity = seq.capacity * sizeof(rawSeq);
273	return buffer;
274	}
275
276	static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
277	{
278	if (seqPool->bufferSize == `0`) {
279	return kNullRawSeqStore;
280	}
281	return bufferToSeq(ZSTDMT_getBuffer(seqPool));
282	}
283
#if ZSTD_RESIZE_SEQPOOL
/* Resize a sequence store through the buffer-pool resize routine. */
static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
{
    buffer_t const current = seqToBuffer(seq);
    buffer_t const resized = ZSTDMT_resizeBuffer(seqPool, current);
    return bufferToSeq(resized);
}
#endif
290
291	static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
292	{
293	ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
294	}
295
296	static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
297	{
298	ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
299	}
300
301	static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
302	{
303	ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
304	ZSTDMT_setNbSeq(seqPool, `0`);
305	return seqPool;
306	}
307
308	static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
309	{
310	ZSTDMT_freeBufferPool(seqPool);
311	}
312
313
314
315	/ ===== CCtx Pool ===== /
316	/ a single CCtx Pool can be invoked from multiple threads in parallel /
317
318	typedef struct {
319	ZSTD_pthread_mutex_t poolMutex;
320	unsigned totalCCtx;
321	unsigned availCCtx;
322	ZSTD_customMem cMem;
323	ZSTD_CCtx* cctx[`1`]; / variable size /
324	} ZSTDMT_CCtxPool;
325
326	/ note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool /
327	static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
328	{
329	unsigned u;
330	for (u=`0`; u<pool->totalCCtx; u++)
331	ZSTD_freeCCtx(pool->cctx[u]); / note : compatible with free on NULL /
332	ZSTD_pthread_mutex_destroy(&pool->poolMutex);
333	ZSTD_free(pool, pool->cMem);
334	}
335
336	/ ZSTDMT_createCCtxPool() :*
337	* implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
338	static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
339	ZSTD_customMem cMem)
340	{
341	ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
342	sizeof(ZSTDMT_CCtxPool) + (nbWorkers-`1`)*sizeof(ZSTD_CCtx*), cMem);
343	assert(nbWorkers > `0`);
344	if (!cctxPool) return NULL;
345	if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
346	ZSTD_free(cctxPool, cMem);
347	return NULL;
348	}
349	cctxPool->cMem = cMem;
350	cctxPool->totalCCtx = nbWorkers;
351	cctxPool->availCCtx = `1`; / at least one cctx for single-thread mode /
352	cctxPool->cctx[`0`] = ZSTD_createCCtx_advanced(cMem);
353	if (!cctxPool->cctx[`0`]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
354	DEBUGLOG(`3`, "cctxPool created, with %u workers", nbWorkers);
355	return cctxPool;
356	}
357
358	/ only works during initialization phase, not during compression /
359	static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
360	{
361	ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
362	{ unsigned const nbWorkers = cctxPool->totalCCtx;
363	size_t const poolSize = sizeof(*cctxPool)
364	+ (nbWorkers-`1`) * sizeof(ZSTD_CCtx*);
365	unsigned u;
366	size_t totalCCtxSize = `0`;
367	for (u=`0`; u<nbWorkers; u++) {
368	totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
369	}
370	ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
371	assert(nbWorkers > `0`);
372	return poolSize + totalCCtxSize;
373	}
374	}
375
376	static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
377	{
378	DEBUGLOG(`5`, "ZSTDMT_getCCtx");
379	ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
380	if (cctxPool->availCCtx) {
381	cctxPool->availCCtx--;
382	{ ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
383	ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
384	return cctx;
385	} }
386	ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
387	DEBUGLOG(`5`, "create one more CCtx");
388	return ZSTD_createCCtx_advanced(cctxPool->cMem); / note : can be NULL, when creation fails ! /
389	}
390
391	static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
392	{
393	if (cctx==NULL) return; / compatibility with release on NULL /
394	ZSTD_pthread_mutex_lock(&pool->poolMutex);
395	if (pool->availCCtx < pool->totalCCtx)
396	pool->cctx[pool->availCCtx++] = cctx;
397	else {
398	/ pool overflow : should not happen, since totalCCtx==nbWorkers /
399	DEBUGLOG(`4`, "CCtx pool overflow : free cctx");
400	ZSTD_freeCCtx(cctx);
401	}
402	ZSTD_pthread_mutex_unlock(&pool->poolMutex);
403	}
404
/* ====   Serial State   ==== */

/* A read-only memory region : start address and size in bytes. */
typedef struct {
    void const* start;
    size_t size;
} range_t;
411
412	typedef struct {
413	/ All variables in the struct are protected by mutex. /
414	ZSTD_pthread_mutex_t mutex;
415	ZSTD_pthread_cond_t cond;
416	ZSTD_CCtx_params params;
417	ldmState_t ldmState;
418	XXH64_state_t xxhState;
419	unsigned nextJobID;
420	/ Protects ldmWindow.*
421	* Must be acquired after the main mutex when acquiring both.
422	*/
423	ZSTD_pthread_mutex_t ldmWindowMutex;
424	ZSTD_pthread_cond_t ldmWindowCond; / Signaled when ldmWindow is udpated /
425	ZSTD_window_t ldmWindow; / A thread-safe copy of ldmState.window /
426	} serialState_t;
427
428	static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
429	{
430	/ Adjust parameters /
431	if (params.ldmParams.enableLdm) {
432	DEBUGLOG(`4`, "LDM window size = %u KB", (`1U` << params.cParams.windowLog) >> `10`);
433	params.ldmParams.windowLog = params.cParams.windowLog;
434	ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
435	assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
436	assert(params.ldmParams.hashEveryLog < `32`);
437	serialState->ldmState.hashPower =
438	ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
439	} else {
440	memset(&params.ldmParams, `0`, sizeof(params.ldmParams));
441	}
442	serialState->nextJobID = `0`;
443	if (params.fParams.checksumFlag)
444	XXH64_reset(&serialState->xxhState, `0`);
445	if (params.ldmParams.enableLdm) {
446	ZSTD_customMem cMem = params.customMem;
447	unsigned const hashLog = params.ldmParams.hashLog;
448	size_t const hashSize = ((size_t)`1` << hashLog) * sizeof(ldmEntry_t);
449	unsigned const bucketLog =
450	params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
451	size_t const bucketSize = (size_t)`1` << bucketLog;
452	unsigned const prevBucketLog =
453	serialState->params.ldmParams.hashLog -
454	serialState->params.ldmParams.bucketSizeLog;
455	/ Size the seq pool tables /
456	ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
457	/ Reset the window /
458	ZSTD_window_clear(&serialState->ldmState.window);
459	serialState->ldmWindow = serialState->ldmState.window;
460	/ Resize tables and output space if necessary. /
461	if (serialState->ldmState.hashTable == NULL \|\| serialState->params.ldmParams.hashLog < hashLog) {
462	ZSTD_free(serialState->ldmState.hashTable, cMem);
463	serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
464	}
465	if (serialState->ldmState.bucketOffsets == NULL \|\| prevBucketLog < bucketLog) {
466	ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
467	serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
468	}
469	if (!serialState->ldmState.hashTable \|\| !serialState->ldmState.bucketOffsets)
470	return `1`;
471	/ Zero the tables /
472	memset(serialState->ldmState.hashTable, `0`, hashSize);
473	memset(serialState->ldmState.bucketOffsets, `0`, bucketSize);
474	}
475	serialState->params = params;
476	return `0`;
477	}
478
479	static int ZSTDMT_serialState_init(serialState_t* serialState)
480	{
481	int initError = `0`;
482	memset(serialState, `0`, sizeof(*serialState));
483	initError \|= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
484	initError \|= ZSTD_pthread_cond_init(&serialState->cond, NULL);
485	initError \|= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
486	initError \|= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
487	return initError;
488	}
489
490	static void ZSTDMT_serialState_free(serialState_t* serialState)
491	{
492	ZSTD_customMem cMem = serialState->params.customMem;
493	ZSTD_pthread_mutex_destroy(&serialState->mutex);
494	ZSTD_pthread_cond_destroy(&serialState->cond);
495	ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
496	ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
497	ZSTD_free(serialState->ldmState.hashTable, cMem);
498	ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
499	}
500
501	static void ZSTDMT_serialState_update(serialState_t* serialState,
502	ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
503	range_t src, unsigned jobID)
504	{
505	/ Wait for our turn /
506	ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
507	while (serialState->nextJobID < jobID) {
508	ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
509	}
510	/ A future job may error and skip our job /
511	if (serialState->nextJobID == jobID) {
512	/ It is now our turn, do any processing necessary /
513	if (serialState->params.ldmParams.enableLdm) {
514	size_t error;
515	assert(seqStore.seq != NULL && seqStore.pos == `0` &&
516	seqStore.size == `0` && seqStore.capacity > `0`);
517	ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
518	error = ZSTD_ldm_generateSequences(
519	&serialState->ldmState, &seqStore,
520	&serialState->params.ldmParams, src.start, src.size);
521	/ We provide a large enough buffer to never fail. /
522	assert(!ZSTD_isError(error)); (void)error;
523	/ Update ldmWindow to match the ldmState.window and signal the main*
524	* thread if it is waiting for a buffer.
525	*/
526	ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
527	serialState->ldmWindow = serialState->ldmState.window;
528	ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
529	ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
530	}
531	if (serialState->params.fParams.checksumFlag && src.size > `0`)
532	XXH64_update(&serialState->xxhState, src.start, src.size);
533	}
534	/ Now it is the next jobs turn /
535	serialState->nextJobID++;
536	ZSTD_pthread_cond_broadcast(&serialState->cond);
537	ZSTD_pthread_mutex_unlock(&serialState->mutex);
538
539	if (seqStore.size > `0`) {
540	size_t const err = ZSTD_referenceExternalSequences(
541	jobCCtx, seqStore.seq, seqStore.size);
542	assert(serialState->params.ldmParams.enableLdm);
543	assert(!ZSTD_isError(err));
544	(void)err;
545	}
546	}
547
548	static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
549	unsigned jobID, size_t cSize)
550	{
551	ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
552	if (serialState->nextJobID <= jobID) {
553	assert(ZSTD_isError(cSize)); (void)cSize;
554	DEBUGLOG(`5`, "Skipping past job %u because of error", jobID);
555	serialState->nextJobID = jobID + `1`;
556	ZSTD_pthread_cond_broadcast(&serialState->cond);
557
558	ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
559	ZSTD_window_clear(&serialState->ldmWindow);
560	ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
561	ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
562	}
563	ZSTD_pthread_mutex_unlock(&serialState->mutex);
564
565	}
566
567
568	/ ------------------------------------------ /
569	/ ===== Worker thread ===== /
570	/ ------------------------------------------ /
571
572	static const range_t kNullRange = { NULL, `0` };
573
574	typedef struct {
575	size_t consumed; / SHARED - set0 by mtctx, then modified by worker AND read by mtctx /
576	size_t cSize; / SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx /
577	ZSTD_pthread_mutex_t job_mutex; / Thread-safe - used by mtctx and worker /
578	ZSTD_pthread_cond_t job_cond; / Thread-safe - used by mtctx and worker /
579	ZSTDMT_CCtxPool* cctxPool; / Thread-safe - used by mtctx and (all) workers /
580	ZSTDMT_bufferPool* bufPool; / Thread-safe - used by mtctx and (all) workers /
581	ZSTDMT_seqPool* seqPool; / Thread-safe - used by mtctx and (all) workers /
582	serialState_t* serial; / Thread-safe - used by mtctx and (all) workers /
583	buffer_t dstBuff; / set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier /
584	range_t prefix; / set by mtctx, then read by worker & mtctx => no barrier /
585	range_t src; / set by mtctx, then read by worker & mtctx => no barrier /
586	unsigned jobID; / set by mtctx, then read by worker => no barrier /
587	unsigned firstJob; / set by mtctx, then read by worker => no barrier /
588	unsigned lastJob; / set by mtctx, then read by worker => no barrier /
589	ZSTD_CCtx_params params; / set by mtctx, then read by worker => no barrier /
590	const ZSTD_CDict* cdict; / set by mtctx, then read by worker => no barrier /
591	unsigned long long fullFrameSize; / set by mtctx, then read by worker => no barrier /
592	size_t dstFlushed; / used only by mtctx /
593	unsigned frameChecksumNeeded; / used only by mtctx /
594	} ZSTDMT_jobDescription;
595
596	/ ZSTDMT_compressionJob() is a POOL_function type /
597	void ZSTDMT_compressionJob(void* jobDescription)
598	{
599	ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
600	ZSTD_CCtx_params jobParams = job->params; / do not modify job->params ! copy it, modify the copy /
601	ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
602	rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
603	buffer_t dstBuff = job->dstBuff;
604
605	/ Don't compute the checksum for chunks, since we compute it externally,*
606	* but write it in the header.
607	*/
608	if (job->jobID != `0`) jobParams.fParams.checksumFlag = `0`;
609	/ Don't run LDM for the chunks, since we handle it externally /
610	jobParams.ldmParams.enableLdm = `0`;
611
612	/ ressources /
613	if (cctx==NULL) {
614	job->cSize = ERROR(memory_allocation);
615	goto _endJob;
616	}
617	if (dstBuff.start == NULL) { / streaming job : doesn't provide a dstBuffer /
618	dstBuff = ZSTDMT_getBuffer(job->bufPool);
619	if (dstBuff.start==NULL) {
620	job->cSize = ERROR(memory_allocation);
621	goto _endJob;
622	}
623	job->dstBuff = dstBuff; / this value can be read in ZSTDMT_flush, when it copies the whole job /
624	}
625
626	/ init /
627	if (job->cdict) {
628	size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, `0`, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
629	assert(job->firstJob); / only allowed for first job /
630	if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
631	} else { / srcStart points at reloaded section /
632	U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
633	{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
634	if (ZSTD_isError(forceWindowError)) {
635	job->cSize = forceWindowError;
636	goto _endJob;
637	} }
638	{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
639	job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, / load dictionary in "content-only" mode (no header analysis) /
640	NULL, /cdict/
641	jobParams, pledgedSrcSize);
642	if (ZSTD_isError(initError)) {
643	job->cSize = initError;
644	goto _endJob;
645	} } }
646
647	/ Perform serial step as early as possible, but after CCtx initialization /
648	ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
649
650	if (!job->firstJob) { / flush and overwrite frame header when it's not first job /
651	size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, `0`);
652	if (ZSTD_isError(hSize)) { job->cSize = hSize; / save error code / goto _endJob; }
653	DEBUGLOG(`5`, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
654	ZSTD_invalidateRepCodes(cctx);
655	}
656
657	/ compress /
658	{ size_t const chunkSize = `4`*ZSTD_BLOCKSIZE_MAX;
659	int const nbChunks = (int)((job->src.size + (chunkSize-`1`)) / chunkSize);
660	const BYTE* ip = (const BYTE*) job->src.start;
661	BYTE* const ostart = (BYTE*)dstBuff.start;
662	BYTE* op = ostart;
663	BYTE* oend = op + dstBuff.capacity;
664	int chunkNb;
665	if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); / check overflow /
666	DEBUGLOG(`5`, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
667	assert(job->cSize == `0`);
668	for (chunkNb = `1`; chunkNb < nbChunks; chunkNb++) {
669	size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
670	if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
671	ip += chunkSize;
672	op += cSize; assert(op < oend);
673	/ stats /
674	ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
675	job->cSize += cSize;
676	job->consumed = chunkSize * chunkNb;
677	DEBUGLOG(`5`, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
678	(U32)cSize, (U32)job->cSize);
679	ZSTD_pthread_cond_signal(&job->job_cond); / warns some more data is ready to be flushed /
680	ZSTD_pthread_mutex_unlock(&job->job_mutex);
681	}
682	/ last block /
683	assert(chunkSize > `0`); assert((chunkSize & (chunkSize - `1`)) == `0`); / chunkSize must be power of 2 for mask==(chunkSize-1) to work /
684	if ((nbChunks > `0`) \| job->lastJob /must output a "last block" flag/ ) {
685	size_t const lastBlockSize1 = job->src.size & (chunkSize-`1`);
686	size_t const lastBlockSize = ((lastBlockSize1==`0`) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
687	size_t const cSize = (job->lastJob) ?
688	ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
689	ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
690	if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
691	/ stats /
692	ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
693	job->cSize += cSize;
694	ZSTD_pthread_mutex_unlock(&job->job_mutex);
695	} }
696
697	_endJob:
698	ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
699	if (job->prefix.size > `0`)
700	DEBUGLOG(`5`, "Finished with prefix: %zx", (size_t)job->prefix.start);
701	DEBUGLOG(`5`, "Finished with source: %zx", (size_t)job->src.start);
702	/ release resources /
703	ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
704	ZSTDMT_releaseCCtx(job->cctxPool, cctx);
705	/ report /
706	ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
707	job->consumed = job->src.size;
708	ZSTD_pthread_cond_signal(&job->job_cond);
709	ZSTD_pthread_mutex_unlock(&job->job_mutex);
710	}
711
712
713	/ ------------------------------------------ /
714	/ ===== Multi-threaded compression ===== /
715	/ ------------------------------------------ /
716
717	typedef struct {
718	range_t prefix; / read-only non-owned prefix buffer /
719	buffer_t buffer;
720	size_t filled;
721	} inBuff_t;
722
723	typedef struct {
724	BYTE* buffer; / The round input buffer. All jobs get references*
725	* to pieces of the buffer. ZSTDMT_tryGetInputRange()
726	* handles handing out job input buffers, and makes
727	* sure it doesn't overlap with any pieces still in use.
728	*/
729	size_t capacity; / The capacity of buffer. /
730	size_t pos; / The position of the current inBuff in the round*
731	* buffer. Updated past the end if the inBuff once
732	* the inBuff is sent to the worker thread.
733	* pos <= capacity.
734	*/
735	} roundBuff_t;
736
737	static const roundBuff_t kNullRoundBuff = {NULL, `0`, `0`};
738
739	struct ZSTDMT_CCtx_s {
740	POOL_ctx* factory;
741	ZSTDMT_jobDescription* jobs;
742	ZSTDMT_bufferPool* bufPool;
743	ZSTDMT_CCtxPool* cctxPool;
744	ZSTDMT_seqPool* seqPool;
745	ZSTD_CCtx_params params;
746	size_t targetSectionSize;
747	size_t targetPrefixSize;
748	roundBuff_t roundBuff;
749	inBuff_t inBuff;
750	int jobReady; / 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. /
751	serialState_t serial;
752	unsigned singleBlockingThread;
753	unsigned jobIDMask;
754	unsigned doneJobID;
755	unsigned nextJobID;
756	unsigned frameEnded;
757	unsigned allJobsCompleted;
758	unsigned long long frameContentSize;
759	unsigned long long consumed;
760	unsigned long long produced;
761	ZSTD_customMem cMem;
762	ZSTD_CDict* cdictLocal;
763	const ZSTD_CDict* cdict;
764	};
765
766	static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
767	{
768	U32 jobNb;
769	if (jobTable == NULL) return;
770	for (jobNb=`0`; jobNb<nbJobs; jobNb++) {
771	ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
772	ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
773	}
774	ZSTD_free(jobTable, cMem);
775	}
776
777	/ ZSTDMT_allocJobsTable()*
778	* allocate and init a job table.
779	* update nbJobsPtr to next power of 2 value, as size of table /
780	static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
781	{
782	U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + `1`;
783	U32 const nbJobs = `1` << nbJobsLog2;
784	U32 jobNb;
785	ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
786	ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
787	int initError = `0`;
788	if (jobTable==NULL) return NULL;
789	*nbJobsPtr = nbJobs;
790	for (jobNb=`0`; jobNb<nbJobs; jobNb++) {
791	initError \|= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
792	initError \|= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
793	}
794	if (initError != `0`) {
795	ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
796	return NULL;
797	}
798	return jobTable;
799	}
800
801	/ ZSTDMT_CCtxParam_setNbWorkers():*
802	* Internal use only */
803	size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
804	{
805	if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
806	params->nbWorkers = nbWorkers;
807	params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
808	params->jobSize = `0`;
809	return nbWorkers;
810	}
811
812	ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
813	{
814	ZSTDMT_CCtx* mtctx;
815	U32 nbJobs = nbWorkers + `2`;
816	int initError;
817	DEBUGLOG(`3`, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
818
819	if (nbWorkers < `1`) return NULL;
820	nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
821	if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
822	/ invalid custom allocator /
823	return NULL;
824
825	mtctx = (ZSTDMT_CCtx) ZSTD_calloc(sizeof*(ZSTDMT_CCtx), cMem);
826	if (!mtctx) return NULL;
827	ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
828	mtctx->cMem = cMem;
829	mtctx->allJobsCompleted = `1`;
830	mtctx->factory = POOL_create_advanced(nbWorkers, `0`, cMem);
831	mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
832	assert(nbJobs > `0`); assert((nbJobs & (nbJobs - `1`)) == `0`); / ensure nbJobs is a power of 2 /
833	mtctx->jobIDMask = nbJobs - `1`;
834	mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
835	mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
836	mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
837	initError = ZSTDMT_serialState_init(&mtctx->serial);
838	mtctx->roundBuff = kNullRoundBuff;
839	if (!mtctx->factory \| !mtctx->jobs \| !mtctx->bufPool \| !mtctx->cctxPool \| !mtctx->seqPool \| initError) {
840	ZSTDMT_freeCCtx(mtctx);
841	return NULL;
842	}
843	DEBUGLOG(`3`, "mt_cctx created, for %u threads", nbWorkers);
844	return mtctx;
845	}
846
847	ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
848	{
849	return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
850	}
851
852
853	/ ZSTDMT_releaseAllJobResources() :*
854	* note : ensure all workers are killed first ! */
855	static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
856	{
857	unsigned jobID;
858	DEBUGLOG(`3`, "ZSTDMT_releaseAllJobResources");
859	for (jobID=`0`; jobID <= mtctx->jobIDMask; jobID++) {
860	DEBUGLOG(`4`, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
861	ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
862	mtctx->jobs[jobID].dstBuff = g_nullBuffer;
863	mtctx->jobs[jobID].cSize = `0`;
864	}
865	memset(mtctx->jobs, `0`, (mtctx->jobIDMask+`1`)*sizeof(ZSTDMT_jobDescription));
866	mtctx->inBuff.buffer = g_nullBuffer;
867	mtctx->inBuff.filled = `0`;
868	mtctx->allJobsCompleted = `1`;
869	}
870
871	static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
872	{
873	DEBUGLOG(`4`, "ZSTDMT_waitForAllJobsCompleted");
874	while (mtctx->doneJobID < mtctx->nextJobID) {
875	unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
876	ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
877	while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
878	DEBUGLOG(`5`, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); / we want to block when waiting for data to flush /
879	ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
880	}
881	ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
882	mtctx->doneJobID++;
883	}
884	}
885
886	size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
887	{
888	if (mtctx==NULL) return `0`; / compatible with free on NULL /
889	POOL_free(mtctx->factory); / stop and free worker threads /
890	ZSTDMT_releaseAllJobResources(mtctx); / release job resources into pools first /
891	ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+`1`, mtctx->cMem);
892	ZSTDMT_freeBufferPool(mtctx->bufPool);
893	ZSTDMT_freeCCtxPool(mtctx->cctxPool);
894	ZSTDMT_freeSeqPool(mtctx->seqPool);
895	ZSTDMT_serialState_free(&mtctx->serial);
896	ZSTD_freeCDict(mtctx->cdictLocal);
897	if (mtctx->roundBuff.buffer)
898	ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
899	ZSTD_free(mtctx, mtctx->cMem);
900	return `0`;
901	}
902
903	size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
904	{
905	if (mtctx == NULL) return `0`; / supports sizeof NULL /
906	return sizeof(*mtctx)
907	+ POOL_sizeof(mtctx->factory)
908	+ ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
909	+ (mtctx->jobIDMask+`1`) * sizeof(ZSTDMT_jobDescription)
910	+ ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
911	+ ZSTDMT_sizeof_seqPool(mtctx->seqPool)
912	+ ZSTD_sizeof_CDict(mtctx->cdictLocal)
913	+ mtctx->roundBuff.capacity;
914	}
915
916	/ Internal only /
917	size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
918	ZSTDMT_parameter parameter, unsigned value) {
919	DEBUGLOG(`4`, "ZSTDMT_CCtxParam_setMTCtxParameter");
920	switch(parameter)
921	{
922	case ZSTDMT_p_jobSize :
923	DEBUGLOG(`4`, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
924	if ( (value > `0`) / value==0 => automatic job size /
925	& (value < ZSTDMT_JOBSIZE_MIN) )
926	value = ZSTDMT_JOBSIZE_MIN;
927	params->jobSize = value;
928	return value;
929	case ZSTDMT_p_overlapSectionLog :
930	if (value > `9`) value = `9`;
931	DEBUGLOG(`4`, "ZSTDMT_p_overlapSectionLog : %u", value);
932	params->overlapSizeLog = (value >= `9`) ? `9` : value;
933	return value;
934	default :
935	return ERROR(parameter_unsupported);
936	}
937	}
938
939	size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
940	{
941	DEBUGLOG(`4`, "ZSTDMT_setMTCtxParameter");
942	switch(parameter)
943	{
944	case ZSTDMT_p_jobSize :
945	return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
946	case ZSTDMT_p_overlapSectionLog :
947	return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
948	default :
949	return ERROR(parameter_unsupported);
950	}
951	}
952
953	/ Sets parameters relevant to the compression job,*
954	* initializing others to default values. */
955	static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
956	{
957	ZSTD_CCtx_params jobParams;
958	memset(&jobParams, `0`, sizeof(jobParams));
959
960	jobParams.cParams = params.cParams;
961	jobParams.fParams = params.fParams;
962	jobParams.compressionLevel = params.compressionLevel;
963	jobParams.disableLiteralCompression = params.disableLiteralCompression;
964
965	return jobParams;
966	}
967
968	/! ZSTDMT_updateCParams_whileCompressing() :*
969	* Updates only a selected set of compression parameters, to remain compatible with current frame.
970	* New parameters will be applied to next compression job. */
971	void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
972	{
973	U32 const saved_wlog = mtctx->params.cParams.windowLog; / Do not modify windowLog while compressing /
974	int const compressionLevel = cctxParams->compressionLevel;
975	DEBUGLOG(`5`, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
976	compressionLevel);
977	mtctx->params.compressionLevel = compressionLevel;
978	{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, `0`, `0`);
979	cParams.windowLog = saved_wlog;
980	mtctx->params.cParams = cParams;
981	}
982	}
983
984	/ ZSTDMT_getNbWorkers():*
985	* @return nb threads currently active in mtctx.
986	* mtctx must be valid */
987	unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
988	{
989	assert(mtctx != NULL);
990	return mtctx->params.nbWorkers;
991	}
992
993	/ ZSTDMT_getFrameProgression():*
994	* tells how much data has been consumed (input) and produced (output) for current frame.
995	* able to count progression inside worker threads.
996	* Note : mutex will be acquired during statistics collection. */
997	ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
998	{
999	ZSTD_frameProgression fps;
1000	DEBUGLOG(`6`, "ZSTDMT_getFrameProgression");
1001	fps.consumed = mtctx->consumed;
1002	fps.produced = mtctx->produced;
1003	fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
1004	{ unsigned jobNb;
1005	unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= `1`);
1006	DEBUGLOG(`6`, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
1007	mtctx->doneJobID, lastJobNb, mtctx->jobReady)
1008	for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
1009	unsigned const wJobID = jobNb & mtctx->jobIDMask;
1010	ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
1011	{ size_t const cResult = mtctx->jobs[wJobID].cSize;
1012	size_t const produced = ZSTD_isError(cResult) ? `0` : cResult;
1013	fps.consumed += mtctx->jobs[wJobID].consumed;
1014	fps.ingested += mtctx->jobs[wJobID].src.size;
1015	fps.produced += produced;
1016	}
1017	ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1018	}
1019	}
1020	return fps;
1021	}
1022
1023
/* ------------------------------------------ */
/* =====   Multi-threaded compression   ===== */
/* ------------------------------------------ */
1027
1028	static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1029	{
1030	if (params.ldmParams.enableLdm)
1031	return MAX(`21`, params.cParams.chainLog + `4`);
1032	return MAX(`20`, params.cParams.windowLog + `2`);
1033	}
1034
1035	static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
1036	{
1037	unsigned const overlapRLog = (params.overlapSizeLog>`9`) ? `0` : `9`-params.overlapSizeLog;
1038	if (params.ldmParams.enableLdm)
1039	return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - `2`) - overlapRLog);
1040	return overlapRLog >= `9` ? `0` : (params.cParams.windowLog - overlapRLog);
1041	}
1042
1043	static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
1044	assert(nbWorkers>`0`);
1045	{ size_t const jobSizeTarget = (size_t)`1` << ZSTDMT_computeTargetJobLog(params);
1046	size_t const jobMaxSize = jobSizeTarget << `2`;
1047	size_t const passSizeMax = jobMaxSize * nbWorkers;
1048	unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + `1`;
1049	unsigned const nbJobsLarge = multiplier * nbWorkers;
1050	unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + `1`;
1051	unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
1052	return (multiplier>`1`) ? nbJobsLarge : nbJobsSmall;
1053	} }
1054
1055	/ ZSTDMT_compress_advanced_internal() :*
1056	* This is a blocking function : it will only give back control to caller after finishing its compression job.
1057	*/
1058	static size_t ZSTDMT_compress_advanced_internal(
1059	ZSTDMT_CCtx* mtctx,
1060	void* dst, size_t dstCapacity,
1061	const void* src, size_t srcSize,
1062	const ZSTD_CDict* cdict,
1063	ZSTD_CCtx_params params)
1064	{
1065	ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1066	size_t const overlapSize = (size_t)`1` << ZSTDMT_computeOverlapLog(params);
1067	unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1068	size_t const proposedJobSize = (srcSize + (nbJobs-`1`)) / nbJobs;
1069	size_t const avgJobSize = (((proposedJobSize-`1`) & `0x1FFFF`) < `0x7FFF`) ? proposedJobSize + `0xFFFF` : proposedJobSize; / avoid too small last block /
1070	const char* const srcStart = (const char*)src;
1071	size_t remainingSrcSize = srcSize;
1072	unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); / presumes avgJobSize >= 256 KB, which should be the case /
1073	size_t frameStartPos = `0`, dstBufferPos = `0`;
1074	assert(jobParams.nbWorkers == `0`);
1075	assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
1076
1077	params.jobSize = (U32)avgJobSize;
1078	DEBUGLOG(`4`, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
1079	nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
1080
1081	if ((nbJobs==`1`) \| (params.nbWorkers<=`1`)) { / fallback to single-thread mode : this is a blocking invocation anyway /
1082	ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[`0`];
1083	DEBUGLOG(`4`, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
1084	if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
1085	return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, `0`, jobParams);
1086	}
1087
1088	assert(avgJobSize >= `256` KB); / condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) /
1089	ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1090	if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1091	return ERROR(memory_allocation);
1092
1093	if (nbJobs > mtctx->jobIDMask+`1`) { / enlarge job table /
1094	U32 jobsTableSize = nbJobs;
1095	ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+`1`, mtctx->cMem);
1096	mtctx->jobIDMask = `0`;
1097	mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
1098	if (mtctx->jobs==NULL) return ERROR(memory_allocation);
1099	assert((jobsTableSize != `0`) && ((jobsTableSize & (jobsTableSize - `1`)) == `0`)); / ensure jobsTableSize is a power of 2 /
1100	mtctx->jobIDMask = jobsTableSize - `1`;
1101	}
1102
1103	{ unsigned u;
1104	for (u=`0`; u<nbJobs; u++) {
1105	size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
1106	size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
1107	buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
1108	buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
1109	size_t dictSize = u ? overlapSize : `0`;
1110
1111	mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
1112	mtctx->jobs[u].prefix.size = dictSize;
1113	mtctx->jobs[u].src.start = srcStart + frameStartPos;
1114	mtctx->jobs[u].src.size = jobSize; assert(jobSize > `0`); / avoid job.src.size == 0 /
1115	mtctx->jobs[u].consumed = `0`;
1116	mtctx->jobs[u].cSize = `0`;
1117	mtctx->jobs[u].cdict = (u==`0`) ? cdict : NULL;
1118	mtctx->jobs[u].fullFrameSize = srcSize;
1119	mtctx->jobs[u].params = jobParams;
1120	/ do not calculate checksum within sections, but write it in header for first section /
1121	mtctx->jobs[u].dstBuff = dstBuffer;
1122	mtctx->jobs[u].cctxPool = mtctx->cctxPool;
1123	mtctx->jobs[u].bufPool = mtctx->bufPool;
1124	mtctx->jobs[u].seqPool = mtctx->seqPool;
1125	mtctx->jobs[u].serial = &mtctx->serial;
1126	mtctx->jobs[u].jobID = u;
1127	mtctx->jobs[u].firstJob = (u==`0`);
1128	mtctx->jobs[u].lastJob = (u==nbJobs-`1`);
1129
1130	DEBUGLOG(`5`, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
1131	DEBUG_PRINTHEX(`6`, mtctx->jobs[u].prefix.start, `12`);
1132	POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
1133
1134	frameStartPos += jobSize;
1135	dstBufferPos += dstBufferCapacity;
1136	remainingSrcSize -= jobSize;
1137	} }
1138
1139	/ collect result /
1140	{ size_t error = `0`, dstPos = `0`;
1141	unsigned jobID;
1142	for (jobID=`0`; jobID<nbJobs; jobID++) {
1143	DEBUGLOG(`5`, "waiting for job %u ", jobID);
1144	ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
1145	while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
1146	DEBUGLOG(`5`, "waiting for jobCompleted signal from job %u", jobID);
1147	ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
1148	}
1149	ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
1150	DEBUGLOG(`5`, "ready to write job %u ", jobID);
1151
1152	{ size_t const cSize = mtctx->jobs[jobID].cSize;
1153	if (ZSTD_isError(cSize)) error = cSize;
1154	if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
1155	if (jobID) { / note : job 0 is written directly at dst, which is correct position /
1156	if (!error)
1157	memmove((char)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst /
1158	if (jobID >= compressWithinDst) { / job compressed into its own buffer, which must be released /
1159	DEBUGLOG(`5`, "releasing buffer %u>=%u", jobID, compressWithinDst);
1160	ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
1161	} }
1162	mtctx->jobs[jobID].dstBuff = g_nullBuffer;
1163	mtctx->jobs[jobID].cSize = `0`;
1164	dstPos += cSize ;
1165	}
1166	} / for (jobID=0; jobID<nbJobs; jobID++) /
1167
1168	DEBUGLOG(`4`, "checksumFlag : %u ", params.fParams.checksumFlag);
1169	if (params.fParams.checksumFlag) {
1170	U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
1171	if (dstPos + `4` > dstCapacity) {
1172	error = ERROR(dstSize_tooSmall);
1173	} else {
1174	DEBUGLOG(`4`, "writing checksum : %08X \n", checksum);
1175	MEM_writeLE32((char*)dst + dstPos, checksum);
1176	dstPos += `4`;
1177	} }
1178
1179	if (!error) DEBUGLOG(`4`, "compressed size : %u ", (U32)dstPos);
1180	return error ? error : dstPos;
1181	}
1182	}
1183
1184	size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
1185	void* dst, size_t dstCapacity,
1186	const void* src, size_t srcSize,
1187	const ZSTD_CDict* cdict,
1188	ZSTD_parameters params,
1189	unsigned overlapLog)
1190	{
1191	ZSTD_CCtx_params cctxParams = mtctx->params;
1192	cctxParams.cParams = params.cParams;
1193	cctxParams.fParams = params.fParams;
1194	cctxParams.overlapSizeLog = overlapLog;
1195	return ZSTDMT_compress_advanced_internal(mtctx,
1196	dst, dstCapacity,
1197	src, srcSize,
1198	cdict, cctxParams);
1199	}
1200
1201
1202	size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
1203	void* dst, size_t dstCapacity,
1204	const void* src, size_t srcSize,
1205	int compressionLevel)
1206	{
1207	U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? `9` : ZSTDMT_OVERLAPLOG_DEFAULT;
1208	ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, `0`);
1209	params.fParams.contentSizeFlag = `1`;
1210	return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
1211	}
1212
1213
/* ====================================== */
/* =======      Streaming API     ======= */
/* ====================================== */
1217
1218	size_t ZSTDMT_initCStream_internal(
1219	ZSTDMT_CCtx* mtctx,
1220	const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
1221	const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
1222	unsigned long long pledgedSrcSize)
1223	{
1224	DEBUGLOG(`4`, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
1225	(U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
1226	/ params are supposed to be fully validated at this point /
1227	assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1228	assert(!((dict) && (cdict))); / either dict or cdict, not both /
1229	assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
1230
1231	/ init /
1232	if (params.jobSize == `0`) {
1233	params.jobSize = `1U` << ZSTDMT_computeTargetJobLog(params);
1234	}
1235	if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1236
1237	mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); / do not trigger multi-threading when srcSize is too small /
1238	if (mtctx->singleBlockingThread) {
1239	ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
1240	DEBUGLOG(`5`, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
1241	assert(singleThreadParams.nbWorkers == `0`);
1242	return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[`0`],
1243	dict, dictSize, cdict,
1244	singleThreadParams, pledgedSrcSize);
1245	}
1246
1247	DEBUGLOG(`4`, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
1248
1249	if (mtctx->allJobsCompleted == `0`) { / previous compression not correctly finished /
1250	ZSTDMT_waitForAllJobsCompleted(mtctx);
1251	ZSTDMT_releaseAllJobResources(mtctx);
1252	mtctx->allJobsCompleted = `1`;
1253	}
1254
1255	mtctx->params = params;
1256	mtctx->frameContentSize = pledgedSrcSize;
1257	if (dict) {
1258	ZSTD_freeCDict(mtctx->cdictLocal);
1259	mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
1260	ZSTD_dlm_byCopy, dictContentType, / note : a loadPrefix becomes an internal CDict /
1261	params.cParams, mtctx->cMem);
1262	mtctx->cdict = mtctx->cdictLocal;
1263	if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
1264	} else {
1265	ZSTD_freeCDict(mtctx->cdictLocal);
1266	mtctx->cdictLocal = NULL;
1267	mtctx->cdict = cdict;
1268	}
1269
1270	mtctx->targetPrefixSize = (size_t)`1` << ZSTDMT_computeOverlapLog(params);
1271	DEBUGLOG(`4`, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>`10`));
1272	mtctx->targetSectionSize = params.jobSize;
1273	if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
1274	if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; / job size must be >= overlap size /
1275	DEBUGLOG(`4`, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>`10`), params.jobSize);
1276	DEBUGLOG(`4`, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>`10`));
1277	ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1278	{
1279	/ If ldm is enabled we need windowSize space. /
1280	size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (`1U` << mtctx->params.cParams.windowLog) : `0`;
1281	/ Two buffers of slack, plus extra space for the overlap*
1282	* This is the minimum slack that LDM works with. One extra because
1283	* flush might waste up to targetSectionSize-1 bytes. Another extra
1284	* for the overlap (if > 0), then one to fill which doesn't overlap
1285	* with the LDM window.
1286	*/
1287	size_t const nbSlackBuffers = `2` + (mtctx->targetPrefixSize > `0`);
1288	size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers;
1289	/ Compute the total size, and always have enough slack /
1290	size_t const nbWorkers = MAX(mtctx->params.nbWorkers, `1`);
1291	size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers;
1292	size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
1293	if (mtctx->roundBuff.capacity < capacity) {
1294	if (mtctx->roundBuff.buffer)
1295	ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
1296	mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
1297	if (mtctx->roundBuff.buffer == NULL) {
1298	mtctx->roundBuff.capacity = `0`;
1299	return ERROR(memory_allocation);
1300	}
1301	mtctx->roundBuff.capacity = capacity;
1302	}
1303	}
1304	DEBUGLOG(`4`, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>`10`));
1305	mtctx->roundBuff.pos = `0`;
1306	mtctx->inBuff.buffer = g_nullBuffer;
1307	mtctx->inBuff.filled = `0`;
1308	mtctx->inBuff.prefix = kNullRange;
1309	mtctx->doneJobID = `0`;
1310	mtctx->nextJobID = `0`;
1311	mtctx->frameEnded = `0`;
1312	mtctx->allJobsCompleted = `0`;
1313	mtctx->consumed = `0`;
1314	mtctx->produced = `0`;
1315	if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1316	return ERROR(memory_allocation);
1317	return `0`;
1318	}
1319
1320	size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
1321	const void* dict, size_t dictSize,
1322	ZSTD_parameters params,
1323	unsigned long long pledgedSrcSize)
1324	{
1325	ZSTD_CCtx_params cctxParams = mtctx->params; / retrieve sticky params /
1326	DEBUGLOG(`4`, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
1327	cctxParams.cParams = params.cParams;
1328	cctxParams.fParams = params.fParams;
1329	return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
1330	cctxParams, pledgedSrcSize);
1331	}
1332
1333	size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
1334	const ZSTD_CDict* cdict,
1335	ZSTD_frameParameters fParams,
1336	unsigned long long pledgedSrcSize)
1337	{
1338	ZSTD_CCtx_params cctxParams = mtctx->params;
1339	if (cdict==NULL) return ERROR(dictionary_wrong); / method incompatible with NULL cdict /
1340	cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
1341	cctxParams.fParams = fParams;
1342	return ZSTDMT_initCStream_internal(mtctx, NULL, `0` /dictSize/, ZSTD_dct_auto, cdict,
1343	cctxParams, pledgedSrcSize);
1344	}
1345
1346
1347	/ ZSTDMT_resetCStream() :*
1348	* pledgedSrcSize can be zero == unknown (for the time being)
1349	* prefer using ZSTD_CONTENTSIZE_UNKNOWN,
1350	* as `0` might mean "empty" in the future */
1351	size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
1352	{
1353	if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1354	return ZSTDMT_initCStream_internal(mtctx, NULL, `0`, ZSTD_dct_auto, `0`, mtctx->params,
1355	pledgedSrcSize);
1356	}
1357
1358	size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
1359	ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, `0`);
1360	ZSTD_CCtx_params cctxParams = mtctx->params; / retrieve sticky params /
1361	DEBUGLOG(`4`, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
1362	cctxParams.cParams = params.cParams;
1363	cctxParams.fParams = params.fParams;
1364	return ZSTDMT_initCStream_internal(mtctx, NULL, `0`, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
1365	}
1366
1367
1368	/ ZSTDMT_writeLastEmptyBlock()*
1369	* Write a single empty block with an end-of-frame to finish a frame.
1370	* Job must be created from streaming variant.
1371	* This function is always successfull if expected conditions are fulfilled.
1372	*/
1373	static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
1374	{
1375	assert(job->lastJob == `1`);
1376	assert(job->src.size == `0`); / last job is empty -> will be simplified into a last empty block /
1377	assert(job->firstJob == `0`); / cannot be first job, as it also needs to create frame header /
1378	assert(job->dstBuff.start == NULL); / invoked from streaming variant only (otherwise, dstBuff might be user's output) /
1379	job->dstBuff = ZSTDMT_getBuffer(job->bufPool);
1380	if (job->dstBuff.start == NULL) {
1381	job->cSize = ERROR(memory_allocation);
1382	return;
1383	}
1384	assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); / no buffer should ever be that small /
1385	job->src = kNullRange;
1386	job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
1387	assert(!ZSTD_isError(job->cSize));
1388	assert(job->consumed == `0`);
1389	}
1390
1391	static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
1392	{
1393	unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
1394	int const endFrame = (endOp == ZSTD_e_end);
1395
1396	if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
1397	DEBUGLOG(`5`, "ZSTDMT_createCompressionJob: will not create new job : table is full");
1398	assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
1399	return `0`;
1400	}
1401
1402	if (!mtctx->jobReady) {
1403	BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start;
1404	DEBUGLOG(`5`, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
1405	mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size);
1406	mtctx->jobs[jobID].src.start = src;
1407	mtctx->jobs[jobID].src.size = srcSize;
1408	assert(mtctx->inBuff.filled >= srcSize);
1409	mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix;
1410	mtctx->jobs[jobID].consumed = `0`;
1411	mtctx->jobs[jobID].cSize = `0`;
1412	mtctx->jobs[jobID].params = mtctx->params;
1413	mtctx->jobs[jobID].cdict = mtctx->nextJobID==`0` ? mtctx->cdict : NULL;
1414	mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
1415	mtctx->jobs[jobID].dstBuff = g_nullBuffer;
1416	mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
1417	mtctx->jobs[jobID].bufPool = mtctx->bufPool;
1418	mtctx->jobs[jobID].seqPool = mtctx->seqPool;
1419	mtctx->jobs[jobID].serial = &mtctx->serial;
1420	mtctx->jobs[jobID].jobID = mtctx->nextJobID;
1421	mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==`0`);
1422	mtctx->jobs[jobID].lastJob = endFrame;
1423	mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>`0`) && mtctx->params.fParams.checksumFlag;
1424	mtctx->jobs[jobID].dstFlushed = `0`;
1425
1426	/ Update the round buffer pos and clear the input buffer to be reset /
1427	mtctx->roundBuff.pos += srcSize;
1428	mtctx->inBuff.buffer = g_nullBuffer;
1429	mtctx->inBuff.filled = `0`;
1430	/ Set the prefix /
1431	if (!endFrame) {
1432	size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
1433	mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
1434	mtctx->inBuff.prefix.size = newPrefixSize;
1435	} else { / endFrame==1 => no need for another input buffer /
1436	mtctx->inBuff.prefix = kNullRange;
1437	mtctx->frameEnded = endFrame;
1438	if (mtctx->nextJobID == `0`) {
1439	/ single job exception : checksum is already calculated directly within worker thread /
1440	mtctx->params.fParams.checksumFlag = `0`;
1441	} }
1442
1443	if ( (srcSize == `0`)
1444	&& (mtctx->nextJobID>`0`)/single job must also write frame header/ ) {
1445	DEBUGLOG(`5`, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
1446	assert(endOp == ZSTD_e_end); / only possible case : need to end the frame with an empty last block /
1447	ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
1448	mtctx->nextJobID++;
1449	return `0`;
1450	}
1451	}
1452
1453	DEBUGLOG(`5`, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))",
1454	mtctx->nextJobID,
1455	(U32)mtctx->jobs[jobID].src.size,
1456	mtctx->jobs[jobID].lastJob,
1457	mtctx->nextJobID,
1458	jobID);
1459	if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
1460	mtctx->nextJobID++;
1461	mtctx->jobReady = `0`;
1462	} else {
1463	DEBUGLOG(`5`, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
1464	mtctx->jobReady = `1`;
1465	}
1466	return `0`;
1467	}
1468
1469
1470	/! ZSTDMT_flushProduced() :*
1471	* `output` : `pos` will be updated with amount of data flushed .
1472	* `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
1473	* @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
1474	static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
1475	{
1476	unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
1477	DEBUGLOG(`5`, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
1478	blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
1479	assert(output->size >= output->pos);
1480
1481	ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
1482	if ( blockToFlush
1483	&& (mtctx->doneJobID < mtctx->nextJobID) ) {
1484	assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
1485	while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { / nothing to flush /
1486	if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) {
1487	DEBUGLOG(`5`, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
1488	mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size);
1489	break;
1490	}
1491	DEBUGLOG(`5`, "waiting for something to flush from job %u (currently flushed: %u bytes)",
1492	mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1493	ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); / block when nothing to flush but some to come /
1494	} }
1495
1496	/ try to flush something /
1497	{ size_t cSize = mtctx->jobs[wJobID].cSize; / shared /
1498	size_t const srcConsumed = mtctx->jobs[wJobID].consumed; / shared /
1499	size_t const srcSize = mtctx->jobs[wJobID].src.size; / read-only, could be done after mutex lock, but no-declaration-after-statement /
1500	ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1501	if (ZSTD_isError(cSize)) {
1502	DEBUGLOG(`5`, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
1503	mtctx->doneJobID, ZSTD_getErrorName(cSize));
1504	ZSTDMT_waitForAllJobsCompleted(mtctx);
1505	ZSTDMT_releaseAllJobResources(mtctx);
1506	return cSize;
1507	}
1508	/ add frame checksum if necessary (can only happen once) /
1509	assert(srcConsumed <= srcSize);
1510	if ( (srcConsumed == srcSize) / job completed -> worker no longer active /
1511	&& mtctx->jobs[wJobID].frameChecksumNeeded ) {
1512	U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
1513	DEBUGLOG(`4`, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
1514	MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
1515	cSize += `4`;
1516	mtctx->jobs[wJobID].cSize += `4`; / can write this shared value, as worker is no longer active /
1517	mtctx->jobs[wJobID].frameChecksumNeeded = `0`;
1518	}
1519	if (cSize > `0`) { / compression is ongoing or completed /
1520	size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
1521	DEBUGLOG(`5`, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
1522	(U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
1523	assert(mtctx->doneJobID < mtctx->nextJobID);
1524	assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
1525	assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
1526	memcpy((char*)output->dst + output->pos,
1527	(const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1528	toFlush);
1529	output->pos += toFlush;
1530	mtctx->jobs[wJobID].dstFlushed += toFlush; / can write : this value is only used by mtctx /
1531
1532	if ( (srcConsumed == srcSize) / job completed /
1533	&& (mtctx->jobs[wJobID].dstFlushed == cSize) ) { / output buffer fully flushed => free this job position /
1534	DEBUGLOG(`5`, "Job %u completed (%u bytes), moving to next one",
1535	mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1536	ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
1537	mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
1538	mtctx->jobs[wJobID].cSize = `0`; / ensure this job slot is considered "not started" in future check /
1539	mtctx->consumed += srcSize;
1540	mtctx->produced += cSize;
1541	mtctx->doneJobID++;
1542	} }
1543
1544	/ return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 /
1545	if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
1546	if (srcSize > srcConsumed) return `1`; / current job not completely compressed /
1547	}
1548	if (mtctx->doneJobID < mtctx->nextJobID) return `1`; / some more jobs ongoing /
1549	if (mtctx->jobReady) return `1`; / one job is ready to push, just not yet in the list /
1550	if (mtctx->inBuff.filled > `0`) return `1`; / input is not empty, and still needs to be converted into a job /
1551	mtctx->allJobsCompleted = mtctx->frameEnded; / all jobs are entirely flushed => if this one is last one, frame is completed /
1552	if (end == ZSTD_e_end) return !mtctx->frameEnded; / for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? /
1553	return `0`; / internal buffers fully flushed /
1554	}
1555
1556	/**
1557	* Returns the range of data used by the earliest job that is not yet complete.
1558	* If the data of the first job is broken up into two segments, we cover both
1559	* sections.
1560	*/
1561	static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
1562	{
1563	unsigned const firstJobID = mtctx->doneJobID;
1564	unsigned const lastJobID = mtctx->nextJobID;
1565	unsigned jobID;
1566
1567	for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
1568	unsigned const wJobID = jobID & mtctx->jobIDMask;
1569	size_t consumed;
1570
1571	ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
1572	consumed = mtctx->jobs[wJobID].consumed;
1573	ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1574
1575	if (consumed < mtctx->jobs[wJobID].src.size) {
1576	range_t range = mtctx->jobs[wJobID].prefix;
1577	if (range.size == `0`) {
1578	/ Empty prefix /
1579	range = mtctx->jobs[wJobID].src;
1580	}
1581	/ Job source in multiple segments not supported yet /
1582	assert(range.start <= mtctx->jobs[wJobID].src.start);
1583	return range;
1584	}
1585	}
1586	return kNullRange;
1587	}
1588
1589	/**
1590	* Returns non-zero iff buffer and range overlap.
1591	*/
1592	static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1593	{
1594	BYTE const* const bufferStart = (BYTE const*)buffer.start;
1595	BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1596	BYTE const* const rangeStart = (BYTE const*)range.start;
1597	BYTE const* const rangeEnd = rangeStart + range.size;
1598
1599	if (rangeStart == NULL \|\| bufferStart == NULL)
1600	return `0`;
1601	/ Empty ranges cannot overlap /
1602	if (bufferStart == bufferEnd \|\| rangeStart == rangeEnd)
1603	return `0`;
1604
1605	return bufferStart < rangeEnd && rangeStart < bufferEnd;
1606	}
1607
1608	static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
1609	{
1610	range_t extDict;
1611	range_t prefix;
1612
1613	extDict.start = window.dictBase + window.lowLimit;
1614	extDict.size = window.dictLimit - window.lowLimit;
1615
1616	prefix.start = window.base + window.dictLimit;
1617	prefix.size = window.nextSrc - (window.base + window.dictLimit);
1618	DEBUGLOG(`5`, "extDict [0x%zx, 0x%zx)",
1619	(size_t)extDict.start,
1620	(size_t)extDict.start + extDict.size);
1621	DEBUGLOG(`5`, "prefix [0x%zx, 0x%zx)",
1622	(size_t)prefix.start,
1623	(size_t)prefix.start + prefix.size);
1624
1625	return ZSTDMT_isOverlapped(buffer, extDict)
1626	\|\| ZSTDMT_isOverlapped(buffer, prefix);
1627	}
1628
1629	static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
1630	{
1631	if (mtctx->params.ldmParams.enableLdm) {
1632	ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1633	DEBUGLOG(`5`, "source [0x%zx, 0x%zx)",
1634	(size_t)buffer.start,
1635	(size_t)buffer.start + buffer.capacity);
1636	ZSTD_PTHREAD_MUTEX_LOCK(mutex);
1637	while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
1638	DEBUGLOG(`6`, "Waiting for LDM to finish...");
1639	ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
1640	}
1641	DEBUGLOG(`6`, "Done waiting for LDM to finish");
1642	ZSTD_pthread_mutex_unlock(mutex);
1643	}
1644	}
1645
1646	/**
1647	* Attempts to set the inBuff to the next section to fill.
1648	* If any part of the new section is still in use we give up.
1649	* Returns non-zero if the buffer is filled.
1650	*/
1651	static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
1652	{
1653	range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
1654	size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
1655	size_t const target = mtctx->targetSectionSize;
1656	buffer_t buffer;
1657
1658	assert(mtctx->inBuff.buffer.start == NULL);
1659	assert(mtctx->roundBuff.capacity >= target);
1660
1661	if (spaceLeft < target) {
1662	/ ZSTD_invalidateRepCodes() doesn't work for extDict variants.*
1663	* Simply copy the prefix to the beginning in that case.
1664	*/
1665	BYTE* const start = (BYTE*)mtctx->roundBuff.buffer;
1666	size_t const prefixSize = mtctx->inBuff.prefix.size;
1667
1668	buffer.start = start;
1669	buffer.capacity = prefixSize;
1670	if (ZSTDMT_isOverlapped(buffer, inUse)) {
1671	DEBUGLOG(`6`, "Waiting for buffer...");
1672	return `0`;
1673	}
1674	ZSTDMT_waitForLdmComplete(mtctx, buffer);
1675	memmove(start, mtctx->inBuff.prefix.start, prefixSize);
1676	mtctx->inBuff.prefix.start = start;
1677	mtctx->roundBuff.pos = prefixSize;
1678	}
1679	buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
1680	buffer.capacity = target;
1681
1682	if (ZSTDMT_isOverlapped(buffer, inUse)) {
1683	DEBUGLOG(`6`, "Waiting for buffer...");
1684	return `0`;
1685	}
1686	assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
1687
1688	ZSTDMT_waitForLdmComplete(mtctx, buffer);
1689
1690	DEBUGLOG(`5`, "Using prefix range [%zx, %zx)",
1691	(size_t)mtctx->inBuff.prefix.start,
1692	(size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size);
1693	DEBUGLOG(`5`, "Using source range [%zx, %zx)",
1694	(size_t)buffer.start,
1695	(size_t)buffer.start + buffer.capacity);
1696
1697
1698	mtctx->inBuff.buffer = buffer;
1699	mtctx->inBuff.filled = `0`;
1700	assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity);
1701	return `1`;
1702	}
1703
1704
1705	/* ZSTDMT_compressStream_generic() :*
1706	* internal use only - exposed to be invoked from zstd_compress.c
1707	* assumption : output and input are valid (pos <= size)
1708	* @return : minimum amount of data remaining to flush, 0 if none */
1709	size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
1710	ZSTD_outBuffer* output,
1711	ZSTD_inBuffer* input,
1712	ZSTD_EndDirective endOp)
1713	{
1714	unsigned forwardInputProgress = `0`;
1715	DEBUGLOG(`5`, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
1716	(U32)endOp, (U32)(input->size - input->pos));
1717	assert(output->pos <= output->size);
1718	assert(input->pos <= input->size);
1719
1720	if (mtctx->singleBlockingThread) { / delegate to single-thread (synchronous) /
1721	return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[`0`], output, input, endOp);
1722	}
1723
1724	if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
1725	/ current frame being ended. Only flush/end are allowed /
1726	return ERROR(stage_wrong);
1727	}
1728
1729	/ single-pass shortcut (note : synchronous-mode) /
1730	if ( (mtctx->nextJobID == `0`) / just started /
1731	&& (mtctx->inBuff.filled == `0`) / nothing buffered /
1732	&& (!mtctx->jobReady) / no job already created /
1733	&& (endOp == ZSTD_e_end) / end order /
1734	&& (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { / enough space in dst /
1735	size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
1736	(char*)output->dst + output->pos, output->size - output->pos,
1737	(const char*)input->src + input->pos, input->size - input->pos,
1738	mtctx->cdict, mtctx->params);
1739	if (ZSTD_isError(cSize)) return cSize;
1740	input->pos = input->size;
1741	output->pos += cSize;
1742	mtctx->allJobsCompleted = `1`;
1743	mtctx->frameEnded = `1`;
1744	return `0`;
1745	}
1746
1747	/ fill input buffer /
1748	if ( (!mtctx->jobReady)
1749	&& (input->size > input->pos) ) { / support NULL input /
1750	if (mtctx->inBuff.buffer.start == NULL) {
1751	assert(mtctx->inBuff.filled == `0`); / Can't fill an empty buffer /
1752	if (!ZSTDMT_tryGetInputRange(mtctx)) {
1753	/ It is only possible for this operation to fail if there are*
1754	* still compression jobs ongoing.
1755	*/
1756	assert(mtctx->doneJobID != mtctx->nextJobID);
1757	}
1758	}
1759	if (mtctx->inBuff.buffer.start != NULL) {
1760	size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
1761	assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
1762	DEBUGLOG(`5`, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
1763	(U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
1764	memcpy((char)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const* char*)input->src + input->pos, toLoad);
1765	input->pos += toLoad;
1766	mtctx->inBuff.filled += toLoad;
1767	forwardInputProgress = toLoad>`0`;
1768	}
1769	if ((input->pos < input->size) && (endOp == ZSTD_e_end))
1770	endOp = ZSTD_e_flush; / can't end now : not all input consumed /
1771	}
1772
1773	if ( (mtctx->jobReady)
1774	\|\| (mtctx->inBuff.filled >= mtctx->targetSectionSize) / filled enough : let's compress /
1775	\|\| ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > `0`)) / something to flush : let's go /
1776	\|\| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { / must finish the frame with a zero-size block /
1777	size_t const jobSize = mtctx->inBuff.filled;
1778	assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
1779	CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
1780	}
1781
1782	/ check for potential compressed data ready to be flushed /
1783	{ size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); / block if there was no forward input progress /
1784	if (input->pos < input->size) return MAX(remainingToFlush, `1`); / input not consumed : do not end flush yet /
1785	return remainingToFlush;
1786	}
1787	}
1788
1789
1790	size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
1791	{
1792	CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
1793
1794	/ recommended next input size : fill current input buffer /
1795	return mtctx->targetSectionSize - mtctx->inBuff.filled; / note : could be zero when input buffer is fully filled and no more availability to create new job /
1796	}
1797
1798
1799	static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
1800	{
1801	size_t const srcSize = mtctx->inBuff.filled;
1802	DEBUGLOG(`5`, "ZSTDMT_flushStream_internal");
1803
1804	if ( mtctx->jobReady / one job ready for a worker to pick up /
1805	\|\| (srcSize > `0`) / still some data within input buffer /
1806	\|\| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { / need a last 0-size block to end frame /
1807	DEBUGLOG(`5`, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
1808	(U32)srcSize, (U32)endFrame);
1809	CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
1810	}
1811
1812	/ check if there is any data available to flush /
1813	return ZSTDMT_flushProduced(mtctx, output, `1` / blockToFlush /, endFrame);
1814	}
1815
1816
1817	size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
1818	{
1819	DEBUGLOG(`5`, "ZSTDMT_flushStream");
1820	if (mtctx->singleBlockingThread)
1821	return ZSTD_flushStream(mtctx->cctxPool->cctx[`0`], output);
1822	return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
1823	}
1824
1825	size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
1826	{
1827	DEBUGLOG(`4`, "ZSTDMT_endStream");
1828	if (mtctx->singleBlockingThread)
1829	return ZSTD_endStream(mtctx->cctxPool->cctx[`0`], output);
1830	return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
1831	}
1832

Browse the source code of ClickHouse/contrib/zstd/lib/compress/zstdmt_compress.c