/*-------------------------------------------------------------------------
 *
 * ginfast.c
 *	  Fast insert routines for the Postgres inverted index access method.
 *	  Pending entries are stored in a linear list of pages.  Later on
 *	  (typically during VACUUM), ginInsertCleanup() will be invoked to
 *	  transfer pending entries into the regular index structure.  This
 *	  wins because bulk insertion is much more efficient than retail.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gin/ginfast.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/pg_am.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/* GUC parameter */
int			gin_pending_list_limit = 0;

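/*
 * GIN_PAGE_FREESIZE is the usable space on an empty pending-list page:
 * the block size less the page header and the GIN opaque area.  As a
 * rough illustration only (assuming the default BLCKSZ of 8192), that
 * works out to a little over 8100 bytes per page.
 */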
#define GIN_PAGE_FREESIZE \
	( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )

typedef struct KeyArray
{
	Datum	   *keys;			/* expansible array */
	GinNullCategory *categories;	/* another expansible array */
	int32		nvalues;		/* current number of valid entries */
	int32		maxvalues;		/* allocated size of arrays */
} KeyArray;


/*
 * Build a pending-list page from the given array of tuples, and write it out.
 *
 * Returns amount of free space left on the page.
 */
static int32
writeListPage(Relation index, Buffer buffer,
			  IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
{
	Page		page = BufferGetPage(buffer);
	int32		i,
				freesize,
				size = 0;
	OffsetNumber l,
				off;
	PGAlignedBlock workspace;
	char	   *ptr;

	START_CRIT_SECTION();

	GinInitBuffer(buffer, GIN_LIST);

	off = FirstOffsetNumber;
	ptr = workspace.data;

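	/*
	 * Copy each tuple both onto the page and into the workspace buffer; the
	 * workspace copy is registered below as WAL buffer data, so that redo
	 * can rebuild the page contents from scratch.
	 */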
	for (i = 0; i < ntuples; i++)
	{
		int			this_size = IndexTupleSize(tuples[i]);

		memcpy(ptr, tuples[i], this_size);
		ptr += this_size;
		size += this_size;

		l = PageAddItem(page, (Item) tuples[i], this_size, off, false, false);

		if (l == InvalidOffsetNumber)
			elog(ERROR, "failed to add item to index page in \"%s\"",
				 RelationGetRelationName(index));

		off++;
	}

	Assert(size <= BLCKSZ);		/* else we overran workspace */

	GinPageGetOpaque(page)->rightlink = rightlink;

	/*
	 * The tail page may contain only whole row(s), or the final part of a
	 * row whose earlier tuples were placed on previous pages (a "row" here
	 * meaning all the index tuples generated for one heap tuple).
	 */
	if (rightlink == InvalidBlockNumber)
	{
		GinPageSetFullRow(page);
		GinPageGetOpaque(page)->maxoff = 1;
	}
	else
	{
		GinPageGetOpaque(page)->maxoff = 0;
	}

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		ginxlogInsertListPage data;
		XLogRecPtr	recptr;

		data.rightlink = rightlink;
		data.ntuples = ntuples;

		XLogBeginInsert();
		XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));

		XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
		XLogRegisterBufData(0, workspace.data, size);

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
		PageSetLSN(page, recptr);
	}

	/* get free space before releasing buffer */
	freesize = PageGetExactFreeSpace(page);

	UnlockReleaseBuffer(buffer);

	END_CRIT_SECTION();

	return freesize;
}

static void
makeSublist(Relation index, IndexTuple *tuples, int32 ntuples,
			GinMetaPageData *res)
{
	Buffer		curBuffer = InvalidBuffer;
	Buffer		prevBuffer = InvalidBuffer;
	int			i,
				size = 0,
				tupsize;
	int			startTuple = 0;

	Assert(ntuples > 0);

	/*
	 * Split tuples into pages
	 */
	for (i = 0; i < ntuples; i++)
	{
		if (curBuffer == InvalidBuffer)
		{
			curBuffer = GinNewBuffer(index);

			if (prevBuffer != InvalidBuffer)
			{
				res->nPendingPages++;
				writeListPage(index, prevBuffer,
							  tuples + startTuple,
							  i - startTuple,
							  BufferGetBlockNumber(curBuffer));
			}
			else
			{
				res->head = BufferGetBlockNumber(curBuffer);
			}

			prevBuffer = curBuffer;
			startTuple = i;
			size = 0;
		}

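		/* account for the MAXALIGN'd tuple plus its line pointer */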
		tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData);

		if (size + tupsize > GinListPageSize)
		{
			/* won't fit, force a new page and reprocess */
			i--;
			curBuffer = InvalidBuffer;
		}
		else
		{
			size += tupsize;
		}
	}

	/*
	 * Write last page
	 */
	res->tail = BufferGetBlockNumber(curBuffer);
	res->tailFreeSize = writeListPage(index, curBuffer,
									  tuples + startTuple,
									  ntuples - startTuple,
									  InvalidBlockNumber);
	res->nPendingPages++;
	/* that was only one heap tuple */
	res->nPendingHeapTuples = 1;
}

/*
 * Write the index tuples contained in *collector into the index's
 * pending list.
 *
 * This function guarantees that all these tuples will be inserted
 * consecutively, preserving their order.
 */
void
ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
{
	Relation	index = ginstate->index;
	Buffer		metabuffer;
	Page		metapage;
	GinMetaPageData *metadata = NULL;
	Buffer		buffer = InvalidBuffer;
	Page		page = NULL;
	ginxlogUpdateMeta data;
	bool		separateList = false;
	bool		needCleanup = false;
	int			cleanupSize;
	bool		needWal;

	if (collector->ntuples == 0)
		return;

	needWal = RelationNeedsWAL(index);

	data.node = index->rd_node;
	data.ntuples = 0;
	data.newRightlink = data.prevTail = InvalidBlockNumber;

	metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	metapage = BufferGetPage(metabuffer);

	/*
	 * An insertion to the pending list could logically belong anywhere in the
	 * tree, so it conflicts with all serializable scans.  All scans acquire a
	 * predicate lock on the metabuffer to represent that.
	 */
	CheckForSerializableConflictIn(index, NULL, metabuffer);

	if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
	{
		/*
		 * Total size is greater than one page => make sublist
		 */
		separateList = true;
	}
	else
	{
		LockBuffer(metabuffer, GIN_EXCLUSIVE);
		metadata = GinPageGetMeta(metapage);

		if (metadata->head == InvalidBlockNumber ||
			collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize)
		{
			/*
			 * The pending list is empty, or the total size is greater than
			 * the free space on the tail page => make a sublist.
			 *
			 * We unlock the metabuffer to allow higher concurrency.
			 */
			separateList = true;
			LockBuffer(metabuffer, GIN_UNLOCK);
		}
	}

	if (separateList)
	{
		/*
		 * Build the sublist separately, then append it to the tail.
		 */
		GinMetaPageData sublist;

		memset(&sublist, 0, sizeof(GinMetaPageData));
		makeSublist(index, collector->tuples, collector->ntuples, &sublist);

		if (needWal)
			XLogBeginInsert();

		/*
		 * metapage was unlocked, see above
		 */
		LockBuffer(metabuffer, GIN_EXCLUSIVE);
		metadata = GinPageGetMeta(metapage);

		if (metadata->head == InvalidBlockNumber)
		{
			/*
			 * Main list is empty, so just insert sublist as main list
			 */
			START_CRIT_SECTION();

			metadata->head = sublist.head;
			metadata->tail = sublist.tail;
			metadata->tailFreeSize = sublist.tailFreeSize;

			metadata->nPendingPages = sublist.nPendingPages;
			metadata->nPendingHeapTuples = sublist.nPendingHeapTuples;
		}
		else
		{
			/*
			 * Merge lists
			 */
			data.prevTail = metadata->tail;
			data.newRightlink = sublist.head;

			buffer = ReadBuffer(index, metadata->tail);
			LockBuffer(buffer, GIN_EXCLUSIVE);
			page = BufferGetPage(buffer);

			Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);

			START_CRIT_SECTION();

			GinPageGetOpaque(page)->rightlink = sublist.head;

			MarkBufferDirty(buffer);

			metadata->tail = sublist.tail;
			metadata->tailFreeSize = sublist.tailFreeSize;

			metadata->nPendingPages += sublist.nPendingPages;
			metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;

			if (needWal)
				XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
		}
	}
	else
	{
		/*
		 * Insert into the tail page.  Metapage is already locked.
		 */
		OffsetNumber l,
					off;
		int			i,
					tupsize;
		char	   *ptr;
		char	   *collectordata;

		buffer = ReadBuffer(index, metadata->tail);
		LockBuffer(buffer, GIN_EXCLUSIVE);
		page = BufferGetPage(buffer);

		off = (PageIsEmpty(page)) ? FirstOffsetNumber :
			OffsetNumberNext(PageGetMaxOffsetNumber(page));

		collectordata = ptr = (char *) palloc(collector->sumsize);

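		/* tell redo how many tuples to re-insert from the buffer data */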
		data.ntuples = collector->ntuples;

		if (needWal)
			XLogBeginInsert();

		START_CRIT_SECTION();

		/*
		 * Increment the counts of heap tuples
		 */
		Assert(GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples);
		GinPageGetOpaque(page)->maxoff++;
		metadata->nPendingHeapTuples++;

		for (i = 0; i < collector->ntuples; i++)
		{
			tupsize = IndexTupleSize(collector->tuples[i]);
			l = PageAddItem(page, (Item) collector->tuples[i], tupsize, off, false, false);

			if (l == InvalidOffsetNumber)
				elog(ERROR, "failed to add item to index page in \"%s\"",
					 RelationGetRelationName(index));

			memcpy(ptr, collector->tuples[i], tupsize);
			ptr += tupsize;

			off++;
		}

		Assert((ptr - collectordata) <= collector->sumsize);
		if (needWal)
		{
			XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
			XLogRegisterBufData(1, collectordata, collector->sumsize);
		}

		metadata->tailFreeSize = PageGetExactFreeSpace(page);

		MarkBufferDirty(buffer);
	}

	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.  (We must do this here because pre-v11 versions of PG did not
	 * set the metapage's pd_lower correctly, so a pg_upgraded index might
	 * contain the wrong value.)
	 */
	((PageHeader) metapage)->pd_lower =
		((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;

	/*
	 * Write metabuffer, make xlog entry
	 */
	MarkBufferDirty(metabuffer);

	if (needWal)
	{
		XLogRecPtr	recptr;

		memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

		XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
		XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
		PageSetLSN(metapage, recptr);

		if (buffer != InvalidBuffer)
		{
			PageSetLSN(page, recptr);
		}
	}

	if (buffer != InvalidBuffer)
		UnlockReleaseBuffer(buffer);

	/*
	 * Force pending list cleanup when it becomes too long.  Also,
	 * ginInsertCleanup can take a significant amount of time, so we prefer
	 * to call it when it can do all the work in a single collection cycle.
	 * In non-vacuum mode it shouldn't require maintenance_work_mem, so fire
	 * it while the pending list is still small enough to fit into
	 * gin_pending_list_limit.
	 *
	 * ginInsertCleanup() should not be called inside our CRIT_SECTION.
	 */
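	/*
	 * As a rough illustration (both numbers are configurable): with the
	 * default gin_pending_list_limit of 4MB and the default BLCKSZ of 8192,
	 * the test below fires once the pending list grows past about
	 * 4096 * 1024 / GIN_PAGE_FREESIZE, i.e. a bit over 500 pages.
	 */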
	cleanupSize = GinGetPendingListCleanupSize(index);
	if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L)
		needCleanup = true;

	UnlockReleaseBuffer(metabuffer);

	END_CRIT_SECTION();

	/*
	 * Since this could contend with a concurrent cleanup process, we don't
	 * force the cleanup here.
	 */
	if (needCleanup)
		ginInsertCleanup(ginstate, false, true, false, NULL);
}

/*
 * Create temporary index tuples for a single indexable item (one index column
 * for the heap tuple specified by ht_ctid), and append them to the array
 * in *collector.  They will subsequently be written out using
 * ginHeapTupleFastInsert.  Note that to guarantee consistent state, all
 * temp tuples for a given heap tuple must be written in one call to
 * ginHeapTupleFastInsert.
 */
void
ginHeapTupleFastCollect(GinState *ginstate,
						GinTupleCollector *collector,
						OffsetNumber attnum, Datum value, bool isNull,
						ItemPointer ht_ctid)
{
	Datum	   *entries;
	GinNullCategory *categories;
	int32		i,
				nentries;

	/*
	 * Extract the key values that need to be inserted in the index
	 */
	entries = ginExtractEntries(ginstate, attnum, value, isNull,
								&nentries, &categories);

	/*
	 * Protect against integer overflow in allocation calculations
	 */
	if (nentries < 0 ||
		collector->ntuples + nentries > MaxAllocSize / sizeof(IndexTuple))
		elog(ERROR, "too many entries for GIN index");

	/*
	 * Allocate/reallocate memory for storing collected tuples
	 */
	if (collector->tuples == NULL)
	{
		/*
		 * Determine the number of elements to allocate in the tuples array
		 * initially.  Make it a power of 2 to avoid wasting memory when
		 * resizing (since palloc likes powers of 2).
		 */
		collector->lentuples = 16;
		while (collector->lentuples < nentries)
			collector->lentuples *= 2;

		collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples);
	}
	else if (collector->lentuples < collector->ntuples + nentries)
	{
		/*
		 * Advance lentuples to the next suitable power of 2.  This won't
		 * overflow, though we could get to a value that exceeds
		 * MaxAllocSize/sizeof(IndexTuple), causing an error in repalloc.
		 */
		do
		{
			collector->lentuples *= 2;
		} while (collector->lentuples < collector->ntuples + nentries);

		collector->tuples = (IndexTuple *) repalloc(collector->tuples,
													sizeof(IndexTuple) * collector->lentuples);
	}

	/*
	 * Build an index tuple for each key value, and add to array.  In pending
	 * tuples we just stick the heap TID into t_tid.
	 */
	for (i = 0; i < nentries; i++)
	{
		IndexTuple	itup;

		itup = GinFormTuple(ginstate, attnum, entries[i], categories[i],
							NULL, 0, 0, true);
		itup->t_tid = *ht_ctid;
		collector->tuples[collector->ntuples++] = itup;
		collector->sumsize += IndexTupleSize(itup);
	}
}

/*
 * Deletes pending-list pages up to (but not including) the newHead page.
 * If newHead == InvalidBlockNumber, then the function drops the whole list.
 *
 * metapage is pinned and exclusive-locked throughout this function.
 */
static void
shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
		  bool fill_fsm, IndexBulkDeleteResult *stats)
{
	Page		metapage;
	GinMetaPageData *metadata;
	BlockNumber blknoToDelete;

	metapage = BufferGetPage(metabuffer);
	metadata = GinPageGetMeta(metapage);
	blknoToDelete = metadata->head;

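	/*
	 * Delete pages in batches of at most GIN_NDELETE_AT_ONCE, so that each
	 * critical section and WAL record covers only a bounded number of
	 * buffers.
	 */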
	do
	{
		Page		page;
		int			i;
		int64		nDeletedHeapTuples = 0;
		ginxlogDeleteListPages data;
		Buffer		buffers[GIN_NDELETE_AT_ONCE];
		BlockNumber freespace[GIN_NDELETE_AT_ONCE];

		data.ndeleted = 0;
		while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
		{
			freespace[data.ndeleted] = blknoToDelete;
			buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
			LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
			page = BufferGetPage(buffers[data.ndeleted]);

			data.ndeleted++;

			Assert(!GinPageIsDeleted(page));

			nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff;
			blknoToDelete = GinPageGetOpaque(page)->rightlink;
		}

		if (stats)
			stats->pages_deleted += data.ndeleted;

		/*
		 * This operation touches an unusually large number of pages, so
		 * prepare the XLogInsert machinery for that before entering the
		 * critical section.
		 */
		if (RelationNeedsWAL(index))
			XLogEnsureRecordSpace(data.ndeleted, 0);

		START_CRIT_SECTION();

		metadata->head = blknoToDelete;

		Assert(metadata->nPendingPages >= data.ndeleted);
		metadata->nPendingPages -= data.ndeleted;
		Assert(metadata->nPendingHeapTuples >= nDeletedHeapTuples);
		metadata->nPendingHeapTuples -= nDeletedHeapTuples;

		if (blknoToDelete == InvalidBlockNumber)
		{
			metadata->tail = InvalidBlockNumber;
			metadata->tailFreeSize = 0;
			metadata->nPendingPages = 0;
			metadata->nPendingHeapTuples = 0;
		}

		/*
		 * Set pd_lower just past the end of the metadata.  This is essential,
		 * because without doing so, metadata will be lost if xlog.c
		 * compresses the page.  (We must do this here because pre-v11
		 * versions of PG did not set the metapage's pd_lower correctly, so a
		 * pg_upgraded index might contain the wrong value.)
		 */
		((PageHeader) metapage)->pd_lower =
			((char *) metadata + sizeof(GinMetaPageData)) - (char *) metapage;

		MarkBufferDirty(metabuffer);

		for (i = 0; i < data.ndeleted; i++)
		{
			page = BufferGetPage(buffers[i]);
			GinPageGetOpaque(page)->flags = GIN_DELETED;
			MarkBufferDirty(buffers[i]);
		}

		if (RelationNeedsWAL(index))
		{
			XLogRecPtr	recptr;

			XLogBeginInsert();
			XLogRegisterBuffer(0, metabuffer,
							   REGBUF_WILL_INIT | REGBUF_STANDARD);
			for (i = 0; i < data.ndeleted; i++)
				XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);

			memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));

			XLogRegisterData((char *) &data,
							 sizeof(ginxlogDeleteListPages));

			recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
			PageSetLSN(metapage, recptr);

			for (i = 0; i < data.ndeleted; i++)
			{
				page = BufferGetPage(buffers[i]);
				PageSetLSN(page, recptr);
			}
		}

		for (i = 0; i < data.ndeleted; i++)
			UnlockReleaseBuffer(buffers[i]);

		END_CRIT_SECTION();

		for (i = 0; fill_fsm && i < data.ndeleted; i++)
			RecordFreeIndexPage(index, freespace[i]);

	} while (blknoToDelete != newHead);
}

/* Initialize empty KeyArray */
static void
initKeyArray(KeyArray *keys, int32 maxvalues)
{
	keys->keys = (Datum *) palloc(sizeof(Datum) * maxvalues);
	keys->categories = (GinNullCategory *)
		palloc(sizeof(GinNullCategory) * maxvalues);
	keys->nvalues = 0;
	keys->maxvalues = maxvalues;
}

/* Add datum to KeyArray, resizing if needed */
static void
addDatum(KeyArray *keys, Datum datum, GinNullCategory category)
{
	if (keys->nvalues >= keys->maxvalues)
	{
		keys->maxvalues *= 2;
		keys->keys = (Datum *)
			repalloc(keys->keys, sizeof(Datum) * keys->maxvalues);
		keys->categories = (GinNullCategory *)
			repalloc(keys->categories, sizeof(GinNullCategory) * keys->maxvalues);
	}

	keys->keys[keys->nvalues] = datum;
	keys->categories[keys->nvalues] = category;
	keys->nvalues++;
}

/*
 * Collect data from a pending-list page in preparation for insertion into
 * the main index.
 *
 * Go through all tuples >= startoff on the page and collect values into accum.
 *
 * Note that ka is just workspace --- it does not carry any state across
 * calls.
 */
static void
processPendingPage(BuildAccumulator *accum, KeyArray *ka,
				   Page page, OffsetNumber startoff)
{
	ItemPointerData heapptr;
	OffsetNumber i,
				maxoff;
	OffsetNumber attrnum;

	/* reset *ka to empty */
	ka->nvalues = 0;

	maxoff = PageGetMaxOffsetNumber(page);
	Assert(maxoff >= FirstOffsetNumber);
	ItemPointerSetInvalid(&heapptr);
	attrnum = 0;

	for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
	{
		IndexTuple	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
		OffsetNumber curattnum;
		Datum		curkey;
		GinNullCategory curcategory;

		/* Check for change of heap TID or attnum */
		curattnum = gintuple_get_attrnum(accum->ginstate, itup);

		if (!ItemPointerIsValid(&heapptr))
		{
			heapptr = itup->t_tid;
			attrnum = curattnum;
		}
		else if (!(ItemPointerEquals(&heapptr, &itup->t_tid) &&
				   curattnum == attrnum))
		{
			/*
			 * ginInsertBAEntries can insert several datums per call, but only
			 * for one heap tuple and one column.  So call it at a boundary,
			 * and reset ka.
			 */
			ginInsertBAEntries(accum, &heapptr, attrnum,
							   ka->keys, ka->categories, ka->nvalues);
			ka->nvalues = 0;
			heapptr = itup->t_tid;
			attrnum = curattnum;
		}

		/* Add key to KeyArray */
		curkey = gintuple_get_key(accum->ginstate, itup, &curcategory);
		addDatum(ka, curkey, curcategory);
	}

	/* Dump out all remaining keys */
	ginInsertBAEntries(accum, &heapptr, attrnum,
					   ka->keys, ka->categories, ka->nvalues);
}

/*
 * Move tuples from pending pages into the regular GIN structure.
 *
 * At first glance this looks completely crash-unsafe.  But if we crash
 * after posting entries to the main index and before removing them from the
 * pending list, it's okay: when we redo the posting later on, nothing bad
 * will happen.
 *
 * fill_fsm indicates that ginInsertCleanup should add deleted pages to the
 * FSM; otherwise, the caller is responsible for putting deleted pages into
 * the FSM.
 *
 * If stats isn't null, we count deleted pending pages in its counters.
 */
void
ginInsertCleanup(GinState *ginstate, bool full_clean,
				 bool fill_fsm, bool forceCleanup,
				 IndexBulkDeleteResult *stats)
{
	Relation	index = ginstate->index;
	Buffer		metabuffer,
				buffer;
	Page		metapage,
				page;
	GinMetaPageData *metadata;
	MemoryContext opCtx,
				oldCtx;
	BuildAccumulator accum;
	KeyArray	datums;
	BlockNumber blkno,
				blknoFinish;
	bool		cleanupFinish = false;
	bool		fsm_vac = false;
	Size		workMemory;

	/*
	 * We would like to prevent concurrent cleanup processes.  For that we
	 * lock the metapage in exclusive mode using a LockPage() call.  Nothing
	 * else uses that lock on the metapage, so concurrent insertion into the
	 * pending list remains possible.
	 */

	if (forceCleanup)
	{
		/*
		 * We are called from [auto]vacuum/analyze or gin_clean_pending_list()
		 * and want to wait for any concurrent cleanup to finish.
		 */
		LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
		workMemory =
			(IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ?
			autovacuum_work_mem : maintenance_work_mem;
	}
	else
	{
		/*
		 * We are called from a regular insert; if we see a concurrent
		 * cleanup in progress, just exit in the hope that the concurrent
		 * process will clean the pending list.
		 */
		if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock))
			return;
		workMemory = work_mem;
	}

	metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	LockBuffer(metabuffer, GIN_SHARE);
	metapage = BufferGetPage(metabuffer);
	metadata = GinPageGetMeta(metapage);

	if (metadata->head == InvalidBlockNumber)
	{
		/* Nothing to do */
		UnlockReleaseBuffer(metabuffer);
		UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
		return;
	}

	/*
	 * Remember the tail page to prevent infinite cleanup if other backends
	 * add new tuples faster than we can clean them up.
	 */
	blknoFinish = metadata->tail;

	/*
	 * Read and lock head of pending list
	 */
	blkno = metadata->head;
	buffer = ReadBuffer(index, blkno);
	LockBuffer(buffer, GIN_SHARE);
	page = BufferGetPage(buffer);

	LockBuffer(metabuffer, GIN_UNLOCK);

	/*
	 * Initialize.  All temporary space will be in opCtx
	 */
	opCtx = AllocSetContextCreate(CurrentMemoryContext,
								  "GIN insert cleanup temporary context",
								  ALLOCSET_DEFAULT_SIZES);

	oldCtx = MemoryContextSwitchTo(opCtx);

	initKeyArray(&datums, 128);
	ginInitBA(&accum);
	accum.ginstate = ginstate;

	/*
	 * At the top of this loop, we have pin and lock on the current page of
	 * the pending list.  However, we'll release that before exiting the loop.
	 * Note we also have pin but not lock on the metapage.
	 */
	for (;;)
	{
		Assert(!GinPageIsDeleted(page));

		/*
		 * Have we reached the page that was the list tail when we started
		 * our cleanup?  If the caller asked us to clean up the whole pending
		 * list, ignore the old tail; we will work until the list becomes
		 * empty.
		 */
		if (blkno == blknoFinish && full_clean == false)
			cleanupFinish = true;

		/*
		 * read page's datums into accum
		 */
		processPendingPage(&accum, &datums, page, FirstOffsetNumber);

		vacuum_delay_point();

		/*
		 * Is it time to flush memory to disk?	Flush if we are at the end of
		 * the pending list, or if we have a full row and memory is getting
		 * full.
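		 *
		 * (workMemory is measured in kilobytes, hence the 1024L multiplier
		 * in the test below.)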
		 */
		if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
			(GinPageHasFullRow(page) &&
			 (accum.allocatedMemory >= workMemory * 1024L)))
		{
			ItemPointerData *list;
			uint32		nlist;
			Datum		key;
			GinNullCategory category;
			OffsetNumber maxoff,
						attnum;

			/*
			 * Unlock the current page for better concurrency.  Changes to
			 * the page will be detected later by comparing maxoff after the
			 * memory flush completes.
			 */
			maxoff = PageGetMaxOffsetNumber(page);
			LockBuffer(buffer, GIN_UNLOCK);

			/*
			 * Moving the collected data into the regular structure can take
			 * a significant amount of time, so do it without holding a lock
			 * on the pending list.
			 */
			ginBeginBAScan(&accum);
			while ((list = ginGetBAEntry(&accum,
										 &attnum, &key, &category, &nlist)) != NULL)
			{
				ginEntryInsert(ginstate, attnum, key, category,
							   list, nlist, NULL);
				vacuum_delay_point();
			}

			/*
			 * Lock the whole list to remove pages
			 */
			LockBuffer(metabuffer, GIN_EXCLUSIVE);
			LockBuffer(buffer, GIN_SHARE);

			Assert(!GinPageIsDeleted(page));

			/*
			 * While we left the page unlocked, more entries might have been
			 * added to it.  If so, process them immediately.  There
			 * shouldn't be very many, so we don't worry about the fact that
			 * we're doing this with exclusive lock.  The insertion algorithm
			 * guarantees that inserted row(s) will not continue onto the
			 * next page.  NOTE: intentionally no vacuum_delay_point in this
			 * loop.
			 */
			if (PageGetMaxOffsetNumber(page) != maxoff)
			{
				ginInitBA(&accum);
				processPendingPage(&accum, &datums, page, maxoff + 1);

				ginBeginBAScan(&accum);
				while ((list = ginGetBAEntry(&accum,
											 &attnum, &key, &category, &nlist)) != NULL)
					ginEntryInsert(ginstate, attnum, key, category,
								   list, nlist, NULL);
			}

			/*
			 * Remember next page - it will become the new list head
			 */
			blkno = GinPageGetOpaque(page)->rightlink;
			UnlockReleaseBuffer(buffer);	/* shiftList will do exclusive
											 * locking */

			/*
			 * Remove the pages we have read from the pending list; at this
			 * point all of their contents are in the regular structure.
			 */
			shiftList(index, metabuffer, blkno, fill_fsm, stats);

			/* At this point, some pending pages have been freed up */
			fsm_vac = true;

			Assert(blkno == metadata->head);
			LockBuffer(metabuffer, GIN_UNLOCK);

			/*
			 * If we removed the whole pending list, or we have cleaned up to
			 * the tail we remembered at the start of the cleanup, just exit.
			 */
			if (blkno == InvalidBlockNumber || cleanupFinish)
				break;

			/*
			 * release memory used so far and reinit state
			 */
			MemoryContextReset(opCtx);
			initKeyArray(&datums, datums.maxvalues);
			ginInitBA(&accum);
		}
		else
		{
			blkno = GinPageGetOpaque(page)->rightlink;
			UnlockReleaseBuffer(buffer);
		}

		/*
		 * Read next page in pending list
		 */
		vacuum_delay_point();
		buffer = ReadBuffer(index, blkno);
		LockBuffer(buffer, GIN_SHARE);
		page = BufferGetPage(buffer);
	}

	UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
	ReleaseBuffer(metabuffer);

	/*
	 * As pending list pages can have a high churn rate, it is desirable to
	 * recycle them immediately to the FreeSpace Map when ordinary backends
	 * clean the list.
	 */
	if (fsm_vac && fill_fsm)
		IndexFreeSpaceMapVacuum(index);

	/* Clean up temporary space */
	MemoryContextSwitchTo(oldCtx);
	MemoryContextDelete(opCtx);
}

/*
 * SQL-callable function to clean the insert pending list
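 *
 * Example call from SQL (the index name here is hypothetical):
 *		SELECT gin_clean_pending_list('some_gin_index'::regclass);
 * The return value is the number of pending-list pages deleted.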
 */
Datum
gin_clean_pending_list(PG_FUNCTION_ARGS)
{
	Oid			indexoid = PG_GETARG_OID(0);
	Relation	indexRel = index_open(indexoid, RowExclusiveLock);
	IndexBulkDeleteResult stats;
	GinState	ginstate;

	if (RecoveryInProgress())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("recovery is in progress"),
				 errhint("GIN pending list cannot be cleaned up during recovery.")));

	/* Must be a GIN index */
	if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
		indexRel->rd_rel->relam != GIN_AM_OID)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a GIN index",
						RelationGetRelationName(indexRel))));

	/*
	 * Reject attempts to read non-local temporary relations; we would be
	 * likely to get wrong data since we have no visibility into the owning
	 * session's local buffers.
	 */
	if (RELATION_IS_OTHER_TEMP(indexRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary indexes of other sessions")));

	/* User must own the index (comparable to privileges needed for VACUUM) */
	if (!pg_class_ownercheck(indexoid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
					   RelationGetRelationName(indexRel));

	memset(&stats, 0, sizeof(stats));
	initGinState(&ginstate, indexRel);
	ginInsertCleanup(&ginstate, true, true, true, &stats);

	index_close(indexRel, RowExclusiveLock);

	PG_RETURN_INT64((int64) stats.pages_deleted);
}