/*-------------------------------------------------------------------------
 *
 * hio.c
 *    POSTGRES heap access method input/output code.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/heap/hio.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/heapam.h"
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"


/*
 * RelationPutHeapTuple - place tuple at specified page
 *
 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!  Must PANIC on failure!!!
 *
 * Note - caller must hold BUFFER_LOCK_EXCLUSIVE on the buffer.
 */
void
RelationPutHeapTuple(Relation relation,
                     Buffer buffer,
                     HeapTuple tuple,
                     bool token)
{
    Page        pageHeader;
    OffsetNumber offnum;

    /*
     * A tuple that's being inserted speculatively should already have its
     * token set.
     */
    Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));

    /* Add the tuple to the page */
    pageHeader = BufferGetPage(buffer);

    offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
                         tuple->t_len, InvalidOffsetNumber, false, true);

    if (offnum == InvalidOffsetNumber)
        elog(PANIC, "failed to add tuple to page");

    /* Update tuple->t_self to the actual position where it was stored */
    ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);

    /*
     * Insert the correct position into CTID of the stored tuple, too (unless
     * this is a speculative insertion, in which case the token is held in
     * the CTID field instead).
     */
    if (!token)
    {
        ItemId      itemId = PageGetItemId(pageHeader, offnum);
        HeapTupleHeader item = (HeapTupleHeader) PageGetItem(pageHeader, itemId);

        item->t_ctid = tuple->t_self;
    }
}
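
/*
 * Illustrative sketch of how the heapam.c callers (heap_insert and friends)
 * are expected to use this function; this is an annotation only, not code
 * compiled here.  The buffer has already been pinned and exclusive-locked,
 * typically via RelationGetBufferForTuple below, and the final argument is
 * true only for speculative insertions:
 *
 *      START_CRIT_SECTION();
 *      RelationPutHeapTuple(relation, buffer, heaptup, false);
 *      MarkBufferDirty(buffer);
 *      ...emit the WAL record describing the insertion...
 *      END_CRIT_SECTION();
 *
 * The critical section is why ereport(ERROR) is disallowed above: once the
 * page has been modified, any failure must escalate to PANIC rather than be
 * handled as an ordinary error.
 */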

/*
 * Read in a buffer in the given mode, using the bulk-insert strategy if
 * bistate isn't NULL.
 */
static Buffer
ReadBufferBI(Relation relation, BlockNumber targetBlock,
             ReadBufferMode mode, BulkInsertState bistate)
{
    Buffer      buffer;

    /* If not bulk-insert, exactly like ReadBuffer */
    if (!bistate)
        return ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
                                  mode, NULL);

    /* If we have the desired block already pinned, re-pin and return it */
    if (bistate->current_buf != InvalidBuffer)
    {
        if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
        {
            /*
             * Currently the LOCK variants are only used for extending the
             * relation, which should never reach this branch.
             */
            Assert(mode != RBM_ZERO_AND_LOCK &&
                   mode != RBM_ZERO_AND_CLEANUP_LOCK);

            IncrBufferRefCount(bistate->current_buf);
            return bistate->current_buf;
        }
        /* ... else drop the old buffer */
        ReleaseBuffer(bistate->current_buf);
        bistate->current_buf = InvalidBuffer;
    }

    /* Perform a read using the buffer strategy */
    buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
                                mode, bistate->strategy);

    /* Save the selected block as target for future inserts */
    IncrBufferRefCount(buffer);
    bistate->current_buf = buffer;

    return buffer;
}
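
/*
 * Sketch of the expected bulk-insert usage from a caller's perspective (an
 * assumption based on the standard heapam entry points, not code used here):
 *
 *      BulkInsertState bistate = GetBulkInsertState();
 *
 *      for each tuple to be loaded:
 *          heap_insert(relation, tuple, GetCurrentCommandId(true),
 *                      0, bistate);
 *
 *      FreeBulkInsertState(bistate);
 *
 * The bistate carries a BULKWRITE buffer-access strategy (bistate->strategy)
 * and a pin on the current insertion target page (bistate->current_buf),
 * which is what lets ReadBufferBI avoid repeated pin/unpin cycles and keeps
 * a large load from flooding shared buffers.
 */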

/*
 * For each heap page which is all-visible, acquire a pin on the appropriate
 * visibility map page, if we haven't already got one.
 *
 * buffer2 may be InvalidBuffer, if only one buffer is involved.  buffer1
 * must not be InvalidBuffer.  If both buffers are specified, block1 must
 * be less than block2.
 */
static void
GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
                     BlockNumber block1, BlockNumber block2,
                     Buffer *vmbuffer1, Buffer *vmbuffer2)
{
    bool        need_to_pin_buffer1;
    bool        need_to_pin_buffer2;

    Assert(BufferIsValid(buffer1));
    Assert(buffer2 == InvalidBuffer || block1 <= block2);

    while (1)
    {
        /* Figure out which pins we need but don't have. */
        need_to_pin_buffer1 = PageIsAllVisible(BufferGetPage(buffer1))
            && !visibilitymap_pin_ok(block1, *vmbuffer1);
        need_to_pin_buffer2 = buffer2 != InvalidBuffer
            && PageIsAllVisible(BufferGetPage(buffer2))
            && !visibilitymap_pin_ok(block2, *vmbuffer2);
        if (!need_to_pin_buffer1 && !need_to_pin_buffer2)
            return;

        /* We must unlock both buffers before doing any I/O. */
        LockBuffer(buffer1, BUFFER_LOCK_UNLOCK);
        if (buffer2 != InvalidBuffer && buffer2 != buffer1)
            LockBuffer(buffer2, BUFFER_LOCK_UNLOCK);

        /* Get pins. */
        if (need_to_pin_buffer1)
            visibilitymap_pin(relation, block1, vmbuffer1);
        if (need_to_pin_buffer2)
            visibilitymap_pin(relation, block2, vmbuffer2);

        /* Relock buffers. */
        LockBuffer(buffer1, BUFFER_LOCK_EXCLUSIVE);
        if (buffer2 != InvalidBuffer && buffer2 != buffer1)
            LockBuffer(buffer2, BUFFER_LOCK_EXCLUSIVE);

        /*
         * If there are two buffers involved and we pinned just one of them,
         * it's possible that the second one became all-visible while we were
         * busy pinning the first one.  If it looks like that's a possible
         * scenario, we'll need to make a second pass through this loop.
         */
        if (buffer2 == InvalidBuffer || buffer1 == buffer2
            || (need_to_pin_buffer1 && need_to_pin_buffer2))
            break;
    }
}

/*
 * Extend a relation by multiple blocks to avoid future contention on the
 * relation extension lock.  Our goal is to pre-extend the relation by an
 * amount which ramps up as the degree of contention ramps up, but limiting
 * the result to some sane overall value.
 */
static void
RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
{
    BlockNumber blockNum,
                firstBlock = InvalidBlockNumber;
    int         extraBlocks;
    int         lockWaiters;

    /* Use the length of the lock wait queue to judge how much to extend. */
    lockWaiters = RelationExtensionLockWaiterCount(relation);
    if (lockWaiters <= 0)
        return;

    /*
     * It might seem like multiplying the number of lock waiters by as much as
     * 20 is too aggressive, but benchmarking revealed that smaller numbers
     * were insufficient.  512 is just an arbitrary cap to prevent
     * pathological results.
     */
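    /*
     * For example, 5 waiters pre-extend the relation by 100 blocks, while 26
     * or more waiters hit the cap and pre-extend by 512 blocks.
     */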
    extraBlocks = Min(512, lockWaiters * 20);

    do
    {
        Buffer      buffer;
        Page        page;
        Size        freespace;

        /*
         * Extend by one page.  This should generally match the main-line
         * extension code in RelationGetBufferForTuple, except that we hold
         * the relation extension lock throughout, and we don't immediately
         * initialize the page (see below).
         */
        buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate);
        page = BufferGetPage(buffer);

        if (!PageIsNew(page))
            elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
                 BufferGetBlockNumber(buffer),
                 RelationGetRelationName(relation));

        /*
         * Add the page to the FSM without initializing.  If we were to
         * initialize here, the page would potentially get flushed out to disk
         * before we add any useful content.  There's no guarantee that that'd
         * happen before a potential crash, so we need to deal with
         * uninitialized pages anyway, thus avoiding the potential for
         * unnecessary writes.
         */

        /* we'll need this info below */
        blockNum = BufferGetBlockNumber(buffer);
        freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData;

        UnlockReleaseBuffer(buffer);

        /* Remember first block number thus added. */
        if (firstBlock == InvalidBlockNumber)
            firstBlock = blockNum;

        /*
         * Immediately update the bottom level of the FSM.  This has a good
         * chance of making this page visible to other concurrently inserting
         * backends, and we want that to happen without delay.
         */
        RecordPageWithFreeSpace(relation, blockNum, freespace);
    }
    while (--extraBlocks > 0);

    /*
     * Updating the upper levels of the free space map is too expensive to do
     * for every block, but it's worth doing once at the end to make sure that
     * subsequent insertion activity sees all of those nifty free pages we
     * just inserted.
     */
    FreeSpaceMapVacuumRange(relation, firstBlock, blockNum + 1);
}

/*
 * RelationGetBufferForTuple
 *
 * Returns pinned and exclusive-locked buffer of a page in given relation
 * with free space >= given len.
 *
 * If otherBuffer is not InvalidBuffer, then it references a previously
 * pinned buffer of another page in the same relation; on return, this
 * buffer will also be exclusive-locked.  (This case is used by heap_update;
 * the otherBuffer contains the tuple being updated.)
 *
 * The reason for passing otherBuffer is that if two backends are doing
 * concurrent heap_update operations, a deadlock could occur if they try
 * to lock the same two buffers in opposite orders.  To ensure that this
 * can't happen, we impose the rule that buffers of a relation must be
 * locked in increasing page number order.  This is most conveniently done
 * by having RelationGetBufferForTuple lock them both, with suitable care
 * for ordering.
 *
 * NOTE: it is unlikely, but not quite impossible, for otherBuffer to be the
 * same buffer we select for insertion of the new tuple (this could only
 * happen if space is freed in that page after heap_update finds there's not
 * enough there).  In that case, the page will be pinned and locked only once.
 *
 * For the vmbuffer and vmbuffer_other arguments, we avoid deadlock by
 * locking them only after locking the corresponding heap page, and taking
 * no further lwlocks while they are locked.
 *
 * We normally use FSM to help us find free space.  However,
 * if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
 * the end of the relation if the tuple won't fit on the current target page.
 * This can save some cycles when we know the relation is new and doesn't
 * contain useful amounts of free space.
 *
 * HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
 * relation, if the caller holds exclusive lock and is careful to invalidate
 * relation's smgr_targblock before the first insertion --- that ensures that
 * all insertions will occur into newly added pages and not be intermixed
 * with tuples from other transactions.  That way, a crash can't risk losing
 * any committed data of other transactions.  (See heap_insert's comments
 * for additional constraints needed for safe usage of this behavior.)
 *
 * The caller can also provide a BulkInsertState object to optimize many
 * insertions into the same relation.  This keeps a pin on the current
 * insertion target page (to save pin/unpin cycles) and also passes a
 * BULKWRITE buffer selection strategy object to the buffer manager.
 * Passing NULL for bistate selects the default behavior.
 *
 * We always try to avoid filling existing pages further than the fillfactor.
 * This is OK since this routine is not consulted when updating a tuple and
 * keeping it on the same page, which is the scenario fillfactor is meant
 * to reserve space for.
 *
 * ereport(ERROR) is allowed here, so this routine *must* be called
 * before any (unlogged) changes are made in buffer pool.
 */
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
                          Buffer otherBuffer, int options,
                          BulkInsertState bistate,
                          Buffer *vmbuffer, Buffer *vmbuffer_other)
{
    bool        use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
    Buffer      buffer = InvalidBuffer;
    Page        page;
    Size        pageFreeSpace = 0,
                saveFreeSpace = 0;
    BlockNumber targetBlock,
                otherBlock;
    bool        needLock;

    len = MAXALIGN(len);        /* be conservative */

    /* Bulk insert is not supported for updates, only inserts. */
    Assert(otherBuffer == InvalidBuffer || !bistate);

    /*
     * If we're going to fail for an oversize tuple, do it right away.
     */
    if (len > MaxHeapTupleSize)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("row is too big: size %zu, maximum size %zu",
                        len, MaxHeapTupleSize)));

    /* Compute desired extra freespace due to fillfactor option */
    saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
                                                   HEAP_DEFAULT_FILLFACTOR);

    if (otherBuffer != InvalidBuffer)
        otherBlock = BufferGetBlockNumber(otherBuffer);
    else
        otherBlock = InvalidBlockNumber;    /* just to keep compiler quiet */

    /*
     * We first try to put the tuple on the same page we last inserted a tuple
     * on, as cached in the BulkInsertState or relcache entry.  If that
     * doesn't work, we ask the Free Space Map to locate a suitable page.
     * Since the FSM's info might be out of date, we have to be prepared to
     * loop around and retry multiple times.  (To ensure this isn't an
     * infinite loop, we must update the FSM with the correct amount of free
     * space on each page that proves not to be suitable.)  If the FSM has no
     * record of a page with enough free space, we give up and extend the
     * relation.
     *
     * When use_fsm is false, we either put the tuple onto the existing target
     * page or extend the relation.
     */
    if (len + saveFreeSpace > MaxHeapTupleSize)
    {
        /* can't fit, don't bother asking FSM */
        targetBlock = InvalidBlockNumber;
        use_fsm = false;
    }
    else if (bistate && bistate->current_buf != InvalidBuffer)
        targetBlock = BufferGetBlockNumber(bistate->current_buf);
    else
        targetBlock = RelationGetTargetBlock(relation);

    if (targetBlock == InvalidBlockNumber && use_fsm)
    {
        /*
         * We have no cached target page, so ask the FSM for an initial
         * target.
         */
        targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);

        /*
         * If the FSM knows nothing of the rel, try the last page before we
         * give up and extend.  This avoids one-tuple-per-page syndrome during
         * bootstrapping or in a recently-started system.
         */
        if (targetBlock == InvalidBlockNumber)
        {
            BlockNumber nblocks = RelationGetNumberOfBlocks(relation);

            if (nblocks > 0)
                targetBlock = nblocks - 1;
        }
    }

loop:
    while (targetBlock != InvalidBlockNumber)
    {
        /*
         * Read and exclusive-lock the target block, as well as the other
         * block if one was given, taking suitable care with lock ordering and
         * the possibility they are the same block.
         *
         * If the page-level all-visible flag is set, caller will need to
         * clear both that and the corresponding visibility map bit.  However,
         * by the time we return, we'll have x-locked the buffer, and we don't
         * want to do any I/O while in that state.  So we check the bit here
         * before taking the lock, and pin the page if it appears necessary.
         * Checking without the lock creates a risk of getting the wrong
         * answer, so we'll have to recheck after acquiring the lock.
         */
        if (otherBuffer == InvalidBuffer)
        {
            /* easy case */
            buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else if (otherBlock == targetBlock)
        {
            /* also easy case */
            buffer = otherBuffer;
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else if (otherBlock < targetBlock)
        {
            /* lock other buffer first */
            buffer = ReadBuffer(relation, targetBlock);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        }
        else
        {
            /* lock target buffer first */
            buffer = ReadBuffer(relation, targetBlock);
            if (PageIsAllVisible(BufferGetPage(buffer)))
                visibilitymap_pin(relation, targetBlock, vmbuffer);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
            LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
        }

        /*
         * We now have the target page (and the other buffer, if any) pinned
         * and locked.  However, since our initial PageIsAllVisible checks
         * were performed before acquiring the lock, the results might now be
         * out of date, either for the selected victim buffer, or for the
         * other buffer passed by the caller.  In that case, we'll need to
         * give up our locks, go get the pin(s) we failed to get earlier, and
         * re-lock.  That's pretty painful, but hopefully shouldn't happen
         * often.
         *
         * Note that there's a small possibility that we didn't pin the page
         * above but still have the correct page pinned anyway, either because
         * we've already made a previous pass through this loop, or because
         * caller passed us the right page anyway.
         *
         * Note also that it's possible that by the time we get the pin and
         * retake the buffer locks, the visibility map bit will have been
         * cleared by some other backend anyway.  In that case, we'll have
         * done a bit of extra work for no gain, but there's no real harm
         * done.
         */
        if (otherBuffer == InvalidBuffer || targetBlock <= otherBlock)
            GetVisibilityMapPins(relation, buffer, otherBuffer,
                                 targetBlock, otherBlock, vmbuffer,
                                 vmbuffer_other);
        else
            GetVisibilityMapPins(relation, otherBuffer, buffer,
                                 otherBlock, targetBlock, vmbuffer_other,
                                 vmbuffer);

        /*
         * Now we can check to see if there's enough free space here.  If so,
         * we're done.
         */
        page = BufferGetPage(buffer);

        /*
         * Initialize the page if necessary; it'll be used soon.  We could
         * avoid dirtying the buffer here, and rely on the caller to do so
         * whenever it puts a tuple onto the page, but there seems not much
         * benefit in doing so.
         */
        if (PageIsNew(page))
        {
            PageInit(page, BufferGetPageSize(buffer), 0);
            MarkBufferDirty(buffer);
        }

        pageFreeSpace = PageGetHeapFreeSpace(page);
        if (len + saveFreeSpace <= pageFreeSpace)
        {
            /* use this page as future insert target, too */
            RelationSetTargetBlock(relation, targetBlock);
            return buffer;
        }

        /*
         * Not enough space, so we must give up our page locks and pin (if
         * any) and prepare to look elsewhere.  We don't care which order we
         * unlock the two buffers in, so this can be slightly simpler than the
         * code above.
         */
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
        if (otherBuffer == InvalidBuffer)
            ReleaseBuffer(buffer);
        else if (otherBlock != targetBlock)
        {
            LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buffer);
        }

        /* Without FSM, always fall out of the loop and extend */
        if (!use_fsm)
            break;

        /*
         * Update FSM as to condition of this page, and ask for another page
         * to try.
         */
        targetBlock = RecordAndGetPageWithFreeSpace(relation,
                                                    targetBlock,
                                                    pageFreeSpace,
                                                    len + saveFreeSpace);
    }

    /*
     * Have to extend the relation.
     *
     * We have to use a lock to ensure no one else is extending the rel at the
     * same time, else we will both try to initialize the same new page.  We
     * can skip locking for new or temp relations, however, since no one else
     * could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(relation);

    /*
     * If we need the lock but are not able to acquire it immediately, we'll
     * consider extending the relation by multiple blocks at a time to manage
     * contention on the relation extension lock.  However, this only makes
     * sense if we're using the FSM; otherwise, there's no point.
     */
    if (needLock)
    {
        if (!use_fsm)
            LockRelationForExtension(relation, ExclusiveLock);
        else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
        {
            /* Couldn't get the lock immediately; wait for it. */
            LockRelationForExtension(relation, ExclusiveLock);

            /*
             * Check if some other backend has extended a block for us while
             * we were waiting on the lock.
             */
            targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);

            /*
             * If some other waiter has already extended the relation, we
             * don't need to do so; just use the existing freespace.
             */
            if (targetBlock != InvalidBlockNumber)
            {
                UnlockRelationForExtension(relation, ExclusiveLock);
                goto loop;
            }

            /* Time to bulk-extend. */
            RelationAddExtraBlocks(relation, bistate);
        }
    }

    /*
     * In addition to whatever extension we performed above, we always add at
     * least one block to satisfy our own request.
     *
     * XXX This does an lseek - rather expensive - but at the moment it is the
     * only way to accurately determine how many blocks are in a relation.  Is
     * it worth keeping an accurate file length in shared memory someplace,
     * rather than relying on the kernel to do it for us?
     */
    buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate);

    /*
     * We need to initialize the empty new page.  Double-check that it really
     * is empty (this should never happen, but if it does we don't want to
     * risk wiping out valid data).
     */
    page = BufferGetPage(buffer);

    if (!PageIsNew(page))
        elog(ERROR, "page %u of relation \"%s\" should be empty but is not",
             BufferGetBlockNumber(buffer),
             RelationGetRelationName(relation));

    PageInit(page, BufferGetPageSize(buffer), 0);
    MarkBufferDirty(buffer);

    /*
     * Release the file-extension lock; it's now OK for someone else to extend
     * the relation some more.
     */
    if (needLock)
        UnlockRelationForExtension(relation, ExclusiveLock);

    /*
     * Lock the other buffer.  It's guaranteed to be of a lower page number
     * than the new page.  To conform with the deadlock prevention rules, we
     * ought to lock otherBuffer first, but that would give other backends a
     * chance to put tuples on our page.  To reduce the likelihood of that,
     * attempt to lock the other buffer conditionally; that's very likely to
     * work.  Otherwise we need to lock the buffers in the correct order, and
     * retry if the space has been used up in the meantime.
     *
     * Alternatively, we could acquire the lock on otherBuffer before
     * extending the relation, but that'd require holding the lock while
     * performing IO, which seems worse than an unlikely retry.
     */
    if (otherBuffer != InvalidBuffer)
    {
        Assert(otherBuffer != buffer);

        if (unlikely(!ConditionalLockBuffer(otherBuffer)))
        {
            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
            LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
            LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

            /*
             * Because the buffer was unlocked for a while, it's possible,
             * although unlikely, that the page was filled.  If so, just retry
             * from the start.
             */
            if (len > PageGetHeapFreeSpace(page))
            {
                LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK);
                UnlockReleaseBuffer(buffer);

                goto loop;
            }
        }
    }

    if (len > PageGetHeapFreeSpace(page))
    {
        /* We should not get here given the test at the top */
        elog(PANIC, "tuple is too big: size %zu", len);
    }

    /*
     * Remember the new page as our target for future insertions.
     *
     * XXX should we enter the new page into the free space map immediately,
     * or just keep it for this backend's exclusive use in the short run
     * (until VACUUM sees it)?  Seems to depend on whether you expect the
     * current backend to make more insertions or not, which is probably a
     * good bet most of the time.  So for now, don't add it to FSM yet.
     */
    RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer));

    return buffer;
}
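
/*
 * Illustrative sketch of the caller-side contract for the function above (an
 * assumption modeled on the heapam.c callers; error handling and WAL details
 * are elided):
 *
 *      Buffer      vmbuffer = InvalidBuffer;
 *      Buffer      buffer;
 *
 *      buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
 *                                         InvalidBuffer, options, bistate,
 *                                         &vmbuffer, NULL);
 *
 * buffer comes back pinned and exclusive-locked with enough free space; if
 * the chosen page was all-visible, vmbuffer holds a pin on the matching
 * visibility map page so the caller can clear the bit without doing I/O
 * while holding the buffer lock.  After placing the tuple and writing WAL,
 * the caller releases everything:
 *
 *      UnlockReleaseBuffer(buffer);
 *      if (vmbuffer != InvalidBuffer)
 *          ReleaseBuffer(vmbuffer);
 */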