Skip to content

Commit 6c9d0e2

Browse files
author
Michail Nikolaev
committed
Fix btree race condition
1 parent baf4871 commit 6c9d0e2

File tree

1 file changed

+33
-34
lines changed

1 file changed

+33
-34
lines changed

src/backend/access/nbtree/nbtxlog.c

+33-34
Original file line numberDiff line numberDiff line change
@@ -649,55 +649,37 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
649649
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
650650
BlockNumber leftsib;
651651
BlockNumber rightsib;
652-
Buffer buffer;
652+
Buffer leafbuf, lbuff = InvalidBuffer, rbuff, buff;
653653
Page page;
654654
BTPageOpaque pageop;
655655

656656
leftsib = xlrec->leftsib;
657657
rightsib = xlrec->rightsib;
658658

659659
/*
660-
* In normal operation, we would lock all the pages this WAL record
661-
* touches before changing any of them. In WAL replay, it should be okay
662-
* to lock just one page at a time, since no concurrent index updates can
663-
* be happening, and readers should not care whether they arrive at the
664-
* target page or not (since it's surely empty).
660+
* We must lock the pages we modify in left-to-right order (left sibling,
661+
* target page, right sibling); otherwise we can race against _bt_walk_left.
665662
*/
666663

667-
/* Fix left-link of right sibling */
668-
if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
669-
{
670-
page = (Page) BufferGetPage(buffer);
671-
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
672-
pageop->btpo_prev = leftsib;
673-
674-
PageSetLSN(page, lsn);
675-
MarkBufferDirty(buffer);
676-
}
677-
if (BufferIsValid(buffer))
678-
UnlockReleaseBuffer(buffer);
679-
680664
/* Fix right-link of left sibling, if any */
681665
if (leftsib != P_NONE)
682666
{
683-
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
667+
if (XLogReadBufferForRedo(record, 1, &lbuff) == BLK_NEEDS_REDO)
684668
{
685-
page = (Page) BufferGetPage(buffer);
669+
page = (Page) BufferGetPage(lbuff);
686670
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
687671
pageop->btpo_next = rightsib;
688672

689673
PageSetLSN(page, lsn);
690-
MarkBufferDirty(buffer);
674+
MarkBufferDirty(lbuff);
691675
}
692-
if (BufferIsValid(buffer))
693-
UnlockReleaseBuffer(buffer);
694676
}
695677

696678
/* Rewrite target page as empty deleted page */
697-
buffer = XLogInitBufferForRedo(record, 0);
698-
page = (Page) BufferGetPage(buffer);
679+
buff = XLogInitBufferForRedo(record, 0);
680+
page = (Page) BufferGetPage(buff);
699681

700-
_bt_pageinit(page, BufferGetPageSize(buffer));
682+
_bt_pageinit(page, BufferGetPageSize(buff));
701683
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
702684

703685
pageop->btpo_prev = leftsib;
@@ -707,9 +689,26 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
707689
pageop->btpo_cycleid = 0;
708690

709691
PageSetLSN(page, lsn);
710-
MarkBufferDirty(buffer);
711-
UnlockReleaseBuffer(buffer);
692+
MarkBufferDirty(buff);
693+
694+
/* Fix left-link of right sibling */
695+
if (XLogReadBufferForRedo(record, 2, &rbuff) == BLK_NEEDS_REDO)
696+
{
697+
page = (Page) BufferGetPage(rbuff);
698+
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
699+
pageop->btpo_prev = leftsib;
712700

701+
PageSetLSN(page, lsn);
702+
MarkBufferDirty(rbuff);
703+
}
704+
705+
/* Release all buffers */
706+
if (BufferIsValid(lbuff))
707+
UnlockReleaseBuffer(lbuff);
708+
UnlockReleaseBuffer(buff);
709+
if (BufferIsValid(rbuff))
710+
UnlockReleaseBuffer(rbuff);
711+
713712
/*
714713
* If we deleted a parent of the targeted leaf page, instead of the leaf
715714
* itself, update the leaf to point to the next remaining child in the
@@ -723,10 +722,10 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
723722
*/
724723
IndexTupleData trunctuple;
725724

726-
buffer = XLogInitBufferForRedo(record, 3);
727-
page = (Page) BufferGetPage(buffer);
725+
leafbuf = XLogInitBufferForRedo(record, 3);
726+
page = (Page) BufferGetPage(leafbuf);
728727

729-
_bt_pageinit(page, BufferGetPageSize(buffer));
728+
_bt_pageinit(page, BufferGetPageSize(leafbuf));
730729
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
731730

732731
pageop->btpo_flags = BTP_HALF_DEAD | BTP_LEAF;
@@ -745,8 +744,8 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
745744
elog(ERROR, "could not add dummy high key to half-dead page");
746745

747746
PageSetLSN(page, lsn);
748-
MarkBufferDirty(buffer);
749-
UnlockReleaseBuffer(buffer);
747+
MarkBufferDirty(leafbuf);
748+
UnlockReleaseBuffer(leafbuf);
750749
}
751750

752751
/* Update metapage if needed */

0 commit comments

Comments
 (0)