Content-Length: 514886 | pFad | http://github.com/tonybelloni/postgres/commit/fa0f466d5329e10b16f3b38c8eaf5306f7e234e8

77 Log the creation of an init fork unconditionally. · tonybelloni/postgres@fa0f466 · GitHub
Skip to content

Commit fa0f466

Browse files
committed
Log the creation of an init fork unconditionally.
Previously, it was thought that this only needed to be done for the benefit of possible standbys, so wal_level = minimal skipped it. But that's not safe, because during crash recovery we might replay an XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record, which recursively removes the directory that contains the new init fork. So log it always. The user-visible effect of this bug is that if you create a database or tablespace, then create an unlogged table, then crash without checkpointing, then restart, accessing the table will fail, because it won't have been properly reset. This commit fixes that. Michael Paquier, per a report from Konstantin Knizhnik. Wording of the comments per a suggestion from me.
1 parent 0b78106 commit fa0f466

File tree

4 files changed

+38
-24
lines changed

4 files changed

+38
-24
lines changed

contrib/bloom/blinsert.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,18 @@ blbuildempty(Relation index)
164164
metapage = (Page) palloc(BLCKSZ);
165165
BloomFillMetapage(index, metapage);
166166

167-
/* Write the page. If archiving/streaming, XLOG it. */
167+
/*
168+
* Write the page and log it. It might seem that an immediate sync
169+
* would be sufficient to guarantee that the file exists on disk, but
170+
* recovery itself might remove it while replaying, for example, an
171+
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
172+
* need this even when wal_level=minimal.
173+
*/
168174
PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
169175
smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
170176
(char *) metapage, true);
171-
if (XLogIsNeeded())
172-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
173-
BLOOM_METAPAGE_BLKNO, metapage, false);
177+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
178+
BLOOM_METAPAGE_BLKNO, metapage, false);
174179

175180
/*
176181
* An immediate sync is required even if we xlog'd the page, because the

src/backend/access/nbtree/nbtree.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -242,13 +242,18 @@ btbuildempty(Relation index)
242242
metapage = (Page) palloc(BLCKSZ);
243243
_bt_initmetapage(metapage, P_NONE, 0);
244244

245-
/* Write the page. If archiving/streaming, XLOG it. */
245+
/*
246+
* Write the page and log it. It might seem that an immediate sync
247+
* would be sufficient to guarantee that the file exists on disk, but
248+
* recovery itself might remove it while replaying, for example, an
249+
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
250+
* need this even when wal_level=minimal.
251+
*/
246252
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
247253
smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
248254
(char *) metapage, true);
249-
if (XLogIsNeeded())
250-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
251-
BTREE_METAPAGE, metapage, false);
255+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
256+
BTREE_METAPAGE, metapage, false);
252257

253258
/*
254259
* An immediate sync is required even if we xlog'd the page, because the

src/backend/access/spgist/spginsert.c

+13-10
Original file line numberDiff line numberDiff line change
@@ -161,33 +161,36 @@ spgbuildempty(Relation index)
161161
page = (Page) palloc(BLCKSZ);
162162
SpGistInitMetapage(page);
163163

164-
/* Write the page. If archiving/streaming, XLOG it. */
164+
/*
165+
* Write the page and log it unconditionally. This is important
166+
* particularly for indexes created on tablespaces and databases
167+
* whose creation happened after the last redo pointer as recovery
168+
* removes any of their existing content when the corresponding
169+
* create records are replayed.
170+
*/
165171
PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
166172
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
167173
(char *) page, true);
168-
if (XLogIsNeeded())
169-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
170-
SPGIST_METAPAGE_BLKNO, page, false);
174+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
175+
SPGIST_METAPAGE_BLKNO, page, false);
171176

172177
/* Likewise for the root page. */
173178
SpGistInitPage(page, SPGIST_LEAF);
174179

175180
PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
176181
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_ROOT_BLKNO,
177182
(char *) page, true);
178-
if (XLogIsNeeded())
179-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
180-
SPGIST_ROOT_BLKNO, page, true);
183+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
184+
SPGIST_ROOT_BLKNO, page, true);
181185

182186
/* Likewise for the null-tuples root page. */
183187
SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
184188

185189
PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
186190
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_NULL_BLKNO,
187191
(char *) page, true);
188-
if (XLogIsNeeded())
189-
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
190-
SPGIST_NULL_BLKNO, page, true);
192+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
193+
SPGIST_NULL_BLKNO, page, true);
191194

192195
/*
193196
* An immediate sync is required even if we xlog'd the pages, because the

src/backend/catalog/heap.c

+7-6
Original file line numberDiff line numberDiff line change
@@ -1380,18 +1380,19 @@ heap_create_with_catalog(const char *relname,
13801380

13811381
/*
13821382
* Set up an init fork for an unlogged table so that it can be correctly
1383-
* reinitialized on restart. Since we're going to do an immediate sync, we
1384-
* only need to xlog this if archiving or streaming is enabled. And the
1385-
* immediate sync is required, because otherwise there's no guarantee that
1386-
* this will hit the disk before the next checkpoint moves the redo pointer.
1383+
* reinitialized on restart. An immediate sync is required even if the
1384+
* page has been logged, because the write did not go through
1385+
* shared_buffers and therefore a concurrent checkpoint may have moved
1386+
* the redo pointer past our xlog record. Recovery may as well remove it
1387+
* while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
1388+
* record. Therefore, logging is necessary even if wal_level=minimal.
13871389
*/
13881390
void
13891391
heap_create_init_fork(Relation rel)
13901392
{
13911393
RelationOpenSmgr(rel);
13921394
smgrcreate(rel->rd_smgr, INIT_FORKNUM, false);
1393-
if (XLogIsNeeded())
1394-
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
1395+
log_smgrcreate(&rel->rd_smgr->smgr_rnode.node, INIT_FORKNUM);
13951396
smgrimmedsync(rel->rd_smgr, INIT_FORKNUM);
13961397
}
13971398

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/tonybelloni/postgres/commit/fa0f466d5329e10b16f3b38c8eaf5306f7e234e8

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy