Monday, June 30, 2008

Re: [PATCHES] [HACKERS] Hint Bits and Write I/O

Index: src/backend/access/gist/gistget.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/gist/gistget.c,v
retrieving revision 1.73
diff -c -r1.73 gistget.c
*** src/backend/access/gist/gistget.c 12 May 2008 00:00:44 -0000 1.73
--- src/backend/access/gist/gistget.c 30 Jun 2008 22:05:43 -0000
***************
*** 49,55 ****
/* page unchanged, so all is simple */
offset = ItemPointerGetOffsetNumber(iptr);
ItemIdMarkDead(PageGetItemId(p, offset));
! SetBufferCommitInfoNeedsSave(buffer);
LockBuffer(buffer, GIST_UNLOCK);
break;
}
--- 49,55 ----
/* page unchanged, so all is simple */
offset = ItemPointerGetOffsetNumber(iptr);
ItemIdMarkDead(PageGetItemId(p, offset));
! SetBufferCommitInfoNeedsSave(buffer, true);
LockBuffer(buffer, GIST_UNLOCK);
break;
}
***************
*** 64,70 ****
{
/* found */
ItemIdMarkDead(PageGetItemId(p, offset));
! SetBufferCommitInfoNeedsSave(buffer);
LockBuffer(buffer, GIST_UNLOCK);
if (buffer != so->curbuf)
ReleaseBuffer(buffer);
--- 64,70 ----
{
/* found */
ItemIdMarkDead(PageGetItemId(p, offset));
! SetBufferCommitInfoNeedsSave(buffer, true);
LockBuffer(buffer, GIST_UNLOCK);
if (buffer != so->curbuf)
ReleaseBuffer(buffer);
Index: src/backend/access/hash/hash.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/hash/hash.c,v
retrieving revision 1.103
diff -c -r1.103 hash.c
*** src/backend/access/hash/hash.c 12 May 2008 00:00:44 -0000 1.103
--- src/backend/access/hash/hash.c 30 Jun 2008 22:05:43 -0000
***************
*** 242,251 ****

/*
* Since this can be redone later if needed, it's treated the same
! * as a commit-hint-bit status update for heap tuples: we mark the
! * buffer dirty but don't make a WAL log entry.
*/
! SetBufferCommitInfoNeedsSave(so->hashso_curbuf);
}

/*
--- 242,251 ----

/*
* Since this can be redone later if needed, it's treated the same
! * as a commit-hint-bit status update for heap tuples: we don't make
! * a WAL log entry, and mark the page for a flexible dirty write.
*/
! SetBufferCommitInfoNeedsSave(so->hashso_curbuf, true);
}

/*
Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.259
diff -c -r1.259 heapam.c
*** src/backend/access/heap/heapam.c 12 Jun 2008 09:12:30 -0000 1.259
--- src/backend/access/heap/heapam.c 30 Jun 2008 22:05:43 -0000
***************
*** 1533,1539 ****
*/
if (all_dead && *all_dead &&
HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
! buffer) != HEAPTUPLE_DEAD)
*all_dead = false;

/*
--- 1533,1539 ----
*/
if (all_dead && *all_dead &&
HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
! buffer, true) != HEAPTUPLE_DEAD)
*all_dead = false;

/*
***************
*** 1717,1726 ****
{
if (TransactionIdDidCommit(xid))
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! xid);
else
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
}
}

--- 1717,1726 ----
{
if (TransactionIdDidCommit(xid))
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! xid, true);
else
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
}
}

Index: src/backend/access/heap/pruneheap.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/heap/pruneheap.c,v
retrieving revision 1.14
diff -c -r1.14 pruneheap.c
*** src/backend/access/heap/pruneheap.c 12 Jun 2008 09:12:30 -0000 1.14
--- src/backend/access/heap/pruneheap.c 30 Jun 2008 22:05:43 -0000
***************
*** 267,274 ****
{
/*
* If we didn't prune anything, but have found a new value for the
! * pd_prune_xid field, update it and mark the buffer dirty.
! * This is treated as a non-WAL-logged hint.
*
* Also clear the "page is full" flag if it is set, since there's no
* point in repeating the prune/defrag process until something else
--- 267,274 ----
{
/*
* If we didn't prune anything, but have found a new value for the
! * pd_prune_xid field, update it and mark the buffer for a flexible
! * dirty write. This is treated as a non-WAL-logged hint.
*
* Also clear the "page is full" flag if it is set, since there's no
* point in repeating the prune/defrag process until something else
***************
*** 279,285 ****
{
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
PageClearFull(page);
! SetBufferCommitInfoNeedsSave(buffer);
}
}

--- 279,285 ----
{
((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
PageClearFull(page);
! SetBufferCommitInfoNeedsSave(buffer, true);
}
}

***************
*** 391,397 ****
* either here or while following a chain below. Whichever path
* gets there first will mark the tuple unused.
*/
! if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer)
== HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
--- 391,397 ----
* either here or while following a chain below. Whichever path
* gets there first will mark the tuple unused.
*/
! if (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer, true)
== HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
***************
*** 469,475 ****
*/
tupdead = recent_dead = false;

! switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
{
case HEAPTUPLE_DEAD:
tupdead = true;
--- 469,475 ----
*/
tupdead = recent_dead = false;

! switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer, true))
{
case HEAPTUPLE_DEAD:
tupdead = true;
Index: src/backend/access/index/indexam.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/index/indexam.c,v
retrieving revision 1.108
diff -c -r1.108 indexam.c
*** src/backend/access/index/indexam.c 12 May 2008 00:00:45 -0000 1.108
--- src/backend/access/index/indexam.c 30 Jun 2008 22:05:43 -0000
***************
*** 597,607 ****
* If we can't see it, maybe no one else can either. Check to see
* if the tuple is dead to all transactions. If we find that all
* the tuples in the HOT chain are dead, we'll signal the index AM
! * to not return that TID on future indexscans.
*/
if (scan->xs_hot_dead &&
HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
! scan->xs_cbuf) != HEAPTUPLE_DEAD)
scan->xs_hot_dead = false;

/*
--- 597,610 ----
* If we can't see it, maybe no one else can either. Check to see
* if the tuple is dead to all transactions. If we find that all
* the tuples in the HOT chain are dead, we'll signal the index AM
! * to not return that TID on future indexscans. We can be flexible
! * about writing the hint bits because once the index AM has been
! * told to return that TID it is unlikely that the tuple will be
! * re-accessed, except by a seq scan.
*/
if (scan->xs_hot_dead &&
HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
! scan->xs_cbuf, true) != HEAPTUPLE_DEAD)
scan->xs_hot_dead = false;

/*
Index: src/backend/access/nbtree/nbtinsert.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/nbtree/nbtinsert.c,v
retrieving revision 1.167
diff -c -r1.167 nbtinsert.c
*** src/backend/access/nbtree/nbtinsert.c 11 Jun 2008 08:38:56 -0000 1.167
--- src/backend/access/nbtree/nbtinsert.c 30 Jun 2008 22:05:43 -0000
***************
*** 305,319 ****
/*
* The conflicting tuple (or whole HOT chain) is dead to
* everyone, so we may as well mark the index entry
! * killed.
*/
ItemIdMarkDead(curitemid);
opaque->btpo_flags |= BTP_HAS_GARBAGE;
/* be sure to mark the proper buffer dirty... */
if (nbuf != InvalidBuffer)
! SetBufferCommitInfoNeedsSave(nbuf);
else
! SetBufferCommitInfoNeedsSave(buf);
}
}
}
--- 305,319 ----
/*
* The conflicting tuple (or whole HOT chain) is dead to
* everyone, so we may as well mark the index entry
! * killed, though with a flexibile dirty write.
*/
ItemIdMarkDead(curitemid);
opaque->btpo_flags |= BTP_HAS_GARBAGE;
/* be sure to mark the proper buffer dirty... */
if (nbuf != InvalidBuffer)
! SetBufferCommitInfoNeedsSave(nbuf, true);
else
! SetBufferCommitInfoNeedsSave(buf, true);
}
}
}
Index: src/backend/access/nbtree/nbtree.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/nbtree/nbtree.c,v
retrieving revision 1.160
diff -c -r1.160 nbtree.c
*** src/backend/access/nbtree/nbtree.c 12 May 2008 00:00:45 -0000 1.160
--- src/backend/access/nbtree/nbtree.c 30 Jun 2008 22:05:43 -0000
***************
*** 902,908 ****
opaque->btpo_cycleid == vstate->cycleid)
{
opaque->btpo_cycleid = 0;
! SetBufferCommitInfoNeedsSave(buf);
}
}

--- 902,908 ----
opaque->btpo_cycleid == vstate->cycleid)
{
opaque->btpo_cycleid = 0;
! SetBufferCommitInfoNeedsSave(buf, true);
}
}

Index: src/backend/access/nbtree/nbtutils.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/nbtree/nbtutils.c,v
retrieving revision 1.90
diff -c -r1.90 nbtutils.c
*** src/backend/access/nbtree/nbtutils.c 12 May 2008 00:00:45 -0000 1.90
--- src/backend/access/nbtree/nbtutils.c 30 Jun 2008 22:05:43 -0000
***************
*** 1161,1168 ****

/*
* Since this can be redone later if needed, it's treated the same as a
! * commit-hint-bit status update for heap tuples: we mark the buffer dirty
! * but don't make a WAL log entry.
*
* Whenever we mark anything LP_DEAD, we also set the page's
* BTP_HAS_GARBAGE flag, which is likewise just a hint.
--- 1161,1168 ----

/*
* Since this can be redone later if needed, it's treated the same as a
! * commit-hint-bit status update for heap tuples: we don't make a WAL log
! * entry, though we mark the page for a flexible dirty write.
*
* Whenever we mark anything LP_DEAD, we also set the page's
* BTP_HAS_GARBAGE flag, which is likewise just a hint.
***************
*** 1170,1176 ****
if (killedsomething)
{
opaque->btpo_flags |= BTP_HAS_GARBAGE;
! SetBufferCommitInfoNeedsSave(so->currPos.buf);
}

if (!haveLock)
--- 1170,1176 ----
if (killedsomething)
{
opaque->btpo_flags |= BTP_HAS_GARBAGE;
! SetBufferCommitInfoNeedsSave(so->currPos.buf, true);
}

if (!haveLock)
Index: src/backend/catalog/index.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/catalog/index.c,v
retrieving revision 1.299
diff -c -r1.299 index.c
*** src/backend/catalog/index.c 12 May 2008 20:01:59 -0000 1.299
--- src/backend/catalog/index.c 30 Jun 2008 22:05:43 -0000
***************
*** 1572,1582 ****
* since caller should hold ShareLock on the relation, but let's
* be conservative about it. (This remark is still correct even
* with HOT-pruning: our pin on the buffer prevents pruning.)
*/
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
! scan->rs_cbuf))
{
case HEAPTUPLE_DEAD:
/* Definitely dead, we can ignore it */
--- 1572,1583 ----
* since caller should hold ShareLock on the relation, but let's
* be conservative about it. (This remark is still correct even
* with HOT-pruning: our pin on the buffer prevents pruning.)
+ * We're flexible on hint bit writes here also.
*/
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, OldestXmin,
! scan->rs_cbuf, true))
{
case HEAPTUPLE_DEAD:
/* Definitely dead, we can ignore it */
Index: src/backend/commands/analyze.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/commands/analyze.c,v
retrieving revision 1.122
diff -c -r1.122 analyze.c
*** src/backend/commands/analyze.c 8 Jun 2008 22:00:47 -0000 1.122
--- src/backend/commands/analyze.c 30 Jun 2008 22:05:43 -0000
***************
*** 392,398 ****
}

/*
! * Acquire the sample rows
*/
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
numrows = acquire_sample_rows(onerel, rows, targrows,
--- 392,399 ----
}

/*
! * Acquire the sample rows. Set hints with flexibility only if we
! * are running ANALYZE without VACUUM.
*/
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
numrows = acquire_sample_rows(onerel, rows, targrows,
***************
*** 928,936 ****
targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
targtuple.t_len = ItemIdGetLength(itemid);

switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
OldestXmin,
! targbuffer))
{
case HEAPTUPLE_LIVE:
sample_it = true;
--- 929,941 ----
targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
targtuple.t_len = ItemIdGetLength(itemid);

+ /*
+ * We are flexible about setting hint bits because we want
+ * to reduce the I/O overhead for larger tables
+ */
switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
OldestXmin,
! targbuffer, true))
{
case HEAPTUPLE_LIVE:
sample_it = true;
Index: src/backend/commands/cluster.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/commands/cluster.c,v
retrieving revision 1.176
diff -c -r1.176 cluster.c
*** src/backend/commands/cluster.c 12 May 2008 20:01:59 -0000 1.176
--- src/backend/commands/cluster.c 30 Jun 2008 22:05:43 -0000
***************
*** 789,796 ****

LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
! scan->xs_cbuf))
{
case HEAPTUPLE_DEAD:
/* Definitely dead */
--- 789,800 ----

LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);

+ /*
+ * We can be flexible about setting hint bits because we are
+ * going to rewrite the whole table anyway
+ */
switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
! scan->xs_cbuf, true))
{
case HEAPTUPLE_DEAD:
/* Definitely dead */
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.375
diff -c -r1.375 vacuum.c
*** src/backend/commands/vacuum.c 5 Jun 2008 15:47:32 -0000 1.375
--- src/backend/commands/vacuum.c 30 Jun 2008 22:05:43 -0000
***************
*** 1474,1480 ****
tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum);

! switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
{
case HEAPTUPLE_LIVE:
/* Tuple is good --- but let's do some validity checks */
--- 1474,1484 ----
tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum);

! /*
! * There is no flexibility here for hint bit setting. All
! * hints must be set and written.
! */
! switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf, false))
{
case HEAPTUPLE_LIVE:
/* Tuple is good --- but let's do some validity checks */
***************
*** 2164,2169 ****
--- 2168,2175 ----
* been removed, and perhaps even replaced by a MOVED_IN
* tuple. We don't want to include any DEAD tuples in the
* chain, so we have to recheck HeapTupleSatisfiesVacuum.
+ * There is no flexibility here for hint bit setting. All
+ * hints must be set and written.
*/
while (!(tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
HEAP_IS_LOCKED)) &&
***************
*** 2218,2224 ****
LockBuffer(nextBuf, BUFFER_LOCK_SHARE);
nextTstatus = HeapTupleSatisfiesVacuum(nextTdata,
OldestXmin,
! nextBuf);
if (nextTstatus == HEAPTUPLE_DEAD ||
nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS)
{
--- 2224,2230 ----
LockBuffer(nextBuf, BUFFER_LOCK_SHARE);
nextTstatus = HeapTupleSatisfiesVacuum(nextTdata,
OldestXmin,
! nextBuf, false);
if (nextTstatus == HEAPTUPLE_DEAD ||
nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS)
{
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.107
diff -c -r1.107 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c 12 May 2008 00:00:48 -0000 1.107
--- src/backend/commands/vacuumlazy.c 30 Jun 2008 22:05:43 -0000
***************
*** 451,457 ****

tupgone = false;

! switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
{
case HEAPTUPLE_DEAD:

--- 451,461 ----

tupgone = false;

! /*
! * There is no flexibility here for hint bit setting. All
! * hints must be set and written.
! */
! switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf, false))
{
case HEAPTUPLE_DEAD:

Index: src/backend/storage/buffer/buf_init.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/buffer/buf_init.c,v
retrieving revision 1.80
diff -c -r1.80 buf_init.c
*** src/backend/storage/buffer/buf_init.c 1 Jan 2008 19:45:51 -0000 1.80
--- src/backend/storage/buffer/buf_init.c 30 Jun 2008 22:13:32 -0000
***************
*** 112,117 ****
--- 112,118 ----
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
buf->usage_count = 0;
+ buf->hint_count = 0;
buf->refcount = 0;
buf->wait_backend_pid = 0;

Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.232
diff -c -r1.232 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 12 Jun 2008 09:12:31 -0000 1.232
--- src/backend/storage/buffer/bufmgr.c 30 Jun 2008 22:17:20 -0000
***************
*** 84,90 ****
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
static void BufferSync(int flags);
! static int SyncOneBuffer(int buf_id, bool skip_recently_used);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
--- 84,90 ----
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
static void BufferSync(int flags);
! static int SyncOneBuffer(int buf_id, bool LRU_scan);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
***************
*** 492,498 ****
* won't prevent hint-bit updates). We will recheck the dirty bit
* after re-locking the buffer header.
*/
! if (oldFlags & BM_DIRTY)
{
/*
* We need a share-lock on the buffer contents to write it out
--- 492,498 ----
* won't prevent hint-bit updates). We will recheck the dirty bit
* after re-locking the buffer header.
*/
! if (oldFlags & BM_DIRTY || buf->hint_count > 0)
{
/*
* We need a share-lock on the buffer contents to write it out
***************
*** 680,685 ****
--- 680,686 ----
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
buf->flags |= BM_TAG_VALID;
buf->usage_count = 1;
+ buf->hint_count = 0;

UnlockBufHdr(buf);

***************
*** 790,795 ****
--- 791,797 ----
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
buf->usage_count = 0;
+ buf->hint_count = 0;

UnlockBufHdr(buf);

***************
*** 1428,1434 ****
/*
* SyncOneBuffer -- process a single buffer during syncing.
*
! * If skip_recently_used is true, we don't write currently-pinned buffers, nor
* buffers marked recently used, as these are not replacement candidates.
*
* Returns a bitmask containing the following flag bits:
--- 1430,1436 ----
/*
* SyncOneBuffer -- process a single buffer during syncing.
*
! * If LRU_scan is true, we don't write currently-pinned buffers, nor
* buffers marked recently used, as these are not replacement candidates.
*
* Returns a bitmask containing the following flag bits:
***************
*** 1442,1448 ****
* Note: caller must have done ResourceOwnerEnlargeBuffers.
*/
static int
! SyncOneBuffer(int buf_id, bool skip_recently_used)
{
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
int result = 0;
--- 1444,1450 ----
* Note: caller must have done ResourceOwnerEnlargeBuffers.
*/
static int
! SyncOneBuffer(int buf_id, bool LRU_scan)
{
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
int result = 0;
***************
*** 1460,1473 ****

if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
result |= BUF_REUSABLE;
! else if (skip_recently_used)
{
/* Caller told us not to write recently-used buffers */
UnlockBufHdr(bufHdr);
return result;
}

! if (!(bufHdr->flags & BM_VALID) || !(bufHdr->flags & BM_DIRTY))
{
/* It's clean, so nothing to do */
UnlockBufHdr(bufHdr);
--- 1462,1477 ----

if (bufHdr->refcount == 0 && bufHdr->usage_count == 0)
result |= BUF_REUSABLE;
! else if (LRU_scan)
{
/* Caller told us not to write recently-used buffers */
UnlockBufHdr(bufHdr);
return result;
}

! if (!(bufHdr->flags & BM_VALID) ||
! !(bufHdr->flags & BM_DIRTY ||
! (LRU_scan && bufHdr->hint_count > 0)))
{
/* It's clean, so nothing to do */
UnlockBufHdr(bufHdr);
***************
*** 2163,2169 ****
* to be associated with a WAL-entry-creating action.
*/
void
! SetBufferCommitInfoNeedsSave(Buffer buffer)
{
volatile BufferDesc *bufHdr;

--- 2167,2173 ----
* to be associated with a WAL-entry-creating action.
*/
void
! SetBufferCommitInfoNeedsSave(Buffer buffer, bool flexible)
{
volatile BufferDesc *bufHdr;

***************
*** 2173,2178 ****
--- 2177,2187 ----
if (BufferIsLocal(buffer))
{
MarkLocalBufferDirty(buffer);
+
+ #ifdef BGW_DEBUG
+ if (bufHdr->hint_count < UINT8_MAX)
+ bufHdr->hint_count++;
+ #endif
return;
}

***************
*** 2194,2205 ****
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
! LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
! if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
! VacuumCostBalance += VacuumCostPageDirty;
! bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
! UnlockBufHdr(bufHdr);
}
}

--- 2203,2232 ----
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
! if (flexible)
! {
! /*
! * If the buffer isn't dirty yet then update the hint_count
! * without holding the buffer header spin lock. It doesn't really
! * matter if we get the wrong result since hint_count is used
! * as a hint anyway, never for anything that needs to be correct.
! */
! if (bufHdr->hint_count < UINT8_MAX)
! bufHdr->hint_count++;
! }
! else
! {
! LockBufHdr(bufHdr);
! Assert(bufHdr->refcount > 0);
! if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
! VacuumCostBalance += VacuumCostPageDirty;
! bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
! #ifdef BGW_DEBUG
! if (bufHdr->hint_count < UINT8_MAX)
! bufHdr->hint_count++;
! #endif
! UnlockBufHdr(bufHdr);
! }
}
}

Index: src/backend/storage/buffer/localbuf.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/buffer/localbuf.c,v
retrieving revision 1.80
diff -c -r1.80 localbuf.c
*** src/backend/storage/buffer/localbuf.c 12 Jun 2008 09:12:31 -0000 1.80
--- src/backend/storage/buffer/localbuf.c 30 Jun 2008 22:13:25 -0000
***************
*** 280,285 ****
--- 280,286 ----
CLEAR_BUFFERTAG(bufHdr->tag);
bufHdr->flags = 0;
bufHdr->usage_count = 0;
+ bufHdr->hint_count = 0;
}
}
}
Index: src/backend/utils/time/tqual.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/utils/time/tqual.c,v
retrieving revision 1.110
diff -c -r1.110 tqual.c
*** src/backend/utils/time/tqual.c 26 Mar 2008 16:20:47 -0000 1.110
--- src/backend/utils/time/tqual.c 30 Jun 2008 22:08:30 -0000
***************
*** 84,90 ****
*/
static inline void
SetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid)
{
if (TransactionIdIsValid(xid))
{
--- 84,90 ----
*/
static inline void
SetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid, bool flexible)
{
if (TransactionIdIsValid(xid))
{
***************
*** 96,102 ****
}

tuple->t_infomask |= infomask;
! SetBufferCommitInfoNeedsSave(buffer);
}

/*
--- 96,102 ----
}

tuple->t_infomask |= infomask;
! SetBufferCommitInfoNeedsSave(buffer, flexible);
}

/*
***************
*** 107,115 ****
*/
void
HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid)
{
! SetHintBits(tuple, buffer, infomask, xid);
}


--- 107,115 ----
*/
void
HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid, bool flexible)
{
! SetHintBits(tuple, buffer, infomask, xid, flexible);
}


***************
*** 156,166 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 156,166 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 173,183 ****
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 173,183 ----
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 196,202 ****
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 196,202 ----
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 206,217 ****
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 206,217 ----
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), false);
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 249,255 ****
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 249,255 ----
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 258,269 ****
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
return false;
}

--- 258,269 ----
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), false);
return false;
}

***************
*** 327,337 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 327,337 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 344,354 ****
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 344,354 ----
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 370,376 ****
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 370,376 ----
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 383,394 ****
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 383,394 ----
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), false);
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 429,435 ****
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 429,435 ----
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 438,449 ****
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
return false;
}

--- 438,449 ----
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), false);
return false;
}

***************
*** 491,501 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 491,501 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, true);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 508,518 ****
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 508,518 ----
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, true);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return false;
}
}
***************
*** 549,554 ****
--- 549,556 ----
* the case where the tuple is share-locked by a MultiXact, even if the
* MultiXact includes the current transaction. Callers that want to
* distinguish that case must test for it themselves.)
+ *
+ * We are flexible with hint bit setting in all cases here.
*/
HTSU_Result
HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
***************
*** 570,580 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HeapTupleInvisible;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 572,582 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return HeapTupleInvisible;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, true);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 587,597 ****
return HeapTupleInvisible;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HeapTupleInvisible;
}
}
--- 589,599 ----
return HeapTupleInvisible;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, true);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return HeapTupleInvisible;
}
}
***************
*** 613,619 ****
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return HeapTupleMayBeUpdated;
}

--- 615,621 ----
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return HeapTupleMayBeUpdated;
}

***************
*** 626,637 ****
return HeapTupleInvisible;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HeapTupleInvisible;
}
}
--- 628,639 ----
return HeapTupleInvisible;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), true);
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return HeapTupleInvisible;
}
}
***************
*** 656,662 ****
if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
return HeapTupleBeingUpdated;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return HeapTupleMayBeUpdated;
}

--- 658,664 ----
if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
return HeapTupleBeingUpdated;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return HeapTupleMayBeUpdated;
}

***************
*** 677,683 ****
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return HeapTupleMayBeUpdated;
}

--- 679,685 ----
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return HeapTupleMayBeUpdated;
}

***************
*** 686,697 ****
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return HeapTupleMayBeUpdated;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
return HeapTupleUpdated; /* updated by other */
}

--- 688,699 ----
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return HeapTupleMayBeUpdated;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), true);
return HeapTupleUpdated; /* updated by other */
}

***************
*** 737,747 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 739,749 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 754,764 ****
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 756,766 ----
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 777,783 ****
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 779,785 ----
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 791,802 ****
}
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 793,804 ----
}
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), false);
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 837,843 ****
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 839,845 ----
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

***************
*** 846,857 ****
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
return false; /* updated by other */
}

--- 848,859 ----
if (tuple->t_infomask & HEAP_IS_LOCKED)
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, false);
return true;
}

SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), false);
return false; /* updated by other */
}

***************
*** 875,880 ****
--- 877,884 ----
* (Notice, however, that the tuple status hint bits will be updated on the
* basis of the true state of the transaction, even if we then pretend we
* can't see it.)
+ *
+ * We are flexible with hint bit setting in all cases here.
*/
bool
HeapTupleSatisfiesMVCC(HeapTupleHeader tuple, Snapshot snapshot,
***************
*** 896,906 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
--- 900,910 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return false;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, true);
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
***************
*** 913,923 ****
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 917,927 ----
return false;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, false);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, false);
return false;
}
}
***************
*** 939,945 ****
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

--- 943,949 ----
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return true;
}

***************
*** 952,963 ****
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return false;
}
}
--- 956,967 ----
return false;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), true);
else
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, true);
return false;
}
}
***************
*** 999,1011 ****
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return true;
}

/* xmax transaction committed */
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
}

/*
--- 1003,1015 ----
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, true);
return true;
}

/* xmax transaction committed */
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), true);
}

/*
***************
*** 1032,1038 ****
*/
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin,
! Buffer buffer)
{
/*
* Has inserting transaction committed?
--- 1036,1042 ----
*/
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin,
! Buffer buffer, bool flexible)
{
/*
* Has inserting transaction committed?
***************
*** 1055,1065 ****
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HEAPTUPLE_DEAD;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
--- 1059,1069 ----
if (TransactionIdDidCommit(xvac))
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, flexible);
return HEAPTUPLE_DEAD;
}
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, flexible);
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
***************
*** 1071,1081 ****
return HEAPTUPLE_INSERT_IN_PROGRESS;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HEAPTUPLE_DEAD;
}
}
--- 1075,1085 ----
return HEAPTUPLE_INSERT_IN_PROGRESS;
if (TransactionIdDidCommit(xvac))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! InvalidTransactionId, flexible);
else
{
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, flexible);
return HEAPTUPLE_DEAD;
}
}
***************
*** 1090,1103 ****
}
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple));
else
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed
*/
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId);
return HEAPTUPLE_DEAD;
}

--- 1094,1107 ----
}
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmin(tuple)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
! HeapTupleHeaderGetXmin(tuple), flexible);
else
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed
*/
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
! InvalidTransactionId, flexible);
return HEAPTUPLE_DEAD;
}

***************
*** 1144,1150 ****
* never actually update it.
*/
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
}
return HEAPTUPLE_LIVE;
}
--- 1148,1154 ----
* never actually update it.
*/
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, flexible);
}
return HEAPTUPLE_LIVE;
}
***************
*** 1162,1175 ****
return HEAPTUPLE_DELETE_IN_PROGRESS;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple));
else
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed
*/
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId);
return HEAPTUPLE_LIVE;
}

--- 1166,1179 ----
return HEAPTUPLE_DELETE_IN_PROGRESS;
else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
! HeapTupleHeaderGetXmax(tuple), flexible);
else
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed
*/
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
! InvalidTransactionId, flexible);
return HEAPTUPLE_LIVE;
}

Index: src/include/storage/buf_internals.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/buf_internals.h,v
retrieving revision 1.96
diff -c -r1.96 buf_internals.h
*** src/include/storage/buf_internals.h 12 Jun 2008 09:12:31 -0000 1.96
--- src/include/storage/buf_internals.h 30 Jun 2008 22:08:30 -0000
***************
*** 128,134 ****
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
! uint16 usage_count; /* usage counter for clock sweep code */
unsigned refcount; /* # of backends holding pins on buffer */
int wait_backend_pid; /* backend PID of pin-count waiter */

--- 128,135 ----
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
! uint8 usage_count; /* usage counter for clock sweep code */
! uint8 hint_count; /* hint counter for clock sweep code */
unsigned refcount; /* # of backends holding pins on buffer */
int wait_backend_pid; /* backend PID of pin-count waiter */

Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.113
diff -c -r1.113 bufmgr.h
*** src/include/storage/bufmgr.h 12 Jun 2008 09:12:31 -0000 1.113
--- src/include/storage/bufmgr.h 30 Jun 2008 22:08:30 -0000
***************
*** 177,183 ****
extern Size BufferShmemSize(void);
extern RelFileNode BufferGetFileNode(Buffer buffer);

! extern void SetBufferCommitInfoNeedsSave(Buffer buffer);

extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
--- 177,183 ----
extern Size BufferShmemSize(void);
extern RelFileNode BufferGetFileNode(Buffer buffer);

! extern void SetBufferCommitInfoNeedsSave(Buffer buffer, bool flexible);

extern void UnlockBuffers(void);
extern void LockBuffer(Buffer buffer, int mode);
Index: src/include/utils/tqual.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/utils/tqual.h,v
retrieving revision 1.73
diff -c -r1.73 tqual.h
*** src/include/utils/tqual.h 26 Mar 2008 21:10:39 -0000 1.73
--- src/include/utils/tqual.h 30 Jun 2008 22:08:30 -0000
***************
*** 82,90 ****
extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTupleHeader tuple,
CommandId curcid, Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTupleHeader tuple,
! TransactionId OldestXmin, Buffer buffer);

extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid);

#endif /* TQUAL_H */
--- 82,90 ----
extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTupleHeader tuple,
CommandId curcid, Buffer buffer);
extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTupleHeader tuple,
! TransactionId OldestXmin, Buffer buffer, bool flexible);

extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
! uint16 infomask, TransactionId xid, bool flexible);

#endif /* TQUAL_H */
On Fri, 2008-06-27 at 16:02 +0100, Simon Riggs wrote:
> The patch splits into two parts:
> * the machinery to *not* dirty a page when we set hints
> * behaviour modifications now that we can tell the difference between
> dirty and hinted pages
>
> Nobody has yet come up with any comments about the first half, which is
> good. The second part is clearly where much debate will occur. I'm going
> to literally split the patch into two, so we can get the machinery into
> CVS and then fiddle and argue over the second part over next few months.

The first "half" is actually quite large, but that makes it even more
sensible to commit this part now.

The enclosed patch introduces the machinery by which we might later
optimise hint bit setting. It differentiates between hint bit setting
and block dirtying, when the distinction can safely be made. It acts
safely during VACUUM and correctly during checkpoint. In all other
respects it emulates current behaviour.

The actual tuning patch can be discussed later, probably at length.
Later patches will be fairly small in comparison and so various people
can fairly easily come up with their own favoured modifications for
testing.

--
Simon Riggs

www.2ndQuadrant.com

PostgreSQL Training, Services and Support

No comments: