Friday, September 26, 2008

Re: [HACKERS] lock contention on parallel COPY ?

Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.263
diff -c -r1.263 heapam.c
*** src/backend/access/heap/heapam.c 11 Sep 2008 14:01:09 -0000 1.263
--- src/backend/access/heap/heapam.c 26 Sep 2008 19:16:27 -0000
***************
*** 1728,1733 ****
--- 1728,1748 ----
}
}

+ /*
+ * Begin/End Bulk Inserts
+ *
+ */
+ void
+ heap_begin_bulk_insert(void)
+ {
+ ReleaseBulkInsertBufferIfAny();
+ }
+
+ void
+ heap_end_bulk_insert(void)
+ {
+ ReleaseBulkInsertBufferIfAny();
+ }

/*
* heap_insert - insert tuple into a heap
***************
*** 1755,1765 ****
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
! bool use_wal, bool use_fsm)
{
TransactionId xid = GetCurrentTransactionId();
HeapTuple heaptup;
Buffer buffer;

if (relation->rd_rel->relhasoids)
{
--- 1770,1781 ----
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
! bool use_wal, bool use_fsm, bool bulk_insert_request)
{
TransactionId xid = GetCurrentTransactionId();
HeapTuple heaptup;
Buffer buffer;
+ bool bulk_insert = bulk_insert_request && !relation->rd_istemp;

if (relation->rd_rel->relhasoids)
{
***************
*** 1812,1820 ****
else
heaptup = tup;

! /* Find buffer to insert this tuple into */
! buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
! InvalidBuffer, use_fsm);

/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
--- 1828,1845 ----
else
heaptup = tup;

! /*
! * Find buffer to insert this tuple into
! */
! if (bulk_insert)
! {
! buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
! GetBulkInsertBuffer(), use_fsm, true);
! SetBulkInsertBuffer(buffer);
! }
! else
! buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
! InvalidBuffer, use_fsm, false);

/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
***************
*** 1893,1899 ****

END_CRIT_SECTION();

! UnlockReleaseBuffer(buffer);

/*
* If tuple is cachable, mark it for invalidation from the caches in case
--- 1918,1930 ----

END_CRIT_SECTION();

! /*
! * Keep buffer pinned if we are in bulk insert mode
! */
! if (bulk_insert)
! LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
! else
! UnlockReleaseBuffer(buffer);

/*
* If tuple is cachable, mark it for invalidation from the caches in case
***************
*** 1930,1936 ****
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
! return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
}

/*
--- 1961,1967 ----
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
! return heap_insert(relation, tup, GetCurrentCommandId(true), true, true, false);
}

/*
***************
*** 2553,2559 ****
{
/* Assume there's no chance to put heaptup on same page. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
! buffer, true);
}
else
{
--- 2584,2590 ----
{
/* Assume there's no chance to put heaptup on same page. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
! buffer, true, false);
}
else
{
***************
*** 2570,2576 ****
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
! buffer, true);
}
else
{
--- 2601,2607 ----
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
! buffer, true, false);
}
else
{
Index: src/backend/access/heap/hio.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/heap/hio.c,v
retrieving revision 1.72
diff -c -r1.72 hio.c
*** src/backend/access/heap/hio.c 13 Jul 2008 20:45:47 -0000 1.72
--- src/backend/access/heap/hio.c 26 Sep 2008 19:16:27 -0000
***************
*** 104,110 ****
*/
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
! Buffer otherBuffer, bool use_fsm)
{
Buffer buffer = InvalidBuffer;
Page page;
--- 104,110 ----
*/
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
! Buffer otherBuffer, bool use_fsm, bool bulk_insert)
{
Buffer buffer = InvalidBuffer;
Page page;
***************
*** 199,217 ****
buffer = otherBuffer;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
! else if (otherBlock < targetBlock)
{
! /* lock other buffer first */
buffer = ReadBuffer(relation, targetBlock);
- LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
! else
{
! /* lock target buffer first */
! buffer = ReadBuffer(relation, targetBlock);
! LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
}

/*
--- 199,226 ----
buffer = otherBuffer;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
! else if (bulk_insert)
{
! ReleaseBuffer(otherBuffer);
buffer = ReadBuffer(relation, targetBlock);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
! else if (!bulk_insert)
{
! if (otherBlock < targetBlock)
! {
! /* lock other buffer first */
! buffer = ReadBuffer(relation, targetBlock);
! LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
! LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! }
! else
! {
! /* lock target buffer first */
! buffer = ReadBuffer(relation, targetBlock);
! LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
! }
}

/*
***************
*** 266,271 ****
--- 275,283 ----
*/
needLock = !RELATION_IS_LOCAL(relation);

+ if (bulk_insert && otherBuffer != InvalidBuffer)
+ ReleaseBuffer(otherBuffer);
+
if (needLock)
LockRelationForExtension(relation, ExclusiveLock);

***************
*** 281,287 ****
* We can be certain that locking the otherBuffer first is OK, since it
* must have a lower page number.
*/
! if (otherBuffer != InvalidBuffer)
LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);

/*
--- 293,299 ----
* We can be certain that locking the otherBuffer first is OK, since it
* must have a lower page number.
*/
! if (!bulk_insert && otherBuffer != InvalidBuffer)
LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);

/*
Index: src/backend/access/heap/tuptoaster.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/heap/tuptoaster.c,v
retrieving revision 1.89
diff -c -r1.89 tuptoaster.c
*** src/backend/access/heap/tuptoaster.c 19 Jun 2008 00:46:03 -0000 1.89
--- src/backend/access/heap/tuptoaster.c 26 Sep 2008 19:16:27 -0000
***************
*** 1218,1224 ****
memcpy(VARDATA(&chunk_data), data_p, chunk_size);
toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);

! heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);

/*
* Create the index entry. We cheat a little here by not using
--- 1218,1224 ----
memcpy(VARDATA(&chunk_data), data_p, chunk_size);
toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);

! heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm, false);

/*
* Create the index entry. We cheat a little here by not using
Index: src/backend/access/transam/xact.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/access/transam/xact.c,v
retrieving revision 1.265
diff -c -r1.265 xact.c
*** src/backend/access/transam/xact.c 11 Aug 2008 11:05:10 -0000 1.265
--- src/backend/access/transam/xact.c 26 Sep 2008 19:16:27 -0000
***************
*** 2105,2110 ****
--- 2105,2111 ----
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
AtEOXact_Buffers(false);
+ AtEOXact_BulkInsert();
AtEOXact_RelationCache(false);
AtEOXact_Inval(false);
smgrDoPendingDeletes(false);
***************
*** 3941,3946 ****
--- 3942,3948 ----
ResourceOwnerRelease(s->curTransactionOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, false);
+ AtEOXact_BulkInsert();
AtEOSubXact_RelationCache(false, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_Inval(false);
Index: src/backend/commands/copy.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/commands/copy.c,v
retrieving revision 1.299
diff -c -r1.299 copy.c
*** src/backend/commands/copy.c 12 May 2008 20:01:59 -0000 1.299
--- src/backend/commands/copy.c 26 Sep 2008 19:16:27 -0000
***************
*** 1655,1660 ****
--- 1655,1661 ----
CommandId mycid = GetCurrentCommandId(true);
bool use_wal = true; /* by default, use WAL logging */
bool use_fsm = true; /* by default, use FSM for free space */
+ bool bulk_insert = true; /* by default, use bulk inserts */

Assert(cstate->rel);

***************
*** 1899,1904 ****
--- 1900,1908 ----
done = CopyReadLine(cstate);
}

+ if (bulk_insert)
+ heap_begin_bulk_insert();
+
while (!done)
{
bool skip_tuple;
***************
*** 2111,2117 ****
ExecConstraints(resultRelInfo, slot, estate);

/* OK, store the tuple and create index entries for it */
! heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm);

if (resultRelInfo->ri_NumIndices > 0)
ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
--- 2115,2121 ----
ExecConstraints(resultRelInfo, slot, estate);

/* OK, store the tuple and create index entries for it */
! heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm, bulk_insert);

if (resultRelInfo->ri_NumIndices > 0)
ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
***************
*** 2128,2133 ****
--- 2132,2140 ----
}
}

+ if (bulk_insert)
+ heap_end_bulk_insert();
+
/* Done, clean up */
error_context_stack = errcontext.previous;

Index: src/backend/executor/execMain.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/executor/execMain.c,v
retrieving revision 1.313
diff -c -r1.313 execMain.c
*** src/backend/executor/execMain.c 25 Aug 2008 22:42:32 -0000 1.313
--- src/backend/executor/execMain.c 26 Sep 2008 19:18:27 -0000
***************
*** 1648,1654 ****
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_output_cid,
! true, true);

IncrAppended();
(estate->es_processed)++;
--- 1648,1654 ----
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_output_cid,
! true, true, false);

IncrAppended();
(estate->es_processed)++;
***************
*** 2837,2843 ****
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
! /* no-op */
}

/*
--- 2837,2843 ----
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
! heap_begin_bulk_insert();
}

/*
***************
*** 2859,2865 ****
tuple,
myState->estate->es_output_cid,
myState->use_wal,
! false); /* never any point in using FSM */

/* We know this is a newly created relation, so there are no indexes */

--- 2859,2866 ----
tuple,
myState->estate->es_output_cid,
myState->use_wal,
! false, /* never any point in using FSM */
! true); /* always run a bulk insert */

/* We know this is a newly created relation, so there are no indexes */

***************
*** 2872,2878 ****
static void
intorel_shutdown(DestReceiver *self)
{
! /* no-op */
}

/*
--- 2873,2879 ----
static void
intorel_shutdown(DestReceiver *self)
{
! heap_end_bulk_insert();
}

/*
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.238
diff -c -r1.238 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 17 Sep 2008 13:15:55 -0000 1.238
--- src/backend/storage/buffer/bufmgr.c 26 Sep 2008 19:16:27 -0000
***************
*** 71,76 ****
--- 71,78 ----
/* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL;

+ /* local state for bulk inserts */
+ static Buffer BulkInsertBuffer = InvalidBuffer;

static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum,
BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy);
***************
*** 2193,2198 ****
--- 2195,2230 ----
}

/*
+ * BulkInsertBuffer manipulation
+ */
+ Buffer
+ GetBulkInsertBuffer(void)
+ {
+ return BulkInsertBuffer;
+ }
+
+ void
+ SetBulkInsertBuffer(Buffer buffer)
+ {
+ BulkInsertBuffer = buffer;
+ }
+
+ void
+ ReleaseBulkInsertBufferIfAny(void)
+ {
+ if (BufferIsValid(BulkInsertBuffer))
+ ReleaseBuffer(BulkInsertBuffer);
+
+ BulkInsertBuffer = InvalidBuffer;
+ }
+
+ void
+ AtEOXact_BulkInsert(void)
+ {
+ BulkInsertBuffer = InvalidBuffer;
+ }
+
+ /*
* IncrBufferRefCount
* Increment the pin count on a buffer that we have *already* pinned
* at least once.
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/heapam.h,v
retrieving revision 1.138
diff -c -r1.138 heapam.h
*** src/include/access/heapam.h 11 Aug 2008 11:05:11 -0000 1.138
--- src/include/access/heapam.h 26 Sep 2008 19:16:27 -0000
***************
*** 84,91 ****
ItemPointer tid);
extern void setLastTid(const ItemPointer tid);

extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
! bool use_wal, bool use_fsm);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
CommandId cid, Snapshot crosscheck, bool wait);
--- 84,93 ----
ItemPointer tid);
extern void setLastTid(const ItemPointer tid);

+ void heap_begin_bulk_insert(void);
+ void heap_end_bulk_insert(void);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
! bool use_wal, bool use_fsm, bool bulk_insert);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
ItemPointer ctid, TransactionId *update_xmax,
CommandId cid, Snapshot crosscheck, bool wait);
Index: src/include/access/hio.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/access/hio.h,v
retrieving revision 1.36
diff -c -r1.36 hio.h
*** src/include/access/hio.h 19 Jun 2008 00:46:06 -0000 1.36
--- src/include/access/hio.h 26 Sep 2008 19:16:27 -0000
***************
*** 21,26 ****
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
HeapTuple tuple);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
! Buffer otherBuffer, bool use_fsm);

#endif /* HIO_H */
--- 21,26 ----
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
HeapTuple tuple);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
! Buffer otherBuffer, bool use_fsm, bool bulk_insert);

#endif /* HIO_H */
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /home/sriggs/pg/REPOSITORY/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.115
diff -c -r1.115 bufmgr.h
*** src/include/storage/bufmgr.h 11 Aug 2008 11:05:11 -0000 1.115
--- src/include/storage/bufmgr.h 26 Sep 2008 19:16:27 -0000
***************
*** 197,202 ****
--- 197,207 ----

extern void AtProcExit_LocalBuffers(void);

+ extern Buffer GetBulkInsertBuffer(void);
+ extern void SetBulkInsertBuffer(Buffer buffer);
+ extern void ReleaseBulkInsertBufferIfAny(void);
+ extern void AtEOXact_BulkInsert(void);
+
/* in freelist.c */
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
On Fri, 2008-09-26 at 20:07 +0200, Stefan Kaltenbrunner wrote:
> Tom Lane wrote:
> > Simon Riggs <simon@2ndQuadrant.com> writes:
> >> Stefan Kaltenbrunner <stefan@kaltenbrunner.cc> writes:
> >>> samples % symbol name
> >>> 55526 16.5614 LWLockAcquire
> >>> 29721 8.8647 DoCopy
> >>> 26581 7.9281 CopyReadLine
> >>> 25105 7.4879 LWLockRelease
> >>> 15743 4.6956 PinBuffer
> >>> 14725 4.3919 heap_formtuple
> >
> >> Probably loading a table with a generated PK or loading data in
> >> ascending sequence, so its contending heavily for the rightmost edge of
> >> the index.
> >
> > No, given that DoCopy and CopyReadLine are right up there, I think we're
> > still looking at the COPY phase, not index building.
> >
> > The profile will probably change completely once index building
> > starts...
>
> yeah this profile is only showing the COPY phase ...

Try using this Postgres core patch. It's a updated version of my
fast_copy.v4.patch from Patches: Bulk Insert tuning 20 Mar 2008

--
Simon Riggs www.2ndQuadrant.com
PostgreSQL Training, Services and Support

No comments: