本文简单介绍了PG插入数据部分的源码,主要内容包括ExecInsert函数的实现逻辑,该函数位于nodeModifyTable.c文件中。
一、基础信息
按惯例,首先看看ExecInsert函数使用的数据结构、宏定义以及依赖的函数等。
数据结构/宏定义
1、ModifyTableState
/* ----------------
* PlanState node
*
* We never actually instantiate any PlanState nodes; this is just the common
* abstract superclass for all PlanState-type nodes.
* ----------------
*/
typedef struct PlanState
{
NodeTag type;
Plan *plan; /* associated Plan node */
EState *state; /* at execution time, states of individual
* nodes point to one EState for the whole
* top-level plan */
ExecProcNodeMtd ExecProcNode; /* function to return next tuple */
ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a
* wrapper */
Instrumentation *instrument; /* Optional runtime stats for this node */
WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */
/*
* Common structural data for all Plan types. These links to subsidiary
* state trees parallel links in the associated plan tree (except for the
* subPlan list, which does not exist in the plan tree).
*/
ExprState *qual; /* boolean qual condition */
struct PlanState *lefttree; /* input plan tree(s) */
struct PlanState *righttree;
List *initPlan; /* Init SubPlanState nodes (un-correlated expr
* subselects) */
List *subPlan; /* SubPlanState nodes in my expressions */
/*
* State for management of parameter-change-driven rescanning
*/
Bitmapset *chgParam; /* set of IDs of changed Params */
/*
* Other run-time state needed by most if not all node types.
*/
TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
ExprContext *ps_ExprContext; /* node's expression-evaluation context */
ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */
/*
* Scanslot's descriptor if known. This is a bit of a hack, but otherwise
* it's hard for expression compilation to optimize based on the
* descriptor, without encoding knowledge about all executor nodes.
*/
TupleDesc scandesc;
} PlanState;
/* ----------------
* ModifyTableState information
* ----------------
*/
typedef struct ModifyTableState
{
PlanState ps; /* its first field is NodeTag */
CmdType operation; /* INSERT, UPDATE, or DELETE */
bool canSetTag; /* do we set the command tag/es_processed? */
bool mt_done; /* are we done? */
PlanState **mt_plans; /* subplans (one per target rel) */
int mt_nplans; /* number of plans in the array */
int mt_whichplan; /* which one is being executed (0..n-1) */
ResultRelInfo *resultRelInfo; /* per-subplan target relations */
ResultRelInfo *rootResultRelInfo; /* root target relation (partitioned
* table root) */
List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */
EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
bool fireBSTriggers; /* do we need to fire stmt triggers? */
TupleTableSlot *mt_existing; /* slot to store existing target tuple in */
List *mt_excludedtlist; /* the excluded pseudo relation's tlist */
TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection target */
/* Tuple-routing support info */
struct PartitionTupleRouting *mt_partition_tuple_routing;
/* controls transition table population for specified operation */
struct TransitionCaptureState *mt_transition_capture;
/* controls transition table population for INSERT...ON CONFLICT UPDATE */
struct TransitionCaptureState *mt_oc_transition_capture;
/* Per plan map for tuple conversion from child to root */
TupleConversionMap **mt_per_subplan_tupconv_maps;
} ModifyTableState;
2、TupleTableSlot
/*----------
* The executor stores tuples in a "tuple table" which is a List of
* independent TupleTableSlots. There are several cases we need to handle:
* 1. physical tuple in a disk buffer page
* 2. physical tuple constructed in palloc'ed memory
* 3. "minimal" physical tuple constructed in palloc'ed memory
* 4. "virtual" tuple consisting of Datum/isnull arrays
*
* The first two cases are similar in that they both deal with "materialized"
* tuples, but resource management is different. For a tuple in a disk page
* we need to hold a pin on the buffer until the TupleTableSlot's reference
* to the tuple is dropped; while for a palloc'd tuple we usually want the
* tuple pfree'd when the TupleTableSlot's reference is dropped.
*
* A "minimal" tuple is handled similarly to a palloc'd regular tuple.
* At present, minimal tuples never are stored in buffers, so there is no
* parallel to case 1. Note that a minimal tuple has no "system columns".
* (Actually, it could have an OID, but we have no need to access the OID.)
*
* A "virtual" tuple is an optimization used to minimize physical data
* copying in a nest of plan nodes. Any pass-by-reference Datums in the
* tuple point to storage that is not directly associated with the
* TupleTableSlot; generally they will point to part of a tuple stored in
* a lower plan node's output TupleTableSlot, or to a function result
* constructed in a plan node's per-tuple econtext. It is the responsibility
* of the generating plan node to be sure these resources are not released
* for as long as the virtual tuple needs to be valid. We only use virtual
* tuples in the result slots of plan nodes --- tuples to be copied anywhere
* else need to be "materialized" into physical tuples. Note also that a
* virtual tuple does not have any "system columns".
*
* It is also possible for a TupleTableSlot to hold both physical and minimal
* copies of a tuple. This is done when the slot is requested to provide
* the format other than the one it currently holds. (Originally we attempted
* to handle such requests by replacing one format with the other, but that
* had the fatal defect of invalidating any pass-by-reference Datums pointing
* into the existing slot contents.) Both copies must contain identical data
* payloads when this is the case.
*
* The Datum/isnull arrays of a TupleTableSlot serve double duty. When the
* slot contains a virtual tuple, they are the authoritative data. When the
* slot contains a physical tuple, the arrays contain data extracted from
* the tuple. (In this state, any pass-by-reference Datums point into
* the physical tuple.) The extracted information is built "lazily",
* ie, only as needed. This serves to avoid repeated extraction of data
* from the physical tuple.
*
* A TupleTableSlot can also be "empty", holding no valid data. This is
* the only valid state for a freshly-created slot that has not yet had a
* tuple descriptor assigned to it. In this state, tts_isempty must be
* true, tts_shouldFree false, tts_tuple NULL, tts_buffer InvalidBuffer,
* and tts_nvalid zero.
*
* The tupleDescriptor is simply referenced, not copied, by the TupleTableSlot
* code. The caller of ExecSetSlotDescriptor() is responsible for providing
* a descriptor that will live as long as the slot does. (Typically, both
* slots and descriptors are in per-query memory and are freed by memory
* context deallocation at query end; so it's not worth providing any extra
* mechanism to do more. However, the slot will increment the tupdesc
* reference count if a reference-counted tupdesc is supplied.)
*
* When tts_shouldFree is true, the physical tuple is "owned" by the slot
* and should be freed when the slot's reference to the tuple is dropped.
*
* If tts_buffer is not InvalidBuffer, then the slot is holding a pin
* on the indicated buffer page; drop the pin when we release the
* slot's reference to that buffer. (tts_shouldFree should always be
* false in such a case, since presumably tts_tuple is pointing at the
* buffer page.)
*
* tts_nvalid indicates the number of valid columns in the tts_values/isnull
* arrays. When the slot is holding a "virtual" tuple this must be equal
* to the descriptor's natts. When the slot is holding a physical tuple
* this is equal to the number of columns we have extracted (we always
* extract columns from left to right, so there are no holes).
*
* tts_values/tts_isnull are allocated when a descriptor is assigned to the
* slot; they are of length equal to the descriptor's natts.
*
* tts_mintuple must always be NULL if the slot does not hold a "minimal"
* tuple. When it does, tts_mintuple points to the actual MinimalTupleData
* object (the thing to be pfree'd if tts_shouldFreeMin is true). If the slot
* has only a minimal and not also a regular physical tuple, then tts_tuple
* points at tts_minhdr and the fields of that struct are set correctly
* for access to the minimal tuple; in particular, tts_minhdr.t_data points
* MINIMAL_TUPLE_OFFSET bytes before tts_mintuple. This allows column
* extraction to treat the case identically to regular physical tuples.
*
* tts_slow/tts_off are saved state for slot_deform_tuple, and should not
* be touched by any other code.
*----------
*/
typedef struct TupleTableSlot
{
NodeTag type;
bool tts_isempty; /* true = slot is empty */
bool tts_shouldFree; /* should pfree tts_tuple? */
bool tts_shouldFreeMin; /* should pfree tts_mintuple? */
#define FIELDNO_TUPLETABLESLOT_SLOW 4
bool tts_slow; /* saved state for slot_deform_tuple */
#define FIELDNO_TUPLETABLESLOT_TUPLE 5
HeapTuple tts_tuple; /* physical tuple, or NULL if virtual */
#define FIELDNO_TUPLETABLESLOT_TUPLEDESCRIPTOR 6
TupleDesc tts_tupleDescriptor; /* slot's tuple descriptor */
MemoryContext tts_mcxt; /* slot itself is in this context */
Buffer tts_buffer; /* tuple's buffer, or InvalidBuffer */
#define FIELDNO_TUPLETABLESLOT_NVALID 9
int tts_nvalid; /* # of valid values in tts_values */
#define FIELDNO_TUPLETABLESLOT_VALUES 10
Datum *tts_values; /* current per-attribute values */
#define FIELDNO_TUPLETABLESLOT_ISNULL 11
bool *tts_isnull; /* current per-attribute isnull flags */
MinimalTuple tts_mintuple; /* minimal tuple, or NULL if none */
HeapTupleData tts_minhdr; /* workspace for minimal-tuple-only case */
#define FIELDNO_TUPLETABLESLOT_OFF 14
uint32 tts_off; /* saved state for slot_deform_tuple */
bool tts_fixedTupleDescriptor; /* descriptor can't be changed */
} TupleTableSlot;
3、EState
/* ----------------
* EState information
*
* Master working state for an Executor invocation
* ----------------
*/
typedef struct EState
{
NodeTag type;
/* Basic state for all query types: */
ScanDirection es_direction; /* current scan direction */
Snapshot es_snapshot; /* time qual to use */
Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */
List *es_range_table; /* List of RangeTblEntry */
PlannedStmt *es_plannedstmt; /* link to top of plan tree */
const char *es_sourceText; /* Source text from QueryDesc */
JunkFilter *es_junkFilter; /* top-level junk filter, if any */
/* If query can insert/delete tuples, the command ID to mark them with */
CommandId es_output_cid;
/* Info about target table(s) for insert/update/delete queries: */
ResultRelInfo *es_result_relations; /* array of ResultRelInfos */
int es_num_result_relations; /* length of array */
ResultRelInfo *es_result_relation_info; /* currently active array elt */
/*
* Info about the target partitioned target table root(s) for
* update/delete queries. They required only to fire any per-statement
* triggers defined on the table. It exists separately from
* es_result_relations, because partitioned tables don't appear in the
* plan tree for the update/delete cases.
*/
ResultRelInfo *es_root_result_relations; /* array of ResultRelInfos */
int es_num_root_result_relations; /* length of the array */
/*
* The following list contains ResultRelInfos created by the tuple routing
* code for partitions that don't already have one.
*/
List *es_tuple_routing_result_relations;
/* Stuff used for firing triggers: */
List *es_trig_target_relations; /* trigger-only ResultRelInfos */
TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */
TupleTableSlot *es_trig_oldtup_slot; /* for TriggerEnabled */
TupleTableSlot *es_trig_newtup_slot; /* for TriggerEnabled */
/* Parameter info: */
ParamListInfo es_param_list_info; /* values of external params */
ParamExecData *es_param_exec_vals; /* values of internal params */
QueryEnvironment *es_queryEnv; /* query environment */
/* Other working state: */
MemoryContext es_query_cxt; /* per-query context in which EState lives */
List *es_tupleTable; /* List of TupleTableSlots */
List *es_rowMarks; /* List of ExecRowMarks */
uint64 es_processed; /* # of tuples processed */
Oid es_lastoid; /* last oid processed (by INSERT) */
int es_top_eflags; /* eflags passed to ExecutorStart */
int es_instrument; /* OR of InstrumentOption flags */
bool es_finished; /* true when ExecutorFinish is done */
List *es_exprcontexts; /* List of ExprContexts within EState */
List *es_subplanstates; /* List of PlanState for SubPlans */
List *es_auxmodifytables; /* List of secondary ModifyTableStates */
/*
* this ExprContext is for per-output-tuple operations, such as constraint
* checks and index-value computations. It will be reset for each output
* tuple. Note that it will be created only if needed.
*/
ExprContext *es_per_tuple_exprcontext;
/*
* These fields are for re-evaluating plan quals when an updated tuple is
* substituted in READ COMMITTED mode. es_epqTuple[] contains tuples that
* scan plan nodes should return instead of whatever they'd normally
* return, or NULL if nothing to return; es_epqTupleSet[] is true if a
* particular array entry is valid; and es_epqScanDone[] is state to
* remember if the tuple has been returned already. Arrays are of size
* list_length(es_range_table) and are indexed by scan node scanrelid - 1.
*/
HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */
bool *es_epqTupleSet; /* true if EPQ tuple is provided */
bool *es_epqScanDone; /* true if EPQ tuple has been fetched */
bool es_use_parallel_mode; /* can we use parallel workers? */
/* The per-query shared memory area to use for parallel execution. */
struct dsa_area *es_query_dsa;
/*
* JIT information. es_jit_flags indicates whether JIT should be performed
* and with which options. es_jit is created on-demand when JITing is
* performed.
*/
int es_jit_flags;
struct JitContext *es_jit;
} EState;
4、ResultRelInfo
/*
* ResultRelInfo
*
* Whenever we update an existing relation, we have to update indexes on the
* relation, and perhaps also fire triggers. ResultRelInfo holds all the
* information needed about a result relation, including indexes.
*/
typedef struct ResultRelInfo
{
NodeTag type;
/* result relation's range table index */
Index ri_RangeTableIndex;
/* relation descriptor for result relation */
Relation ri_RelationDesc;
/* # of indices existing on result relation */
int ri_NumIndices;
/* array of relation descriptors for indices */
RelationPtr ri_IndexRelationDescs;
/* array of key/attr info for indices */
IndexInfo **ri_IndexRelationInfo;
/* triggers to be fired, if any */
TriggerDesc *ri_TrigDesc;
/* cached lookup info for trigger functions */
FmgrInfo *ri_TrigFunctions;
/* array of trigger WHEN expr states */
ExprState **ri_TrigWhenExprs;
/* optional runtime measurements for triggers */
Instrumentation *ri_TrigInstrument;
/* FDW callback functions, if foreign table */
struct FdwRoutine *ri_FdwRoutine;
/* available to save private state of FDW */
void *ri_FdwState;
/* true when modifying foreign table directly */
bool ri_usesFdwDirectModify;
/* list of WithCheckOption's to be checked */
List *ri_WithCheckOptions;
/* list of WithCheckOption expr states */
List *ri_WithCheckOptionExprs;
/* array of constraint-checking expr states */
ExprState **ri_ConstraintExprs;
/* for removing junk attributes from tuples */
JunkFilter *ri_junkFilter;
/* list of RETURNING expressions */
List *ri_returningList;
/* for computing a RETURNING list */
ProjectionInfo *ri_projectReturning;
/* list of arbiter indexes to use to check conflicts */
List *ri_onConflictArbiterIndexes;
/* ON CONFLICT evaluation state */
OnConflictSetState *ri_onConflict;
/* partition check expression */
List *ri_PartitionCheck;
/* partition check expression state */
ExprState *ri_PartitionCheckExpr;
/* relation descriptor for root partitioned table */
Relation ri_PartitionRoot;
/* true if ready for tuple routing */
bool ri_PartitionReadyForRouting;
} ResultRelInfo;
5、List
typedef struct ListCell ListCell;
typedef struct List
{
NodeTag type; /* T_List, T_IntList, or T_OidList */
int length;
ListCell *head;
ListCell *tail;
} List;
struct ListCell
{
union
{
void *ptr_value;
int int_value;
Oid oid_value;
} data;
ListCell *next;
};
6、TransitionCaptureState
typedef struct TransitionCaptureState
{
/*
* Is there at least one trigger specifying each transition relation on
* the relation explicitly named in the DML statement or COPY command?
* Note: in current usage, these flags could be part of the private state,
* but it seems possibly useful to let callers see them.
*/
bool tcs_delete_old_table;
bool tcs_update_old_table;
bool tcs_update_new_table;
bool tcs_insert_new_table;
/*
* For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the
* new and old tuples from a child table's format to the format of the
* relation named in a query so that it is compatible with the transition
* tuplestores. The caller must store the conversion map here if so.
*/
TupleConversionMap *tcs_map;
/*
* For INSERT and COPY, it would be wasteful to convert tuples from child
* format to parent format after they have already been converted in the
* opposite direction during routing. In that case we bypass conversion
* and allow the inserting code (copy.c and nodeModifyTable.c) to provide
* the original tuple directly.
*/
HeapTuple tcs_original_insert_tuple;
/*
* Private data including the tuplestore(s) into which to insert tuples.
*/
struct AfterTriggersTableData *tcs_private;
} TransitionCaptureState;
*7、ModifyTable *
/* ----------------
* ModifyTable node -
* Apply rows produced by subplan(s) to result table(s),
* by inserting, updating, or deleting.
*
* Note that rowMarks and epqParam are presumed to be valid for all the
* subplan(s); they can't contain any info that varies across subplans.
* ----------------
*/
typedef struct ModifyTable
{
Plan plan;
CmdType operation; /* INSERT, UPDATE, or DELETE */
bool canSetTag; /* do we set the command tag/es_processed? */
Index nominalRelation; /* Parent RT index for use of EXPLAIN */
/* RT indexes of non-leaf tables in a partition tree */
List *partitioned_rels;
bool partColsUpdated; /* some part key in hierarchy updated */
List *resultRelations; /* integer list of RT indexes */
int resultRelIndex; /* index of first resultRel in plan's list */
int rootResultRelIndex; /* index of the partitioned table root */
List *plans; /* plan(s) producing source data */
List *withCheckOptionLists; /* per-target-table WCO lists */
List *returningLists; /* per-target-table RETURNING tlists */
List *fdwPrivLists; /* per-target-table FDW private data lists */
Bitmapset *fdwDirectModifyPlans; /* indices of FDW DM plans */
List *rowMarks; /* PlanRowMarks (non-locking only) */
int epqParam; /* ID of Param for EvalPlanQual re-eval */
OnConflictAction onConflictAction; /* ON CONFLICT action */
List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */
List *onConflictSet; /* SET for INSERT ON CONFLICT DO UPDATE */
Node *onConflictWhere; /* WHERE for ON CONFLICT UPDATE */
Index exclRelRTI; /* RTI of the EXCLUDED pseudo relation */
List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} ModifyTable;
8、OnConflictAction
/*
* OnConflictAction -
* "ON CONFLICT" clause type of query
*
* This is needed in both parsenodes.h and plannodes.h, so put it here...
*/
typedef enum OnConflictAction
{
ONCONFLICT_NONE, /* No "ON CONFLICT" clause */
ONCONFLICT_NOTHING, /* ON CONFLICT ... DO NOTHING */
ONCONFLICT_UPDATE /* ON CONFLICT ... DO UPDATE */
} OnConflictAction;
8、MemoryContext
typedef struct MemoryContextData
{
NodeTag type; /* identifies exact kind of context */
/* these two fields are placed here to minimize alignment wastage: */
bool isReset; /* T = no space alloced since last reset */
bool allowInCritSection; /* allow palloc in critical section */
const MemoryContextMethods *methods; /* virtual function table */
MemoryContext parent; /* NULL if no parent (toplevel context) */
MemoryContext firstchild; /* head of linked list of children */
MemoryContext prevchild; /* previous child of same parent */
MemoryContext nextchild; /* next child of same parent */
const char *name; /* context name (just for debugging) */
const char *ident; /* context ID if any (just for debugging) */
MemoryContextCallback *reset_cbs; /* list of reset/delete callbacks */
} MemoryContextData;
/* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
/*
* Type MemoryContextData is declared in nodes/memnodes.h. Most users
* of memory allocation should just treat it as an abstract type, so we
* do not provide the struct contents here.
*/
typedef struct MemoryContextData *MemoryContext;
依赖的函数
1、ExecMaterializeSlot
/* --------------------------------
* ExecMaterializeSlot
* Force a slot into the "materialized" state.
*
* This causes the slot's tuple to be a local copy not dependent on
* any external storage. A pointer to the contained tuple is returned.
*
* A typical use for this operation is to prepare a computed tuple
* for being stored on disk. The original data may or may not be
* virtual, but in any case we need a private copy for heap_insert
* to scribble on.
* --------------------------------
*/
HeapTuple
ExecMaterializeSlot(TupleTableSlot *slot)
{
MemoryContext oldContext;
/*
* sanity checks
*/
Assert(slot != NULL);
Assert(!slot->tts_isempty);
/*
* If we have a regular physical tuple, and it's locally palloc'd, we have
* nothing to do.
*/
if (slot->tts_tuple && slot->tts_shouldFree)
return slot->tts_tuple;
/*
* Otherwise, copy or build a physical tuple, and store it into the slot.
*
* We may be called in a context that is shorter-lived than the tuple
* slot, but we have to ensure that the materialized tuple will survive
* anyway.
*/
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);//内存上下文切换至slot->tts_mcxt
slot->tts_tuple = ExecCopySlotTuple(slot);
slot->tts_shouldFree = true;
MemoryContextSwitchTo(oldContext);//内存上下文切换回原来
/*
* Drop the pin on the referenced buffer, if there is one.
*/
if (BufferIsValid(slot->tts_buffer))
ReleaseBuffer(slot->tts_buffer);
slot->tts_buffer = InvalidBuffer;
/*
* Mark extracted state invalid. This is important because the slot is
* not supposed to depend any more on the previous external data; we
* mustn't leave any dangling pass-by-reference datums in tts_values.
* However, we have not actually invalidated any such datums, if there
* happen to be any previously fetched from the slot. (Note in particular
* that we have not pfree'd tts_mintuple, if there is one.)
*/
slot->tts_nvalid = 0;
/*
* On the same principle of not depending on previous remote storage,
* forget the mintuple if it's not local storage. (If it is local
* storage, we must not pfree it now, since callers might have already
* fetched datum pointers referencing it.)
*/
if (!slot->tts_shouldFreeMin)
slot->tts_mintuple = NULL;
return slot->tts_tuple;
}
#ifndef FRONTEND
static inline MemoryContext
MemoryContextSwitchTo(MemoryContext context)
{
MemoryContext old = CurrentMemoryContext;
CurrentMemoryContext = context;
return old;
}
#endif /* FRONTEND */
2、HeapTupleSetOid
#define HeapTupleSetOid(tuple, oid) \
HeapTupleHeaderSetOid((tuple)->t_data, (oid))
3、ExecBRInsertTriggers
TupleTableSlot *
ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
HeapTuple slottuple = ExecMaterializeSlot(slot);
HeapTuple newtuple = slottuple;
HeapTuple oldtuple;
TriggerData LocTriggerData;
int i;
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event = TRIGGER_EVENT_INSERT |
TRIGGER_EVENT_ROW |
TRIGGER_EVENT_BEFORE;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
LocTriggerData.tg_oldtable = NULL;
LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
TRIGGER_TYPE_ROW,
TRIGGER_TYPE_BEFORE,
TRIGGER_TYPE_INSERT))
continue;
if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
NULL, NULL, newtuple))
continue;
LocTriggerData.tg_trigtuple = oldtuple = newtuple;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_trigger = trigger;
newtuple = ExecCallTriggerFunc(&LocTriggerData,
i,
relinfo->ri_TrigFunctions,
relinfo->ri_TrigInstrument,
GetPerTupleMemoryContext(estate));
if (oldtuple != newtuple && oldtuple != slottuple)
heap_freetuple(oldtuple);
if (newtuple == NULL)
return NULL; /* "do nothing" */
}
if (newtuple != slottuple)
{
/*
* Return the modified tuple using the es_trig_tuple_slot. We assume
* the tuple was allocated in per-tuple memory context, and therefore
* will go away by itself. The tuple table slot should not try to
* clear it.
*/
TupleTableSlot *newslot = estate->es_trig_tuple_slot;
TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
if (newslot->tts_tupleDescriptor != tupdesc)
ExecSetSlotDescriptor(newslot, tupdesc);
ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
slot = newslot;
}
return slot;
}
4、ExecIRInsertTriggers
TupleTableSlot *
ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TupleTableSlot *slot)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
HeapTuple slottuple = ExecMaterializeSlot(slot);
HeapTuple newtuple = slottuple;
HeapTuple oldtuple;
TriggerData LocTriggerData;
int i;
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event = TRIGGER_EVENT_INSERT |
TRIGGER_EVENT_ROW |
TRIGGER_EVENT_INSTEAD;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
LocTriggerData.tg_oldtable = NULL;
LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
TRIGGER_TYPE_ROW,
TRIGGER_TYPE_INSTEAD,
TRIGGER_TYPE_INSERT))
continue;
if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
NULL, NULL, newtuple))
continue;
LocTriggerData.tg_trigtuple = oldtuple = newtuple;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_trigger = trigger;
newtuple = ExecCallTriggerFunc(&LocTriggerData,
i,
relinfo->ri_TrigFunctions,
relinfo->ri_TrigInstrument,
GetPerTupleMemoryContext(estate));
if (oldtuple != newtuple && oldtuple != slottuple)
heap_freetuple(oldtuple);
if (newtuple == NULL)
return NULL; /* "do nothing" */
}
if (newtuple != slottuple)
{
/*
* Return the modified tuple using the es_trig_tuple_slot. We assume
* the tuple was allocated in per-tuple memory context, and therefore
* will go away by itself. The tuple table slot should not try to
* clear it.
*/
TupleTableSlot *newslot = estate->es_trig_tuple_slot;
TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
if (newslot->tts_tupleDescriptor != tupdesc)
ExecSetSlotDescriptor(newslot, tupdesc);
ExecStoreTuple(newtuple, newslot, InvalidBuffer, false);
slot = newslot;
}
return slot;
}
5、ExecForeignInsert
-- 函数指针
typedef TupleTableSlot *(*ExecForeignInsert_function) (EState *estate,
ResultRelInfo *rinfo,
TupleTableSlot *slot,
TupleTableSlot *planSlot);
ExecForeignInsert_function ExecForeignInsert;
6、RelationGetRelid
/*
* RelationGetRelid
* Returns the OID of the relation
*/
#define RelationGetRelid(relation) ((relation)->rd_id)
7、ExecWithCheckOptions
/*
* ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs
* of the specified kind.
*
* Note that this needs to be called multiple times to ensure that all kinds of
* WITH CHECK OPTIONs are handled (both those from views which have the WITH
* CHECK OPTION set and from row level security policies). See ExecInsert()
* and ExecUpdate().
*/
void
ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate)
{
Relation rel = resultRelInfo->ri_RelationDesc;
TupleDesc tupdesc = RelationGetDescr(rel);
ExprContext *econtext;
ListCell *l1,
*l2;
/*
* We will use the EState's per-tuple context for evaluating constraint
* expressions (creating it if it's not already there).
*/
econtext = GetPerTupleExprContext(estate);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Check each of the constraints */
forboth(l1, resultRelInfo->ri_WithCheckOptions,
l2, resultRelInfo->ri_WithCheckOptionExprs)
{
WithCheckOption *wco = (WithCheckOption *) lfirst(l1);
ExprState *wcoExpr = (ExprState *) lfirst(l2);
/*
* Skip any WCOs which are not the kind we are looking for at this
* time.
*/
if (wco->kind != kind)
continue;
/*
* WITH CHECK OPTION checks are intended to ensure that the new tuple
* is visible (in the case of a view) or that it passes the
* 'with-check' policy (in the case of row security). If the qual
* evaluates to NULL or FALSE, then the new tuple won't be included in
* the view or doesn't pass the 'with-check' policy for the table.
*/
if (!ExecQual(wcoExpr, econtext))
{
char *val_desc;
Bitmapset *modifiedCols;
Bitmapset *insertedCols;
Bitmapset *updatedCols;
switch (wco->kind)
{
/*
* For WITH CHECK OPTIONs coming from views, we might be
* able to provide the details on the row, depending on
* the permissions on the relation (that is, if the user
* could view it directly anyway). For RLS violations, we
* don't include the data since we don't know if the user
* should be able to view the tuple as that depends on the
* USING policy.
*/
case WCO_VIEW_CHECK:
/* See the comment in ExecConstraints(). */
if (resultRelInfo->ri_PartitionRoot)
{
HeapTuple tuple = ExecFetchSlotTuple(slot);
TupleDesc old_tupdesc = RelationGetDescr(rel);
TupleConversionMap *map;
rel = resultRelInfo->ri_PartitionRoot;
tupdesc = RelationGetDescr(rel);
/* a reverse map */
map = convert_tuples_by_name(old_tupdesc, tupdesc,
gettext_noop("could not convert row type"));
if (map != NULL)
{
tuple = do_convert_tuple(tuple, map);
ExecSetSlotDescriptor(slot, tupdesc);
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
}
}
insertedCols = GetInsertedColumns(resultRelInfo, estate);
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
modifiedCols = bms_union(insertedCols, updatedCols);
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
slot,
tupdesc,
modifiedCols,
64);
ereport(ERROR,
(errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION),
errmsg("new row violates check option for view \"%s\"",
wco->relname),
val_desc ? errdetail("Failing row contains %s.",
val_desc) : 0));
break;
case WCO_RLS_INSERT_CHECK:
case WCO_RLS_UPDATE_CHECK:
if (wco->polname != NULL)
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("new row violates row-level security policy \"%s\" for table \"%s\"",
wco->polname, wco->relname)));
else
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("new row violates row-level security policy for table \"%s\"",
wco->relname)));
break;
case WCO_RLS_CONFLICT_CHECK:
if (wco->polname != NULL)
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"",
wco->polname, wco->relname)));
else
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("new row violates row-level security policy (USING expression) for table \"%s\"",
wco->relname)));
break;
default:
elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
break;
}
}
}
}
8、ExecConstraints
/*
* ExecConstraints - check constraints of the tuple in 'slot'
*
* This checks the traditional NOT NULL and check constraints.
*
* The partition constraint is *NOT* checked.
*
* Note: 'slot' contains the tuple to check the constraints of, which may
* have been converted from the original input tuple after tuple routing.
* 'resultRelInfo' is the final result relation, after tuple routing.
*/
void
ExecConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate)
{
Relation rel = resultRelInfo->ri_RelationDesc;
TupleDesc tupdesc = RelationGetDescr(rel);
TupleConstr *constr = tupdesc->constr;
Bitmapset *modifiedCols;
Bitmapset *insertedCols;
Bitmapset *updatedCols;
Assert(constr || resultRelInfo->ri_PartitionCheck);
if (constr && constr->has_not_null)
{
int natts = tupdesc->natts;
int attrChk;
for (attrChk = 1; attrChk <= natts; attrChk++)
{
Form_pg_attribute att = TupleDescAttr(tupdesc, attrChk - 1);
if (att->attnotnull && slot_attisnull(slot, attrChk))
{
char *val_desc;
Relation orig_rel = rel;
TupleDesc orig_tupdesc = RelationGetDescr(rel);
/*
* If the tuple has been routed, it's been converted to the
* partition's rowtype, which might differ from the root
* table's. We must convert it back to the root table's
* rowtype so that val_desc shown error message matches the
* input tuple.
*/
if (resultRelInfo->ri_PartitionRoot)
{
HeapTuple tuple = ExecFetchSlotTuple(slot);
TupleConversionMap *map;
rel = resultRelInfo->ri_PartitionRoot;
tupdesc = RelationGetDescr(rel);
/* a reverse map */
map = convert_tuples_by_name(orig_tupdesc, tupdesc,
gettext_noop("could not convert row type"));
if (map != NULL)
{
tuple = do_convert_tuple(tuple, map);
ExecSetSlotDescriptor(slot, tupdesc);
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
}
}
insertedCols = GetInsertedColumns(resultRelInfo, estate);
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
modifiedCols = bms_union(insertedCols, updatedCols);
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
slot,
tupdesc,
modifiedCols,
64);
ereport(ERROR,
(errcode(ERRCODE_NOT_NULL_VIOLATION),
errmsg("null value in column \"%s\" violates not-null constraint",
NameStr(att->attname)),
val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
errtablecol(orig_rel, attrChk)));
}
}
}
if (constr && constr->num_check > 0)
{
const char *failed;
if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL)
{
char *val_desc;
Relation orig_rel = rel;
/* See the comment above. */
if (resultRelInfo->ri_PartitionRoot)
{
HeapTuple tuple = ExecFetchSlotTuple(slot);
TupleDesc old_tupdesc = RelationGetDescr(rel);
TupleConversionMap *map;
rel = resultRelInfo->ri_PartitionRoot;
tupdesc = RelationGetDescr(rel);
/* a reverse map */
map = convert_tuples_by_name(old_tupdesc, tupdesc,
gettext_noop("could not convert row type"));
if (map != NULL)
{
tuple = do_convert_tuple(tuple, map);
ExecSetSlotDescriptor(slot, tupdesc);
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
}
}
insertedCols = GetInsertedColumns(resultRelInfo, estate);
updatedCols = GetUpdatedColumns(resultRelInfo, estate);
modifiedCols = bms_union(insertedCols, updatedCols);
val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
slot,
tupdesc,
modifiedCols,
64);
ereport(ERROR,
(errcode(ERRCODE_CHECK_VIOLATION),
errmsg("new row for relation \"%s\" violates check constraint \"%s\"",
RelationGetRelationName(orig_rel), failed),
val_desc ? errdetail("Failing row contains %s.", val_desc) : 0,
errtableconstraint(orig_rel, failed)));
}
}
}
9、ExecPartitionCheck
/*
* ExecPartitionCheck --- check that tuple meets the partition constraint.
*
* Returns true if it meets the partition constraint. If the constraint
* fails and we're asked to emit to error, do so and don't return; otherwise
* return false.
*/
bool
ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
EState *estate, bool emitError)
{
ExprContext *econtext;
bool success;
/*
* If first time through, build expression state tree for the partition
* check expression. Keep it in the per-query memory context so they'll
* survive throughout the query.
*/
if (resultRelInfo->ri_PartitionCheckExpr == NULL)
{
List *qual = resultRelInfo->ri_PartitionCheck;
resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate);
}
/*
* We will use the EState's per-tuple context for evaluating constraint
* expressions (creating it if it's not already there).
*/
econtext = GetPerTupleExprContext(estate);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/*
* As in case of the catalogued constraints, we treat a NULL result as
* success here, not a failure.
*/
success = ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext);
/* if asked to emit error, don't actually return on failure */
if (!success && emitError)
ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
return success;
}
10、ExecCheckIndexConstraints
/* ----------------------------------------------------------------
* ExecCheckIndexConstraints
*
* This routine checks if a tuple violates any unique or
* exclusion constraints. Returns true if there is no conflict.
* Otherwise returns false, and the TID of the conflicting
* tuple is returned in *conflictTid.
*
* If 'arbiterIndexes' is given, only those indexes are checked.
* NIL means all indexes.
*
* Note that this doesn't lock the values in any way, so it's
* possible that a conflicting tuple is inserted immediately
* after this returns. But this can be used for a pre-check
* before insertion.
* ----------------------------------------------------------------
*/
bool
ExecCheckIndexConstraints(TupleTableSlot *slot,
EState *estate, ItemPointer conflictTid,
List *arbiterIndexes)
{
ResultRelInfo *resultRelInfo;
int i;
int numIndices;
RelationPtr relationDescs;
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
ItemPointerData invalidItemPtr;
bool checkedIndex = false;
ItemPointerSetInvalid(conflictTid);
ItemPointerSetInvalid(&invalidItemPtr);
/*
* Get information from the result relation info structure.
*/
resultRelInfo = estate->es_result_relation_info;
numIndices = resultRelInfo->ri_NumIndices;
relationDescs = resultRelInfo->ri_IndexRelationDescs;
indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
heapRelation = resultRelInfo->ri_RelationDesc;
/*
* We will use the EState's per-tuple context for evaluating predicates
* and index expressions (creating it if it's not already there).
*/
econtext = GetPerTupleExprContext(estate);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/*
* For each index, form index tuple and check if it satisfies the
* constraint.
*/
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
bool satisfiesConstraint;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
continue;
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
/* When specific arbiter indexes requested, only examine them */
if (arbiterIndexes != NIL &&
!list_member_oid(arbiterIndexes,
indexRelation->rd_index->indexrelid))
continue;
if (!indexRelation->rd_index->indimmediate)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("ON CONFLICT does not support deferrable unique constraints/exclusion constraints as arbiters"),
errtableconstraint(heapRelation,
RelationGetRelationName(indexRelation))));
checkedIndex = true;
/* Check for partial index */
if (indexInfo->ii_Predicate != NIL)
{
ExprState *predicate;
/*
* If predicate state not set up yet, create it (in the estate's
* per-query context)
*/
predicate = indexInfo->ii_PredicateState;
if (predicate == NULL)
{
predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
indexInfo->ii_PredicateState = predicate;
}
/* Skip this index-update if the predicate isn't satisfied */
if (!ExecQual(predicate, econtext))
continue;
}
/*
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
satisfiesConstraint =
check_exclusion_or_unique_constraint(heapRelation, indexRelation,
indexInfo, &invalidItemPtr,
values, isnull, estate, false,
CEOUC_WAIT, true,
conflictTid);
if (!satisfiesConstraint)
return false;
}
if (arbiterIndexes != NIL && !checkedIndex)
elog(ERROR, "unexpected failure to find arbiter index");
return true;
}
11、ExecOnConflictUpdate
/*
* ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
*
* Try to lock tuple for update as part of speculative insertion. If
* a qual originating from ON CONFLICT DO UPDATE is satisfied, update
* (but still lock row, even though it may not satisfy estate's
* snapshot).
*
* Returns true if we're done (with or without an update), or false if
* the caller must retry the INSERT from scratch.
*/
static bool
ExecOnConflictUpdate(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo,
ItemPointer conflictTid,
TupleTableSlot *planSlot,
TupleTableSlot *excludedSlot,
EState *estate,
bool canSetTag,
TupleTableSlot **returning)
{
ExprContext *econtext = mtstate->ps.ps_ExprContext;
Relation relation = resultRelInfo->ri_RelationDesc;
ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
HeapTupleData tuple;
HeapUpdateFailureData hufd;
LockTupleMode lockmode;
HTSU_Result test;
Buffer buffer;
/* Determine lock mode to use */
lockmode = ExecUpdateLockMode(estate, resultRelInfo);
/*
* Lock tuple for update. Don't follow updates when tuple cannot be
* locked without doing so. A row locking conflict here means our
* previous conclusion that the tuple is conclusively committed is not
* true anymore.
*/
tuple.t_self = *conflictTid;
test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
lockmode, LockWaitBlock, false, &buffer,
&hufd);
switch (test)
{
case HeapTupleMayBeUpdated:
/* success! */
break;
case HeapTupleInvisible:
/*
* This can occur when a just inserted tuple is updated again in
* the same command. E.g. because multiple rows with the same
* conflicting key values are inserted.
*
* This is somewhat similar to the ExecUpdate()
* HeapTupleSelfUpdated case. We do not want to proceed because
* it would lead to the same row being updated a second time in
* some unspecified order, and in contrast to plain UPDATEs
* there's no historical behavior to break.
*
* It is the user's responsibility to prevent this situation from
* occurring. These problems are why SQL-2003 similarly specifies
* that for SQL MERGE, an exception must be raised in the event of
* an attempt to update the same row twice.
*/
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
ereport(ERROR,
(errcode(ERRCODE_CARDINALITY_VIOLATION),
errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
/* This shouldn't happen */
elog(ERROR, "attempted to lock invisible tuple");
break;
case HeapTupleSelfUpdated:
/*
* This state should never be reached. As a dirty snapshot is used
* to find conflicting tuples, speculative insertion wouldn't have
* seen this row to conflict with.
*/
elog(ERROR, "unexpected self-updated tuple");
break;
case HeapTupleUpdated:
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/*
* As long as we don't support an UPDATE of INSERT ON CONFLICT for
* a partitioned table we shouldn't reach to a case where tuple to
* be lock is moved to another partition due to concurrent update
* of the partition key.
*/
Assert(!ItemPointerIndicatesMovedPartitions(&hufd.ctid));
/*
* Tell caller to try again from the very start.
*
* It does not make sense to use the usual EvalPlanQual() style
* loop here, as the new version of the row might not conflict
* anymore, or the conflicting tuple has actually been deleted.
*/
ReleaseBuffer(buffer);
return false;
default:
elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
}
/*
* Success, the tuple is locked.
*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous cycle.
*/
ResetExprContext(econtext);
/*
* Verify that the tuple is visible to our MVCC snapshot if the current
* isolation level mandates that.
*
* It's not sufficient to rely on the check within ExecUpdate() as e.g.
* CONFLICT ... WHERE clause may prevent us from reaching that.
*
* This means we only ever continue when a new command in the current
* transaction could see the row, even though in READ COMMITTED mode the
* tuple will not be visible according to the current statement's
* snapshot. This is in line with the way UPDATE deals with newer tuple
* versions.
*/
ExecCheckHeapTupleVisible(estate, &tuple, buffer);
/* Store target's existing tuple in the state's dedicated slot */
ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
/*
* Make tuple and any needed join variables available to ExecQual and
* ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
* the target's existing tuple is installed in the scantuple. EXCLUDED
* has been made to reference INNER_VAR in setrefs.c, but there is no
* other redirection.
*/
econtext->ecxt_scantuple = mtstate->mt_existing;
econtext->ecxt_innertuple = excludedSlot;
econtext->ecxt_outertuple = NULL;
if (!ExecQual(onConflictSetWhere, econtext))
{
ReleaseBuffer(buffer);
InstrCountFiltered1(&mtstate->ps, 1);
return true; /* done with the tuple */
}
if (resultRelInfo->ri_WithCheckOptions != NIL)
{
/*
* Check target's existing tuple against UPDATE-applicable USING
* security barrier quals (if any), enforced here as RLS checks/WCOs.
*
* The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
* quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
* but that's almost the extent of its special handling for ON
* CONFLICT DO UPDATE.
*
* The rewriter will also have associated UPDATE applicable straight
* RLS checks/WCOs for the benefit of the ExecUpdate() call that
* follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
* kinds, so there is no danger of spurious over-enforcement in the
* INSERT or UPDATE path.
*/
ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
mtstate->mt_existing,
mtstate->ps.state);
}
/* Project the new tuple version */
ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
/*
* Note that it is possible that the target tuple has been modified in
* this session, after the above heap_lock_tuple. We choose to not error
* out in that case, in line with ExecUpdate's treatment of similar cases.
* This can happen if an UPDATE is triggered from within ExecQual(),
* ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
* wCTE in the ON CONFLICT's SET.
*/
/* Execute UPDATE with projection */
*returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
mtstate->mt_conflproj, planSlot,
&mtstate->mt_epqstate, mtstate->ps.state,
canSetTag);
ReleaseBuffer(buffer);
return true;
}
12、InstrCountTuples2
/* Macros for inline access to certain instrumentation counters */
#define InstrCountTuples2(node, delta) \
do { \
if (((PlanState *)(node))->instrument) \
((PlanState *)(node))->instrument->ntuples2 += (delta); \
} while (0)
13、ExecCheckTIDVisible
/*
* ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
*/
static void
ExecCheckTIDVisible(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tid)
{
Relation rel = relinfo->ri_RelationDesc;
Buffer buffer;
HeapTupleData tuple;
/* Redundantly check isolation level */
if (!IsolationUsesXactSnapshot())
return;
tuple.t_self = *tid;
if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
ExecCheckHeapTupleVisible(estate, &tuple, buffer);
ReleaseBuffer(buffer);
}
14、SpeculativeInsertionLockAcquire
/*
* SpeculativeInsertionLockAcquire
*
* Insert a lock showing that the given transaction ID is inserting a tuple,
* but hasn't yet decided whether it's going to keep it. The lock can then be
* used to wait for the decision to go ahead with the insertion, or aborting
* it.
*
* The token is used to distinguish multiple insertions by the same
* transaction. It is returned to caller.
*/
uint32
SpeculativeInsertionLockAcquire(TransactionId xid)
{
LOCKTAG tag;
speculativeInsertionToken++;
/*
* Check for wrap-around. Zero means no token is held, so don't use that.
*/
if (speculativeInsertionToken == 0)
speculativeInsertionToken = 1;
SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken);
(void) LockAcquire(&tag, ExclusiveLock, false, false);
return speculativeInsertionToken;
}
15、HeapTupleHeaderSetSpeculativeToken
#define HeapTupleHeaderSetSpeculativeToken(tup, token) \
( \
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
)
16、heap_insert
//上一节已介绍
17、ExecInsertIndexTuples
/* ----------------------------------------------------------------
* ExecInsertIndexTuples
*
* This routine takes care of inserting index tuples
* into all the relations indexing the result relation
* when a heap tuple is inserted into the result relation.
*
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
* potential (unconfirmed) conflicts. (if noDupErr == true,
* the same is done for non-deferred constraints, but report
* if conflict was speculative or deferred conflict to caller)
*
* If 'arbiterIndexes' is nonempty, noDupErr applies only to
* those indexes. NIL means noDupErr applies to all indexes.
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
* Should we change the API to make it safer?
* ----------------------------------------------------------------
*/
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
EState *estate,
bool noDupErr,
bool *specConflict,
List *arbiterIndexes)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
int i;
int numIndices;
RelationPtr relationDescs;
Relation heapRelation;
IndexInfo **indexInfoArray;
ExprContext *econtext;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
/*
* Get information from the result relation info structure.
*/
resultRelInfo = estate->es_result_relation_info;
numIndices = resultRelInfo->ri_NumIndices;
relationDescs = resultRelInfo->ri_IndexRelationDescs;
indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
heapRelation = resultRelInfo->ri_RelationDesc;
/*
* We will use the EState's per-tuple context for evaluating predicates
* and index expressions (creating it if it's not already there).
*/
econtext = GetPerTupleExprContext(estate);
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/*
* for each index, form and insert the index tuple
*/
for (i = 0; i < numIndices; i++)
{
Relation indexRelation = relationDescs[i];
IndexInfo *indexInfo;
bool applyNoDupErr;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
/* Check for partial index */
if (indexInfo->ii_Predicate != NIL)
{
ExprState *predicate;
/*
* If predicate state not set up yet, create it (in the estate's
* per-query context)
*/
predicate = indexInfo->ii_PredicateState;
if (predicate == NULL)
{
predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
indexInfo->ii_PredicateState = predicate;
}
/* Skip this index-update if the predicate isn't satisfied */
if (!ExecQual(predicate, econtext))
continue;
}
/*
* FormIndexDatum fills in its values and isnull parameters with the
* appropriate values for the column(s) of the index.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/* Check whether to apply noDupErr to this index */
applyNoDupErr = noDupErr &&
(arbiterIndexes == NIL ||
list_member_oid(arbiterIndexes,
indexRelation->rd_index->indexrelid));
/*
* The index AM does the actual insertion, plus uniqueness checking.
*
* For an immediate-mode unique index, we just tell the index AM to
* throw error if not unique.
*
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
*
* For a speculative insertion (used by INSERT ... ON CONFLICT), do
* the same as for a deferrable unique index.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
else if (applyNoDupErr)
checkUnique = UNIQUE_CHECK_PARTIAL;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
checkUnique = UNIQUE_CHECK_PARTIAL;
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
values, /* array of index Datums */
isnull, /* null flags */
tupleid, /* tid of heap tuple */
heapRelation, /* heap relation */
checkUnique, /* type of uniqueness check to do */
indexInfo); /* index AM may need this */
/*
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
* we check now anyway, but don't throw error on violation or wait for
* a conclusive outcome from a concurrent insertion; instead we'll
* queue a recheck event. Similarly, noDupErr callers (speculative
* inserters) will recheck later, and wait for a conclusive outcome
* then.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
* need to preserve the prior state of satisfiesConstraint.
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
bool violationOK;
CEOUC_WAIT_MODE waitMode;
if (applyNoDupErr)
{
violationOK = true;
waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
}
else if (!indexRelation->rd_index->indimmediate)
{
violationOK = true;
waitMode = CEOUC_NOWAIT;
}
else
{
violationOK = false;
waitMode = CEOUC_WAIT;
}
satisfiesConstraint =
check_exclusion_or_unique_constraint(heapRelation,
indexRelation, indexInfo,
tupleid, values, isnull,
estate, false,
waitMode, violationOK, NULL);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
indexInfo->ii_ExclusionOps != NULL) &&
!satisfiesConstraint)
{
/*
* The tuple potentially violates the uniqueness or exclusion
* constraint, so make a note of the index so that we can re-check
* it later. Speculative inserters are told if there was a
* speculative conflict, since that always requires a restart.
*/
result = lappend_oid(result, RelationGetRelid(indexRelation));
if (indexRelation->rd_index->indimmediate && specConflict)
*specConflict = true;
}
}
return result;
}
18、heap_finish_speculative
/*
* heap_finish_speculative - mark speculative insertion as successful
*
* To successfully finish a speculative insertion we have to clear speculative
* token from tuple. To do so the t_ctid field, which will contain a
* speculative token value, is modified in place to point to the tuple itself,
* which is characteristic of a newly inserted ordinary tuple.
*
* NB: It is not ok to commit without either finishing or aborting a
* speculative insertion. We could treat speculative tuples of committed
* transactions implicitly as completed, but then we would have to be prepared
* to deal with speculative tokens on committed tuples. That wouldn't be
* difficult - no-one looks at the ctid field of a tuple with invalid xmax -
* but clearing the token at completion isn't very expensive either.
* An explicit confirmation WAL record also makes logical decoding simpler.
*/
void
heap_finish_speculative(Relation relation, HeapTuple tuple)
{
Buffer buffer;
Page page;
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
elog(ERROR, "invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
/* SpecTokenOffsetNumber should be distinguishable from any real offset */
StaticAssertStmt(MaxOffsetNumber < SpecTokenOffsetNumber,
"invalid speculative token constant");
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
Assert(HeapTupleHeaderIsSpeculative(tuple->t_data));
MarkBufferDirty(buffer);
/*
* Replace the speculative insertion token with a real t_ctid, pointing to
* itself like it does on regular tuples.
*/
htup->t_ctid = tuple->t_self;
/* XLOG stuff */
if (RelationNeedsWAL(relation))
{
xl_heap_confirm xlrec;
XLogRecPtr recptr;
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
XLogBeginInsert();
/* We want the same filtering on this as on a plain insert */
XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
PageSetLSN(page, recptr);
}
END_CRIT_SECTION();
UnlockReleaseBuffer(buffer);
}
19、heap_abort_speculative
/*
* heap_abort_speculative - kill a speculatively inserted tuple
*
* Marks a tuple that was speculatively inserted in the same command as dead,
* by setting its xmin as invalid. That makes it immediately appear as dead
* to all transactions, including our own. In particular, it makes
* HeapTupleSatisfiesDirty() regard the tuple as dead, so that another backend
* inserting a duplicate key value won't unnecessarily wait for our whole
* transaction to finish (it'll just wait for our speculative insertion to
* finish).
*
* Killing the tuple prevents "unprincipled deadlocks", which are deadlocks
* that arise due to a mutual dependency that is not user visible. By
* definition, unprincipled deadlocks cannot be prevented by the user
* reordering lock acquisition in client code, because the implementation level
* lock acquisitions are not under the user's direct control. If speculative
* inserters did not take this precaution, then under high concurrency they
* could deadlock with each other, which would not be acceptable.
*
* This is somewhat redundant with heap_delete, but we prefer to have a
* dedicated routine with stripped down requirements. Note that this is also
* used to delete the TOAST tuples created during speculative insertion.
*
* This routine does not affect logical decoding as it only looks at
* confirmation records.
*/
void
heap_abort_speculative(Relation relation, HeapTuple tuple)
{
TransactionId xid = GetCurrentTransactionId();
ItemPointer tid = &(tuple->t_self);
ItemId lp;
HeapTupleData tp;
Page page;
BlockNumber block;
Buffer buffer;
Assert(ItemPointerIsValid(tid));
block = ItemPointerGetBlockNumber(tid);
buffer = ReadBuffer(relation, block);
page = BufferGetPage(buffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Page can't be all visible, we just inserted into it, and are still
* running.
*/
Assert(!PageIsAllVisible(page));
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp));
tp.t_tableOid = RelationGetRelid(relation);
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
/*
* Sanity check that the tuple really is a speculatively inserted tuple,
* inserted by us.
*/
if (tp.t_data->t_choice.t_heap.t_xmin != xid)
elog(ERROR, "attempted to kill a tuple inserted by another transaction");
if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
elog(ERROR, "attempted to kill a non-speculative tuple");
Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data));
/*
* No need to check for serializable conflicts here. There is never a
* need for a combocid, either. No need to extract replica identity, or
* do anything special with infomask bits.
*/
START_CRIT_SECTION();
/*
* The tuple will become DEAD immediately. Flag that this page
* immediately is a candidate for pruning by setting xmin to
* RecentGlobalXmin. That's not pretty, but it doesn't seem worth
* inventing a nicer API for this.
*/
Assert(TransactionIdIsValid(RecentGlobalXmin));
PageSetPrunable(page, RecentGlobalXmin);
/* store transaction information of xact deleting the tuple */
tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/*
* Set the tuple header xmin to InvalidTransactionId. This makes the
* tuple immediately invisible everyone. (In particular, to any
* transactions waiting on the speculative token, woken up later.)
*/
HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
/* Clear the speculative insertion token too */
tp.t_data->t_ctid = tp.t_self;
MarkBufferDirty(buffer);
/*
* XLOG stuff
*
* The WAL records generated here match heap_delete(). The same recovery
* routines are used.
*/
if (RelationNeedsWAL(relation))
{
xl_heap_delete xlrec;
XLogRecPtr recptr;
xlrec.flags = XLH_DELETE_IS_SUPER;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
xlrec.xmax = xid;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
/* No replica identity & replication origin logged */
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
PageSetLSN(page, recptr);
}
END_CRIT_SECTION();
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
if (HeapTupleHasExternal(&tp))
{
Assert(!IsToastRelation(relation));
toast_delete(relation, &tp, true);
}
/*
* Never need to mark tuple for invalidation, since catalogs don't support
* speculative insertion
*/
/* Now we can release the buffer */
ReleaseBuffer(buffer);
/* count deletion, as we counted the insertion too */
pgstat_count_heap_delete(relation);
}
20、SpeculativeInsertionLockRelease
/*
* SpeculativeInsertionLockRelease
*
* Delete the lock showing that the given transaction is speculatively
* inserting a tuple.
*/
void
SpeculativeInsertionLockRelease(TransactionId xid)
{
LOCKTAG tag;
SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken);
LockRelease(&tag, ExclusiveLock, false);
}
21、list_free
/*
* Free all the cells of the list, as well as the list itself. Any
* objects that are pointed-to by the cells of the list are NOT
* free'd.
*
* On return, the argument to this function has been freed, so the
* caller would be wise to set it to NIL for safety's sake.
*/
void
list_free(List *list)
{
list_free_private(list, false);
}
/*
* Free all storage in a list, and optionally the pointed-to elements
*/
static void
list_free_private(List *list, bool deep)
{
ListCell *cell;
check_list_invariants(list);
cell = list_head(list);
while (cell != NULL)
{
ListCell *tmp = cell;
cell = lnext(cell);
if (deep)
pfree(lfirst(tmp));
pfree(tmp);
}
if (list)
pfree(list);
}
22、setLastTid
void
setLastTid(const ItemPointer tid)
{
Current_last_tid = *tid;
}
23、ExecARUpdateTriggers
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
HeapTuple newtuple,
List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if ((trigdesc && trigdesc->trig_update_after_row) ||
(transition_capture &&
(transition_capture->tcs_update_old_table ||
transition_capture->tcs_update_new_table)))
{
HeapTuple trigtuple;
/*
* Note: if the UPDATE is converted into a DELETE+INSERT as part of
* update-partition-key operation, then this function is also called
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
trigtuple = GetTupleForTrigger(estate,
NULL,
relinfo,
tupleid,
LockTupleExclusive,
NULL);
else
trigtuple = fdw_trigtuple;
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
true, trigtuple, newtuple, recheckIndexes,
GetUpdatedColumns(relinfo, estate),
transition_capture);
if (trigtuple != fdw_trigtuple)
heap_freetuple(trigtuple);
}
}
24、ExecARInsertTriggers
void
ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
HeapTuple trigtuple, List *recheckIndexes,
TransitionCaptureState *transition_capture)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
true, NULL, trigtuple,
recheckIndexes, NULL,
transition_capture);
}
25、ExecProcessReturning
/*
* ExecProcessReturning --- evaluate a RETURNING list
*
* resultRelInfo: current result rel
* tupleSlot: slot holding tuple actually inserted/updated/deleted
* planSlot: slot holding tuple returned by top subplan node
*
* Note: If tupleSlot is NULL, the FDW should have already provided econtext's
* scan tuple.
*
* Returns a slot holding the result tuple
*/
static TupleTableSlot *
ExecProcessReturning(ResultRelInfo *resultRelInfo,
TupleTableSlot *tupleSlot,
TupleTableSlot *planSlot)
{
ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning;
ExprContext *econtext = projectReturning->pi_exprContext;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous cycle.
*/
ResetExprContext(econtext);
/* Make tuple and any needed join variables available to ExecProject */
if (tupleSlot)
econtext->ecxt_scantuple = tupleSlot;
else
{
HeapTuple tuple;
/*
* RETURNING expressions might reference the tableoid column, so
* initialize t_tableOid before evaluating them.
*/
Assert(!TupIsNull(econtext->ecxt_scantuple));
tuple = ExecMaterializeSlot(econtext->ecxt_scantuple);
tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
}
econtext->ecxt_outertuple = planSlot;
/* Compute the RETURNING expressions */
return ExecProject(projectReturning);
}
二、源码解读
这部分源码需要二次解读(2018-8-6 Mark)
/* ----------------------------------------------------------------
* ExecInsert
*
* For INSERT, we have to insert the tuple into the target relation
* and insert appropriate tuples into the index relations.
*
* Returns RETURNING result if any, otherwise NULL.
* ----------------------------------------------------------------
*/
/*
输入:
mtstate-存储“更新”数据表(包括INSERT, UPDATE, or DELETE)时的状态
slot-执行器使用Tuple Table存储Tuples,每个Tuple存储在该Table的一个Tuple Slot中
planSlot-TODO
estate-管理Executor调用的工作状态
canSetTag-TODO
输出:
TupleTableSlot-相应的Tuple的Slot
*/
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
TupleTableSlot *slot,
TupleTableSlot *planSlot,
EState *estate,
bool canSetTag)
{
HeapTuple tuple;//要插入的tuple
ResultRelInfo *resultRelInfo;//执行更新操作时Relation相关的所有信息
Relation resultRelationDesc;//目标Relation信息
Oid newId;//数据表Oid
List *recheckIndexes = NIL;//需要检查的Index的列表
TupleTableSlot *result = NULL;//结果Tuple对应的Slot
TransitionCaptureState *ar_insert_trig_tcs;//TODO
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;//更新表Node
OnConflictAction onconflict = node->onConflictAction;//冲突处理
/*
* get the heap tuple out of the tuple table slot, making sure we have a
* writable copy
*/
tuple = ExecMaterializeSlot(slot);//"物化"Tuple
/*
* get information on the (current) result relation
*/
//获取Relation相关信息
resultRelInfo = estate->es_result_relation_info;
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/*
* If the result relation has OIDs, force the tuple's OID to zero so that
* heap_insert will assign a fresh OID. Usually the OID already will be
* zero at this point, but there are corner cases where the plan tree can
* return a tuple extracted literally from some table with the same
* rowtype.
*
* XXX if we ever wanted to allow users to assign their own OIDs to new
* rows, this'd be the place to do it. For the moment, we make a point of
* doing this before calling triggers, so that a user-supplied trigger
* could hack the OID if desired.
*/
//设置Oid为InvalidOid(0),在heap_insert时会更新Oid
if (resultRelationDesc->rd_rel->relhasoids)
HeapTupleSetOid(tuple, InvalidOid);
/*
* BEFORE ROW INSERT Triggers.
*
* Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
* INSERT ... ON CONFLICT statement. We cannot check for constraint
* violations before firing these triggers, because they can change the
* values to insert. Also, they can run arbitrary user-defined code with
* side-effects that we can't cancel by just not inserting the tuple.
*/
//触发器-TODO
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
}
//触发器-TODO
/* INSTEAD OF ROW INSERT Triggers */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
{
slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* trigger might have changed tuple */
tuple = ExecMaterializeSlot(slot);
newId = InvalidOid;
}
else if (resultRelInfo->ri_FdwRoutine)//FDW-TODO
{
/*
* insert into foreign table: let the FDW do it
*/
slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
resultRelInfo,
slot,
planSlot);
if (slot == NULL) /* "do nothing" */
return NULL;
/* FDW might have changed tuple */
tuple = ExecMaterializeSlot(slot);
/*
* AFTER ROW Triggers or RETURNING expressions might reference the
* tableoid column, so initialize t_tableOid before evaluating them.
*/
tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
newId = InvalidOid;
}
else//正常数据表
{
WCOKind wco_kind;//With Check Option类型
/*
* Constraints might reference the tableoid column, so initialize
* t_tableOid before evaluating them.
*/
//获取RelationID
tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
/*
* Check any RLS WITH CHECK policies.
*
* Normally we should check INSERT policies. But if the insert is the
* result of a partition key update that moved the tuple to a new
* partition, we should instead check UPDATE policies, because we are
* executing policies defined on the target table, and not those
* defined on the child partitions.
*/
//With Check Option类型:Update OR Insert?
wco_kind = (mtstate->operation == CMD_UPDATE) ?
WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
/*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind
* we are looking for at this point.
*/
//执行检查
if (resultRelInfo->ri_WithCheckOptions != NIL)
ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
/*
* Check the constraints of the tuple.
*/
//检查约束
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/*
* Also check the tuple against the partition constraint, if there is
* one; except that if we got here via tuple-routing, we don't need to
* if there's no BR trigger defined on the partition.
*/
//分区表
if (resultRelInfo->ri_PartitionCheck &&
(resultRelInfo->ri_PartitionRoot == NULL ||
(resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
ExecPartitionCheck(resultRelInfo, slot, estate, true);
//索引检查
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
{
/* Perform a speculative insertion. */
uint32 specToken;//Token
ItemPointerData conflictTid;//数据行指针
bool specConflict;//冲突声明
List *arbiterIndexes;//相关索引
arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
/*
* Do a non-conclusive check for conflicts first.
*
* We're not holding any locks yet, so this doesn't guarantee that
* the later insert won't conflict. But it avoids leaving behind
* a lot of canceled speculative insertions, if you run a lot of
* INSERT ON CONFLICT statements that do conflict.
*
* We loop back here if we find a conflict below, either during
* the pre-check, or when we re-check after inserting the tuple
* speculatively.
*/
vlock:
specConflict = false;
if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
arbiterIndexes))
{
/* committed conflict tuple found */
if (onconflict == ONCONFLICT_UPDATE)
{
/*
* In case of ON CONFLICT DO UPDATE, execute the UPDATE
* part. Be prepared to retry if the UPDATE fails because
* of another concurrent UPDATE/DELETE to the conflict
* tuple.
*/
TupleTableSlot *returning = NULL;
if (ExecOnConflictUpdate(mtstate, resultRelInfo,
&conflictTid, planSlot, slot,
estate, canSetTag, &returning))
{
InstrCountTuples2(&mtstate->ps, 1);
return returning;
}
else
goto vlock;
}
else
{
/*
* In case of ON CONFLICT DO NOTHING, do nothing. However,
* verify that the tuple is visible to the executor's MVCC
* snapshot at higher isolation levels.
*/
Assert(onconflict == ONCONFLICT_NOTHING);
ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
InstrCountTuples2(&mtstate->ps, 1);
return NULL;
}
}
/*
* Before we start insertion proper, acquire our "speculative
* insertion lock". Others can use that to wait for us to decide
* if we're going to go ahead with the insertion, instead of
* waiting for the whole transaction to complete.
*/
specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
/* insert the tuple, with the speculative token */
newId = heap_insert(resultRelationDesc, tuple,
estate->es_output_cid,
HEAP_INSERT_SPECULATIVE,
NULL);
/* insert index entries for tuple */
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
estate, true, &specConflict,
arbiterIndexes);
/* adjust the tuple's state accordingly */
if (!specConflict)
heap_finish_speculative(resultRelationDesc, tuple);
else
heap_abort_speculative(resultRelationDesc, tuple);
/*
* Wake up anyone waiting for our decision. They will re-check
* the tuple, see that it's no longer speculative, and wait on our
* XID as if this was a regularly inserted tuple all along. Or if
* we killed the tuple, they will see it's dead, and proceed as if
* the tuple never existed.
*/
SpeculativeInsertionLockRelease(GetCurrentTransactionId());
/*
* If there was a conflict, start from the beginning. We'll do
* the pre-check again, which will now find the conflicting tuple
* (unless it aborts before we get there).
*/
if (specConflict)
{
list_free(recheckIndexes);
goto vlock;
}
/* Since there was no insertion conflict, we're done */
}
else//无需Index检查
{
/*
* insert the tuple normally.
*
* Note: heap_insert returns the tid (location) of the new tuple
* in the t_self field.
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_output_cid,
0, NULL);//插入数据
/* insert index entries for tuple */
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
estate, false, NULL,
NIL);//索引
}
}
if (canSetTag)
{
(estate->es_processed)++;
estate->es_lastoid = newId;
setLastTid(&(tuple->t_self));
}
/*
* If this insert is the result of a partition key update that moved the
* tuple to a new partition, put this row into the transition NEW TABLE,
* if there is one. We need to do this separately for DELETE and INSERT
* because they happen on different tables.
*/
//触发器-TODO
ar_insert_trig_tcs = mtstate->mt_transition_capture;
if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
&& mtstate->mt_transition_capture->tcs_update_new_table)
{
ExecARUpdateTriggers(estate, resultRelInfo, NULL,
NULL,
tuple,
NULL,
mtstate->mt_transition_capture);
/*
* We've already captured the NEW TABLE row, so make sure any AR
* INSERT trigger fired below doesn't capture it again.
*/
ar_insert_trig_tcs = NULL;
}
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
ar_insert_trig_tcs);
list_free(recheckIndexes);
/*
* Check any WITH CHECK OPTION constraints from parent views. We are
* required to do this after testing all constraints and uniqueness
* violations per the SQL spec, so we do it after actually inserting the
* record into the heap and all indexes.
*
* ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
* tuple will never be seen, if it violates the WITH CHECK OPTION.
*
* ExecWithCheckOptions() will skip any WCOs which are not of the kind we
* are looking for at this point.
*/
if (resultRelInfo->ri_WithCheckOptions != NIL)
ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
result = ExecProcessReturning(resultRelInfo, slot, planSlot);
return result;
}
三、跟踪分析
添加主键,插入测试数据:
testdb=# -- 获取pid
testdb=# select pg_backend_pid();
pg_backend_pid
----------------
1527
(1 row)
testdb=#
testdb=# -- 添加主键
testdb=# alter table t_insert add constraint pk_t_insert primary key(id);
ALTER TABLE
testdb=# insert into t_insert values(12,'12','12','12');
(挂起)
启动gdb
[root@localhost ~]# gdb -p 1527
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7
Copyright (C) 2013 Free Software Foundation, Inc.
...
(gdb) b ExecInsert
Breakpoint 1 at 0x6c0227: file nodeModifyTable.c, line 273.
#查看输入参数
(gdb) p *mtstate
$1 = {ps = {type = T_ModifyTableState, plan = 0x1257fa0, state = 0x130bf80, ExecProcNode = 0x6c2485 <ExecModifyTable>, ExecProcNodeReal = 0x6c2485 <ExecModifyTable>, instrument = 0x0,
worker_instrument = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x130d1e0, ps_ExprContext = 0x0, ps_ProjInfo = 0x0,
scandesc = 0x0}, operation = CMD_INSERT, canSetTag = true, mt_done = false, mt_plans = 0x130c4e0, mt_nplans = 1, mt_whichplan = 0, resultRelInfo = 0x130c1c0, rootResultRelInfo = 0x0,
mt_arowmarks = 0x130c4f8, mt_epqstate = {estate = 0x0, planstate = 0x0, origslot = 0x130c950, plan = 0x132de88, arowMarks = 0x0, epqParam = 0}, fireBSTriggers = false, mt_existing = 0x0,
mt_excludedtlist = 0x0, mt_conflproj = 0x0, mt_partition_tuple_routing = 0x0, mt_transition_capture = 0x0, mt_oc_transition_capture = 0x0, mt_per_subplan_tupconv_maps = 0x0}
(gdb) p *(mtstate->ps->plan)
$2 = {type = T_ModifyTable, startup_cost = 0, total_cost = 0.01, plan_rows = 1, plan_width = 136, parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x0, qual = 0x0,
lefttree = 0x0, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}
(gdb) p *(mtstate->ps->state)
$3 = {type = T_EState, es_direction = ForwardScanDirection, es_snapshot = 0x1309fa0, es_crosscheck_snapshot = 0x0, es_range_table = 0x132e1f8, es_plannedstmt = 0x1258420,
es_sourceText = 0x1256ef0 "insert into t_insert values(12,'12','12','12');", es_junkFilter = 0x0, es_output_cid = 0, es_result_relations = 0x130c1c0, es_num_result_relations = 1,
es_result_relation_info = 0x130c1c0, es_root_result_relations = 0x0, es_num_root_result_relations = 0, es_tuple_routing_result_relations = 0x0, es_trig_target_relations = 0x0,
es_trig_tuple_slot = 0x130d290, es_trig_oldtup_slot = 0x0, es_trig_newtup_slot = 0x0, es_param_list_info = 0x0, es_param_exec_vals = 0x130c190, es_queryEnv = 0x0, es_query_cxt = 0x130be70,
es_tupleTable = 0x130c810, es_rowMarks = 0x0, es_processed = 0, es_lastoid = 0, es_top_eflags = 0, es_instrument = 0, es_finished = false, es_exprcontexts = 0x130c7b0, es_subplanstates = 0x0,
es_auxmodifytables = 0x0, es_per_tuple_exprcontext = 0x0, es_epqTuple = 0x0, es_epqTupleSet = 0x0, es_epqScanDone = 0x0, es_use_parallel_mode = false, es_query_dsa = 0x0, es_jit_flags = 0,
es_jit = 0x0}
(gdb) p *(mtstate->ps->ps_ResultTupleSlot)
$4 = {type = T_TupleTableSlot, tts_isempty = true, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false, tts_tuple = 0x0, tts_tupleDescriptor = 0x130d1b0, tts_mcxt = 0x130be70,
tts_buffer = 0, tts_nvalid = 0, tts_values = 0x130d240, tts_isnull = 0x130d240, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 0},
t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) p *(mtstate->ps->ps_ResultTupleSlot->tts_tupleDescriptor)
$5 = {natts = 0, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x130d1d0}
(gdb) p *(mtstate->ps->ps_ResultTupleSlot->tts_tupleDescriptor->attrs)
$6 = {attrelid = 128, attname = {
data = "\000\000\000\000p\276\060\001\000\000\000\000\b\000\000\000\001", '\000' <repeats 11 times>, "\260\321\060\001\000\000\000\000p\276\060\001", '\000' <repeats 12 times>, "@\322\060\001\000\000\000\000@\322\060\001"}, atttypid = 0, attstattarget = 0, attlen = 0, attnum = 0, attndims = 0, attcacheoff = 0, atttypmod = 0, attbyval = false, attstorage = 0 '\000', attalign = 0 '\000',
attnotnull = false, atthasdef = false, atthasmissing = false, attidentity = 0 '\000', attisdropped = false, attislocal = false, attinhcount = 0, attcollation = 1}
$7 = (PlanState *) 0x130c560
(gdb) p **(mtstate->mt_plans)
$8 = {type = T_ResultState, plan = 0x132de88, state = 0x130bf80, ExecProcNode = 0x6c5094 <ExecResult>, ExecProcNodeReal = 0x6c5094 <ExecResult>, instrument = 0x0, worker_instrument = 0x0, qual = 0x0,
lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x130c950, ps_ExprContext = 0x130c670, ps_ProjInfo = 0x130c700, scandesc = 0x0}
(gdb) p **(mtstate->resultRelInfo)
Structure has no component named operator*.
(gdb) p *(mtstate->resultRelInfo)
$9 = {type = T_ResultRelInfo, ri_RangeTableIndex = 1, ri_RelationDesc = 0x7f2af957d9e8, ri_NumIndices = 1, ri_IndexRelationDescs = 0x130c7e0, ri_IndexRelationInfo = 0x130c7f8, ri_TrigDesc = 0x0,
ri_TrigFunctions = 0x0, ri_TrigWhenExprs = 0x0, ri_TrigInstrument = 0x0, ri_FdwRoutine = 0x0, ri_FdwState = 0x0, ri_usesFdwDirectModify = false, ri_WithCheckOptions = 0x0,
ri_WithCheckOptionExprs = 0x0, ri_ConstraintExprs = 0x0, ri_junkFilter = 0x0, ri_returningList = 0x0, ri_projectReturning = 0x0, ri_onConflictArbiterIndexes = 0x0, ri_onConflict = 0x0,
ri_PartitionCheck = 0x0, ri_PartitionCheckExpr = 0x0, ri_PartitionRoot = 0x0, ri_PartitionReadyForRouting = false}
(gdb) p *(mtstate->resultRelInfo->ri_RelationDesc)
$10 = {rd_node = {spcNode = 1663, dbNode = 16477, relNode = 26731}, rd_smgr = 0x0, rd_refcnt = 1, rd_backend = -1, rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true,
rd_indexvalid = 1 '\001', rd_statvalid = false, rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f2af94c31a8, rd_att = 0x7f2af957f6e8, rd_id = 26731, rd_lockInfo = {lockRelId = {
relId = 26731, dbId = 16477}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0,
rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x7f2af94c2818, rd_oidindex = 0, rd_pkindex = 26737, rd_replidindex = 26737, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0,
rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0, rd_indexcxt = 0x0,
rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0,
rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x12d5850}
(gdb)
#第2个参数slot
(gdb) p *slot
$11 = {type = T_TupleTableSlot, tts_isempty = false, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false, tts_tuple = 0x0, tts_tupleDescriptor = 0x130cb70, tts_mcxt = 0x130be70,
tts_buffer = 0, tts_nvalid = 4, tts_values = 0x130c9b0, tts_isnull = 0x130c9d0, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 0},
t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) p *(slot->tts_tupleDescriptor)
$12 = {natts = 4, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x130cb90}
(gdb) p *(slot->tts_values)
$13 = 12
(gdb) p *(slot->tts_isnull)
$14 = false
#参数planSlot
(gdb) p *planSlot
$15 = {type = T_TupleTableSlot, tts_isempty = false, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false, tts_tuple = 0x0, tts_tupleDescriptor = 0x130cb70, tts_mcxt = 0x130be70,
tts_buffer = 0, tts_nvalid = 4, tts_values = 0x130c9b0, tts_isnull = 0x130c9d0, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 0},
t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) p *(planSlot->tts_mcxt)
$16 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xb8c720 <AllocSetMethods>, parent = 0x131f150, firstchild = 0x130fe90, prevchild = 0x0, nextchild = 0x0,
name = 0xb1a840 "ExecutorState", ident = 0x0, reset_cbs = 0x0}
#参数estate
(gdb) p *estate
$17 = {type = T_EState, es_direction = ForwardScanDirection, es_snapshot = 0x1309fa0, es_crosscheck_snapshot = 0x0, es_range_table = 0x132e1f8, es_plannedstmt = 0x1258420,
es_sourceText = 0x1256ef0 "insert into t_insert values(12,'12','12','12');", es_junkFilter = 0x0, es_output_cid = 0, es_result_relations = 0x130c1c0, es_num_result_relations = 1,
es_result_relation_info = 0x130c1c0, es_root_result_relations = 0x0, es_num_root_result_relations = 0, es_tuple_routing_result_relations = 0x0, es_trig_target_relations = 0x0,
es_trig_tuple_slot = 0x130d290, es_trig_oldtup_slot = 0x0, es_trig_newtup_slot = 0x0, es_param_list_info = 0x0, es_param_exec_vals = 0x130c190, es_queryEnv = 0x0, es_query_cxt = 0x130be70,
es_tupleTable = 0x130c810, es_rowMarks = 0x0, es_processed = 0, es_lastoid = 0, es_top_eflags = 0, es_instrument = 0, es_finished = false, es_exprcontexts = 0x130c7b0, es_subplanstates = 0x0,
es_auxmodifytables = 0x0, es_per_tuple_exprcontext = 0x0, es_epqTuple = 0x0, es_epqTupleSet = 0x0, es_epqScanDone = 0x0, es_use_parallel_mode = false, es_query_dsa = 0x0, es_jit_flags = 0,
es_jit = 0x0}
(gdb) p *(estate->es_snapshot)
$18 = {satisfies = 0x9f73fc <HeapTupleSatisfiesMVCC>, xmin = 1612861, xmax = 1612861, xip = 0x0, xcnt = 0, subxip = 0x0, subxcnt = 0, suboverflowed = false, takenDuringRecovery = false, copied = true,
curcid = 0, speculativeToken = 0, active_count = 1, regd_count = 2, ph_node = {first_child = 0xe7bac0 <CatalogSnapshotData+64>, next_sibling = 0x0, prev_or_parent = 0x0}, whenTaken = 0, lsn = 0}
(gdb)
#参数canSetTag
(gdb) p canSetTag
$19 = true
#进入函数内部
(gdb) next
274 TupleTableSlot *result = NULL;
(gdb)
276 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
(gdb) p *node
$20 = {plan = {type = 1005513616, startup_cost = 3.502247076882698e-317, total_cost = 9.8678822363083824e-317, plan_rows = 4.1888128009757547e-314, plan_width = 3, parallel_aware = 253,
parallel_safe = 127, plan_node_id = 10076823, targetlist = 0x686b0000405d, qual = 0x130c2d0, lefttree = 0x0, righttree = 0x130c1c0, initPlan = 0x6c2485 <ExecModifyTable>, extParam = 0x0,
allParam = 0x7ffd3beeeb50}, operation = 6923989, canSetTag = false, nominalRelation = 4183284200, partitioned_rels = 0x130c1c0, partColsUpdated = 128, resultRelations = 0x1257fa0,
resultRelIndex = 19974480, rootResultRelIndex = 0, plans = 0x0, withCheckOptionLists = 0x33beeeb50, returningLists = 0x130bf80, fdwPrivLists = 0x0, fdwDirectModifyPlans = 0x130c2d0, rowMarks = 0x0,
epqParam = 0, onConflictAction = ONCONFLICT_NONE, arbiterIndexes = 0x130c950, onConflictSet = 0x0, onConflictWhere = 0x130c560, exclRelRTI = 19972544, exclRelTlist = 0x0}
(gdb)
(gdb) p onconflict
$21 = ONCONFLICT_NONE
...
(gdb) next
289 resultRelationDesc = resultRelInfo->ri_RelationDesc;
(gdb) p *resultRelInfo
$26 = {type = T_ResultRelInfo, ri_RangeTableIndex = 1, ri_RelationDesc = 0x7f2af957d9e8, ri_NumIndices = 1, ri_IndexRelationDescs = 0x130c7e0, ri_IndexRelationInfo = 0x130c7f8, ri_TrigDesc = 0x0,
ri_TrigFunctions = 0x0, ri_TrigWhenExprs = 0x0, ri_TrigInstrument = 0x0, ri_FdwRoutine = 0x0, ri_FdwState = 0x0, ri_usesFdwDirectModify = false, ri_WithCheckOptions = 0x0,
ri_WithCheckOptionExprs = 0x0, ri_ConstraintExprs = 0x0, ri_junkFilter = 0x0, ri_returningList = 0x0, ri_projectReturning = 0x0, ri_onConflictArbiterIndexes = 0x0, ri_onConflict = 0x0,
ri_PartitionCheck = 0x0, ri_PartitionCheckExpr = 0x0, ri_PartitionRoot = 0x0, ri_PartitionReadyForRouting = false}
(gdb) p *(tuple->t_data)
$27 = {t_choice = {t_heap = {t_xmin = 244, t_xmax = 4294967295, t_field3 = {t_cid = 2249, t_xvac = 2249}}, t_datum = {datum_len_ = 244, datum_typmod = -1, datum_typeid = 2249}}, t_ctid = {ip_blkid = {
bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_infomask2 = 4, t_infomask = 2, t_hoff = 24 '\030', t_bits = 0x130d36f ""}
(gdb) p *(tuple)
$28 = {t_len = 61, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_tableOid = 0, t_data = 0x130d358}
(gdb)
(gdb) p *resultRelationDesc
$29 = {rd_node = {spcNode = 1663, dbNode = 16477, relNode = 26731}, rd_smgr = 0x0, rd_refcnt = 1, rd_backend = -1, rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true,
rd_indexvalid = 1 '\001', rd_statvalid = false, rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f2af94c31a8, rd_att = 0x7f2af957f6e8, rd_id = 26731, rd_lockInfo = {lockRelId = {
relId = 26731, dbId = 16477}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0,
rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x7f2af94c2818, rd_oidindex = 0, rd_pkindex = 26737, rd_replidindex = 26737, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0,
rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0, rd_indexcxt = 0x0,
rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0,
rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x12d5850}
#以上,获取了Relation相关的信息
#没有触发器,无需执行触发器相关逻辑
#进入正常数据表的处理逻辑 line 373行
(gdb) next
315 if (resultRelInfo->ri_TrigDesc &&
(gdb) next
328 if (resultRelInfo->ri_TrigDesc &&
(gdb)
341 else if (resultRelInfo->ri_FdwRoutine)
(gdb)
373 tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
(gdb) next
384 wco_kind = (mtstate->operation == CMD_UPDATE) ?
(gdb) next
391 if (resultRelInfo->ri_WithCheckOptions != NIL)
(gdb) p tuple->t_tableOid
$31 = 26731
(gdb) p wco_kind
$32 = WCO_RLS_INSERT_CHECK
#检查约束
(gdb) next
397 if (resultRelationDesc->rd_att->constr)
(gdb)
398 ExecConstraints(resultRelInfo, slot, estate);
(gdb)
#非分区表,无需检查
#进入正常数据插入逻辑
411 if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
(gdb)
529 newId = heap_insert(resultRelationDesc, tuple,
(gdb) p resultRelInfo->ri_NumIndices
$33 = 1
(gdb) p onconflict
$34 = ONCONFLICT_NONE
(gdb)
(gdb) next
534 if (resultRelInfo->ri_NumIndices > 0)
(gdb) p newId
$35 = 0
(gdb) next
535 recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
(gdb)
541 if (canSetTag)
(gdb)
543 (estate->es_processed)++;
(gdb)
544 estate->es_lastoid = newId;
(gdb)
545 setLastTid(&(tuple->t_self));
(gdb)
554 ar_insert_trig_tcs = mtstate->mt_transition_capture;
(gdb)
555 if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
(gdb)
572 ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
(gdb)
575 list_free(recheckIndexes);
(gdb)
589 if (resultRelInfo->ri_WithCheckOptions != NIL)
(gdb)
593 if (resultRelInfo->ri_projectReturning)
(gdb)
596 return result;
(gdb)
597 }
#DONE!
四、小结
1、其他相关知识:插入数据涉及执行计划等相关的数据结构,需要进一步的了解和理解;
2、触发器:插入前后的触发器调用在此函数中处理;
3、索引:索引Tuple的插入部分暂未深入解读。
最后:涉及的数据结构越来越多,数据结构的结构越来越复杂,慢慢进入了深水区,需做好潜水准备
网友评论