本节简单介绍了PostgreSQL执行聚合函数时的初始化工作,主要实现函数是ExecInitAgg.
AggState
聚合函数执行时的状态结构体,内含指向AggStatePerAgg等私有结构体的指针
/* ---------------------
 * AggState information
 *
 * Run-time state of an Agg plan node.
 *
 * ss.ss_ScanTupleSlot refers to output of underlying plan.
 * (ss = ScanState, ps = PlanState)
 *
 * Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and
 * ecxt_aggnulls arrays, which hold the computed agg values for the current
 * input group during evaluation of an Agg node's output tuple(s). We
 * create a second ExprContext, tmpcontext, in which to evaluate input
 * expressions and run the aggregate transition functions.
 *
 * The FIELDNO_AGGSTATE_* macros below equal the zero-based position of the
 * field that follows them, so they must be updated whenever a field is
 * added, removed or reordered ahead of them (presumably they are consumed
 * by code that addresses fields by position -- confirm against callers).
 * ---------------------
 */
/* these structs are private in nodeAgg.c: */
/* (only pointer typedefs are exposed here; the struct bodies are not) */
typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
/* embedded ScanState, so an AggState can be treated as a node */
ScanState ss; /* its first field is NodeTag */
List *aggs; /* all Aggref nodes in targetlist & quals */
/* ExecInitAgg later resets numaggs to the count of unique aggregates */
int numaggs; /* length of list (could be zero!) */
/* count of unique transition states, set at the end of ExecInitAgg */
int numtrans; /* number of pertrans items */
AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
/* only assigned by ExecInitAgg when the strategy is HASHED or MIXED */
ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
/* array with one ExprContext per grouping set (maxsets entries) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerAgg curperagg; /* currently active aggregate, if any */
#define FIELDNO_AGGSTATE_CURPERTRANS 16
AggStatePerTrans curpertrans; /* currently active trans state, if any */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
/* initialized to -1 by ExecInitAgg, i.e. nothing projected yet */
int projected_set; /* The last projected grouping set */
#define FIELDNO_AGGSTATE_CURRENT_SET 20
int current_set; /* The current grouping set being evaluated */
Bitmapset *grouped_cols; /* grouped cols in current projection */
List *all_grouped_cols; /* list of all grouped cols in DESC order */
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
/* only created by ExecInitAgg when the plan node has a chain */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
/* points at the start of all_pergroups unless the strategy is AGG_HASHED */
AggStatePerGroup *pergroups; /* grouping set indexed array of per-group
* pointers */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
/* number of hash tables, i.e. number of entries in perhash (not buckets) */
int num_hashes;
AggStatePerHash perhash; /* array of per-hashtable data */
/* points into all_pergroups, after the sorted/plain pergroups entries */
AggStatePerGroup *hash_pergroup; /* grouping set indexed array of
* per-group pointers */
/* support for evaluation of agg input expressions: */
#define FIELDNO_AGGSTATE_ALL_PERGROUPS 34
/* one allocation holding maxsets + num_hashes per-group pointers */
AggStatePerGroup *all_pergroups; /* array of first ->pergroups, then
* ->hash_pergroup */
ProjectionInfo *combinedproj; /* projection machinery */
} AggState;
/*
 * Primitive options supported by nodeAgg.c.  Every option owns a distinct
 * bit, so options can be OR'ed together to form an AggSplit value.
 */
#define AGGSPLITOP_COMBINE		(1 << 0)	/* substitute combinefn for transfn */
#define AGGSPLITOP_SKIPFINAL	(1 << 1)	/* skip finalfn, return state as-is */
#define AGGSPLITOP_SERIALIZE	(1 << 2)	/* apply serializefn to output */
#define AGGSPLITOP_DESERIALIZE	(1 << 3)	/* apply deserializefn to input */

/*
 * Supported operating modes, i.e. the useful combinations of the primitive
 * options above.
 */
typedef enum AggSplit
{
	/* plain aggregation with no splitting: no option bits set */
	AGGSPLIT_SIMPLE = 0,

	/* first half of a partial aggregation: emit the serialized state */
	AGGSPLIT_INITIAL_SERIAL = AGGSPLITOP_SERIALIZE | AGGSPLITOP_SKIPFINAL,

	/* second half of a partial aggregation: deserialize and combine */
	AGGSPLIT_FINAL_DESERIAL = AGGSPLITOP_DESERIALIZE | AGGSPLITOP_COMBINE
} AggSplit;

/*
 * Predicates reporting whether an AggSplit value selects a given primitive
 * option.  Each evaluates its argument once and yields 1 or 0.
 */
#define DO_AGGSPLIT_COMBINE(as) \
	(((as) & AGGSPLITOP_COMBINE) == AGGSPLITOP_COMBINE)
#define DO_AGGSPLIT_SKIPFINAL(as) \
	(((as) & AGGSPLITOP_SKIPFINAL) == AGGSPLITOP_SKIPFINAL)
#define DO_AGGSPLIT_SERIALIZE(as) \
	(((as) & AGGSPLITOP_SERIALIZE) == AGGSPLITOP_SERIALIZE)
#define DO_AGGSPLIT_DESERIALIZE(as) \
	(((as) & AGGSPLITOP_DESERIALIZE) == AGGSPLITOP_DESERIALIZE)
ExecInitAgg为优化器生成的agg节点创建运行期信息并初始化outer子树(左树).
其主要实现逻辑如下:
1.初始化AggState结构体
2.计算分为几个阶段(Hash vs Group)
3.如存在grouping set,则初始化相关信息
4.分配内存上下文
5.初始化outer plan子节点
6.初始化结果类型,slot和投影
7.初始化子表达式
8.为AggStatePerPhaseData等结构体分配内存
9.循环遍历各个阶段
9.1计算分组列,存储在phasedata->grouped_cols数组和all_grouped_cols中
9.2初始化AggState->phases数组(数组元素对应的结构体为AggStatePerPhase)
9.3初始化AggState->perhash数组(对应的结构体为AggStatePerHash)
10.转换all_grouped_cols为倒序链表
11.在输出expr上下文中设置aggregate-result存储,同时分配私有per-agg工作存储
12.如使用Hash算法,则调用find_hash_columns和build_hash_table方法初始化相关数据
13.调用initialize_phase/select_current_set初始化阶段数据
14.检索聚合函数信息,初始化per-agg和per-trans数据不可变字段
15.构建一次就完成所有转换工作的表达式.
/* -----------------
 * ExecInitAgg
 *
 *	Creates the run-time information for the agg node produced by the
 *	planner and initializes its outer subtree.
 *
 *	node:	the Agg plan node; further Agg nodes describing additional
 *			grouping-set phases or hash tables are read from node->chain.
 *	estate: executor state handed to the expression-context, slot and
 *			child-node initialization routines called below.
 *	eflags: executor flags.  EXEC_FLAG_BACKWARD and EXEC_FLAG_MARK are
 *			asserted to be absent; EXEC_FLAG_REWIND is cleared before the
 *			child is initialized when the strategy is AGG_HASHED.
 *
 *	Returns the fully initialized AggState.
 * -----------------
 */
AggState *
ExecInitAgg(Agg *node, EState *estate, int eflags)
{
	AggState *aggstate;			/* node state being built and returned */
	AggStatePerAgg peraggs;		/* array of per-Aggref data */
	AggStatePerTrans pertransstates;	/* array of per-transition-state data */
	AggStatePerGroup *pergroups;	/* cursor into aggstate->all_pergroups */
	Plan *outerPlan;			/* outer (left) subplan */
	ExprContext *econtext;		/* the node's own (output) ExprContext */
	TupleDesc scanDesc;			/* tuple descriptor of the scan slot */
	int numaggs,				/* Aggref count after expression setup */
	transno,					/* last pertrans index handed out, or -1 */
	aggno;						/* last peragg index handed out, or -1 */
	int phase;					/* last sorted/plain slot used in phases[] */
	int phaseidx;				/* 0 = node itself, 1.. = node->chain */
	ListCell *l;				/* list iteration cursor */
	Bitmapset *all_grouped_cols = NULL;	/* grouping columns accumulated
										 * across phases */
	int numGroupingSets = 1;	/* max grouping sets in any phase */
	int numPhases;				/* number of entries in phases[] */
	int numHashes;				/* number of hash tables */
	int i = 0;
	int j = 0;
	bool use_hashing = (node->aggstrategy == AGG_HASHED ||
	node->aggstrategy == AGG_MIXED);	/* does this node build hash tables? */

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	aggstate = makeNode(AggState);
	aggstate->ss.ps.plan = (Plan *) node;
	aggstate->ss.ps.state = estate;
	aggstate->ss.ps.ExecProcNode = ExecAgg;
	aggstate->aggs = NIL;
	aggstate->numaggs = 0;
	aggstate->numtrans = 0;
	aggstate->aggstrategy = node->aggstrategy;
	aggstate->aggsplit = node->aggsplit;
	aggstate->maxsets = 0;
	aggstate->projected_set = -1;
	aggstate->current_set = 0;
	aggstate->peragg = NULL;
	aggstate->pertrans = NULL;
	aggstate->curperagg = NULL;
	aggstate->curpertrans = NULL;
	aggstate->input_done = false;
	aggstate->agg_done = false;
	aggstate->pergroups = NULL;
	aggstate->grp_firstTuple = NULL;
	aggstate->sort_in = NULL;
	aggstate->sort_out = NULL;

	/*
	 * phases[0] always exists, but is dummy in sorted/plain mode
	 */
	numPhases = (use_hashing ? 1 : 2);
	numHashes = (use_hashing ? 1 : 0);

	/*
	 * Calculate the maximum number of grouping sets in any phase; this
	 * determines the size of some allocations. Also calculate the number of
	 * phases, since all hashed/mixed nodes contribute to only a single phase.
	 */
	if (node->groupingSets)
	{
		numGroupingSets = list_length(node->groupingSets);
		foreach(l, node->chain)
		{
			Agg *agg = lfirst(l);
			numGroupingSets = Max(numGroupingSets,
			list_length(agg->groupingSets));
			/*
			 * additional AGG_HASHED aggs become part of phase 0, but all
			 * others add an extra phase.
			 */
			if (agg->aggstrategy != AGG_HASHED)
				++numPhases;
			else
				++numHashes;
		}
	}

	/* record the sizes and allocate one aggcontext slot per grouping set */
	aggstate->maxsets = numGroupingSets;
	aggstate->numphases = numPhases;
	aggstate->aggcontexts = (ExprContext **)
	palloc0(sizeof(ExprContext *) * numGroupingSets);

	/*
	 * Create expression contexts. We need three or more, one for
	 * per-input-tuple processing, one for per-output-tuple processing, one
	 * for all the hashtables, and one for each grouping set. The per-tuple
	 * memory context of the per-grouping-set ExprContexts (aggcontexts)
	 * replaces the standalone memory context formerly used to hold transition
	 * values. We cheat a little by using ExecAssignExprContext() to build
	 * all of them.
	 *
	 * Each call below overwrites ss.ps.ps_ExprContext; the value is copied
	 * into its real home right away, and only the context created by the
	 * final call is left in ps_ExprContext (it is the one used later for
	 * ecxt_aggvalues/ecxt_aggnulls).
	 *
	 * NOTE: the details of what is stored in aggcontexts and what is stored
	 * in the regular per-query memory context are driven by a simple
	 * decision: we want to reset the aggcontext at group boundaries (if not
	 * hashing) and in ExecReScanAgg to recover no-longer-wanted space.
	 */
	ExecAssignExprContext(estate, &aggstate->ss.ps);
	aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
	for (i = 0; i < numGroupingSets; ++i)
	{
		ExecAssignExprContext(estate, &aggstate->ss.ps);
		aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
	}
	if (use_hashing)
	{
		ExecAssignExprContext(estate, &aggstate->ss.ps);
		aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
	}
	ExecAssignExprContext(estate, &aggstate->ss.ps);

	/*
	 * Initialize child nodes.
	 *
	 * If we are doing a hashed aggregation then the child plan does not need
	 * to handle REWIND efficiently; see ExecReScanAgg.
	 */
	if (node->aggstrategy == AGG_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlan = outerPlan(node);
	outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);

	/*
	 * initialize source tuple type.
	 */
	ExecCreateScanSlotFromOuterPlan(estate, &aggstate->ss);
	scanDesc = aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
	/* an extra slot for sort results is only needed with chained nodes */
	if (node->chain)
		aggstate->sort_slot = ExecInitExtraTupleSlot(estate, scanDesc);

	/*
	 * Initialize result type, slot and projection.
	 */
	ExecInitResultTupleSlotTL(estate, &aggstate->ss.ps);
	ExecAssignProjectionInfo(&aggstate->ss.ps, NULL);

	/*
	 * initialize child expressions
	 *
	 * We expect the parser to have checked that no aggs contain other agg
	 * calls in their arguments (and just to be sure, we verify it again while
	 * initializing the plan node). This would make no sense under SQL
	 * semantics, and it's forbidden by the spec. Because it is true, we
	 * don't need to worry about evaluating the aggs in any particular order.
	 *
	 * Note: execExpr.c finds Aggrefs for us, and adds their AggrefExprState
	 * nodes to aggstate->aggs. Aggrefs in the qual are found here; Aggrefs
	 * in the targetlist are found during ExecAssignProjectionInfo, above.
	 */
	aggstate->ss.ps.qual =
	ExecInitQual(node->plan.qual, (PlanState *) aggstate);

	/*
	 * We should now have found all Aggrefs in the targetlist and quals.
	 */
	numaggs = aggstate->numaggs;
	Assert(numaggs == list_length(aggstate->aggs));

	/*
	 * For each phase, prepare grouping set data and fmgr lookup data for
	 * compare functions. Accumulate all_grouped_cols in passing.
	 */
	aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
	aggstate->num_hashes = numHashes;
	if (numHashes)
	{
		/*
		 * Phase 0 holds one "set" per hash table; numsets starts at zero and
		 * is bumped in the loop below each time a hashed node is seen.
		 */
		aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
		aggstate->phases[0].numsets = 0;
		aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
		aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
	}
	phase = 0;
	for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
	{
		Agg *aggnode;
		Sort *sortnode;

		if (phaseidx > 0)
		{
			/* later iterations take their Agg node from the chain */
			aggnode = list_nth_node(Agg, node->chain, phaseidx - 1);
			/* castNode checks that the chained node's left subtree is a Sort */
			sortnode = castNode(Sort, aggnode->plan.lefttree);
		}
		else
		{
			/* first iteration handles the top-level node itself */
			aggnode = node;
			sortnode = NULL;
		}
		Assert(phase <= 1 || sortnode);
		if (aggnode->aggstrategy == AGG_HASHED
		|| aggnode->aggstrategy == AGG_MIXED)
		{
			/* hashed grouping: everything is folded into phase 0 */
			AggStatePerPhase phasedata = &aggstate->phases[0];
			AggStatePerHash perhash;
			Bitmapset *cols = NULL;

			Assert(phase == 0);
			/* claim the next hash-table slot */
			i = phasedata->numsets++;
			perhash = &aggstate->perhash[i];
			/* phase 0 always points to the "real" Agg in the hash case */
			phasedata->aggnode = node;
			phasedata->aggstrategy = node->aggstrategy;
			/* but the actual Agg node representing this hash is saved here */
			perhash->aggnode = aggnode;
			phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
			/* collect this node's grouping columns into a bitmap */
			for (j = 0; j < aggnode->numCols; ++j)
				cols = bms_add_member(cols, aggnode->grpColIdx[j]);
			phasedata->grouped_cols[i] = cols;
			all_grouped_cols = bms_add_members(all_grouped_cols, cols);
			continue;
		}
		else
		{
			/* sorted or plain grouping: this node gets a phase of its own */
			AggStatePerPhase phasedata = &aggstate->phases[++phase];
			int num_sets;

			phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
			if (num_sets)
			{
				phasedata->gset_lengths = palloc(num_sets * sizeof(int));
				phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
				i = 0;
				foreach(l, aggnode->groupingSets)
				{
					int current_length = list_length(lfirst(l));
					Bitmapset *cols = NULL;

					/* planner forces this to be correct */
					/* (each set uses the first current_length grouping columns) */
					for (j = 0; j < current_length; ++j)
						cols = bms_add_member(cols, aggnode->grpColIdx[j]);
					phasedata->grouped_cols[i] = cols;
					phasedata->gset_lengths[i] = current_length;
					++i;
				}
				/* only the first set of the phase is merged into the total */
				all_grouped_cols = bms_add_members(all_grouped_cols,
				phasedata->grouped_cols[0]);
			}
			else
			{
				/* no grouping sets is only expected for the top-level node */
				Assert(phaseidx == 0);
				phasedata->gset_lengths = NULL;
				phasedata->grouped_cols = NULL;
			}

			/*
			 * If we are grouping, precompute fmgr lookup data for inner loop.
			 */
			if (aggnode->aggstrategy == AGG_SORTED)
			{
				/* NOTE: this i shadows the function-level i within this block */
				int i = 0;

				Assert(aggnode->numCols > 0);
				/*
				 * Build a separate function for each subset of columns that
				 * need to be compared.  eqfunctions[k - 1] compares the first
				 * k grouping columns.
				 */
				phasedata->eqfunctions =
				(ExprState **) palloc0(aggnode->numCols * sizeof(ExprState *));
				/* for each grouping set */
				for (i = 0; i < phasedata->numsets; i++)
				{
					int length = phasedata->gset_lengths[i];

					/*
					 * An empty grouping set compares zero columns, so it
					 * needs no equality function.  Skipping it also keeps
					 * us from indexing eqfunctions[-1], which lies outside
					 * the array.
					 */
					if (length == 0)
						continue;
					/* a function for this length already exists; reuse it */
					if (phasedata->eqfunctions[length - 1] != NULL)
						continue;
					phasedata->eqfunctions[length - 1] =
					execTuplesMatchPrepare(scanDesc,
					length,
					aggnode->grpColIdx,
					aggnode->grpOperators,
					(PlanState *) aggstate);
				}
				/* and for all grouped columns, unless already computed */
				if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
				{
					phasedata->eqfunctions[aggnode->numCols - 1] =
					execTuplesMatchPrepare(scanDesc,
					aggnode->numCols,
					aggnode->grpColIdx,
					aggnode->grpOperators,
					(PlanState *) aggstate);
				}
			}
			phasedata->aggnode = aggnode;
			phasedata->aggstrategy = aggnode->aggstrategy;
			phasedata->sortnode = sortnode;
		}
	}

	/*
	 * Convert all_grouped_cols to a descending-order list.
	 * (Members are visited in ascending order and each is prepended.)
	 */
	i = -1;
	while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
		aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);

	/*
	 * Set up aggregate-result storage in the output expr context, and also
	 * allocate my private per-agg working storage
	 */
	econtext = aggstate->ss.ps.ps_ExprContext;
	econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
	econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
	peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
	pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData) * numaggs);
	aggstate->peragg = peraggs;
	aggstate->pertrans = pertransstates;
	/* one pointer array serves both the sorted/plain and the hashed groups */
	aggstate->all_pergroups =
	(AggStatePerGroup *) palloc0(sizeof(AggStatePerGroup)
	* (numGroupingSets + numHashes));
	pergroups = aggstate->all_pergroups;
	if (node->aggstrategy != AGG_HASHED)
	{
		/* sorted/plain groups take the first numGroupingSets entries */
		for (i = 0; i < numGroupingSets; i++)
		{
			pergroups[i] = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData)
			* numaggs);
		}
		aggstate->pergroups = pergroups;
		/* advance so hash_pergroup starts right after them */
		pergroups += numGroupingSets;
	}

	/*
	 * Hashing can only appear in the initial phase.
	 */
	if (use_hashing)
	{
		/* this is an array of pointers, not structures */
		aggstate->hash_pergroup = pergroups;
		find_hash_columns(aggstate);
		build_hash_table(aggstate);
		aggstate->table_filled = false;
	}

	/*
	 * Initialize current phase-dependent values to initial phase. The initial
	 * phase is 1 (first sort pass) for all strategies that use sorting (if
	 * hashing is being done too, then phase 0 is processed last); but if only
	 * hashing is being done, then phase 0 is all there is.
	 */
	if (node->aggstrategy == AGG_HASHED)
	{
		aggstate->current_phase = 0;
		initialize_phase(aggstate, 0);
		select_current_set(aggstate, 0, true);
	}
	else
	{
		aggstate->current_phase = 1;
		initialize_phase(aggstate, 1);
		select_current_set(aggstate, 0, false);
	}

	/* -----------------
	 * Perform lookups of aggregate function info, and initialize the
	 * unchanging fields of the per-agg and per-trans data.
	 *
	 * We try to optimize by detecting duplicate aggregate functions so that
	 * their state and final values are re-used, rather than needlessly being
	 * re-calculated independently. We also detect aggregates that are not
	 * the same, but which can share the same transition state.
	 *
	 * Scenarios:
	 *
	 * 1. Identical aggregate function calls appear in the query:
	 *
	 * SELECT SUM(x) FROM ... HAVING SUM(x) > 0
	 *
	 * Since these aggregates are identical, we only need to calculate
	 * the value once. Both aggregates will share the same 'aggno' value.
	 *
	 * 2. Two different aggregate functions appear in the query, but the
	 * aggregates have the same arguments, transition functions and
	 * initial values (and, presumably, different final functions):
	 *
	 * SELECT AVG(x), STDDEV(x) FROM ...
	 *
	 * In this case we must create a new peragg for the varying aggregate,
	 * and we need to call the final functions separately, but we need
	 * only run the transition function once. (This requires that the
	 * final functions be nondestructive of the transition state, but
	 * that's required anyway for other reasons.)
	 *
	 * For either of these optimizations to be valid, all aggregate properties
	 * used in the transition phase must be the same, including any modifiers
	 * such as ORDER BY, DISTINCT and FILTER, and the arguments mustn't
	 * contain any volatile functions.
	 * -----------------
	 */
	aggno = -1;
	transno = -1;
	foreach(l, aggstate->aggs)
	{
		AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
		Aggref *aggref = aggrefstate->aggref;
		AggStatePerAgg peragg;
		AggStatePerTrans pertrans;
		int existing_aggno;
		int existing_transno;
		List *same_input_transnos;
		Oid inputTypes[FUNC_MAX_ARGS];
		int numArguments;
		int numDirectArgs;
		HeapTuple aggTuple;
		Form_pg_aggregate aggform;
		AclResult aclresult;
		Oid transfn_oid,
		finalfn_oid;
		bool shareable;
		Oid serialfn_oid,
		deserialfn_oid;
		Expr *finalfnexpr;
		Oid aggtranstype;
		Datum textInitVal;
		Datum initValue;
		bool initValueIsNull;

		/* Planner should have assigned aggregate to correct level */
		Assert(aggref->agglevelsup == 0);
		/* ... and the split mode should match */
		Assert(aggref->aggsplit == aggstate->aggsplit);

		/* 1. Check for already processed aggs which can be re-used */
		existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
		&same_input_transnos);
		if (existing_aggno != -1)
		{
			/*
			 * Existing compatible agg found. so just point the Aggref to the
			 * same per-agg struct.
			 */
			aggrefstate->aggno = existing_aggno;
			continue;
		}

		/* Mark Aggref state node with assigned index in the result array */
		peragg = &peraggs[++aggno];
		peragg->aggref = aggref;
		aggrefstate->aggno = aggno;

		/* Fetch the pg_aggregate row */
		aggTuple = SearchSysCache1(AGGFNOID,
		ObjectIdGetDatum(aggref->aggfnoid));
		if (!HeapTupleIsValid(aggTuple))
			elog(ERROR, "cache lookup failed for aggregate %u",
			aggref->aggfnoid);
		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);

		/* Check permission to call aggregate function */
		aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
		ACL_EXECUTE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, OBJECT_AGGREGATE,
			get_func_name(aggref->aggfnoid));
		/* invoke the function-execute hook for the aggregate itself */
		InvokeFunctionExecuteHook(aggref->aggfnoid);

		/* planner recorded transition state type in the Aggref itself */
		aggtranstype = aggref->aggtranstype;
		Assert(OidIsValid(aggtranstype));

		/*
		 * If this aggregation is performing state combines, then instead of
		 * using the transition function, we'll use the combine function
		 */
		if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
		{
			transfn_oid = aggform->aggcombinefn;
			/* If not set then the planner messed up */
			if (!OidIsValid(transfn_oid))
				elog(ERROR, "combinefn not set for aggregate function");
		}
		else
			transfn_oid = aggform->aggtransfn;

		/* Final function only required if we're finalizing the aggregates */
		if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
			peragg->finalfn_oid = finalfn_oid = InvalidOid;
		else
			peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;

		/*
		 * If finalfn is marked read-write, we can't share transition states;
		 * but it is okay to share states for AGGMODIFY_SHAREABLE aggs. Also,
		 * if we're not executing the finalfn here, we can share regardless.
		 */
		shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE) ||
		(finalfn_oid == InvalidOid);
		peragg->shareable = shareable;
		serialfn_oid = InvalidOid;
		deserialfn_oid = InvalidOid;

		/*
		 * Check if serialization/deserialization is required. We only do it
		 * for aggregates that have transtype INTERNAL.
		 */
		if (aggtranstype == INTERNALOID)
		{
			/*
			 * The planner should only have generated a serialize agg node if
			 * every aggregate with an INTERNAL state has a serialization
			 * function. Verify that.
			 */
			if (DO_AGGSPLIT_SERIALIZE(aggstate->aggsplit))
			{
				/* serialization only valid when not running finalfn */
				Assert(DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
				if (!OidIsValid(aggform->aggserialfn))
					elog(ERROR, "serialfunc not provided for serialization aggregation");
				serialfn_oid = aggform->aggserialfn;
			}
			/* Likewise for deserialization functions */
			if (DO_AGGSPLIT_DESERIALIZE(aggstate->aggsplit))
			{
				/* deserialization only valid when combining states */
				Assert(DO_AGGSPLIT_COMBINE(aggstate->aggsplit));
				if (!OidIsValid(aggform->aggdeserialfn))
					elog(ERROR, "deserialfunc not provided for deserialization aggregation");
				deserialfn_oid = aggform->aggdeserialfn;
			}
		}

		/* Check that aggregate owner has permission to call component fns */
		{
			HeapTuple procTuple;
			Oid aggOwner;

			/* the owner is read from the aggregate's pg_proc row */
			procTuple = SearchSysCache1(PROCOID,
			ObjectIdGetDatum(aggref->aggfnoid));
			if (!HeapTupleIsValid(procTuple))
				elog(ERROR, "cache lookup failed for function %u",
				aggref->aggfnoid);
			aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
			ReleaseSysCache(procTuple);
			aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
			ACL_EXECUTE);
			if (aclresult != ACLCHECK_OK)
				aclcheck_error(aclresult, OBJECT_FUNCTION,
				get_func_name(transfn_oid));
			InvokeFunctionExecuteHook(transfn_oid);
			if (OidIsValid(finalfn_oid))
			{
				aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
				ACL_EXECUTE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, OBJECT_FUNCTION,
					get_func_name(finalfn_oid));
				InvokeFunctionExecuteHook(finalfn_oid);
			}
			if (OidIsValid(serialfn_oid))
			{
				aclresult = pg_proc_aclcheck(serialfn_oid, aggOwner,
				ACL_EXECUTE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, OBJECT_FUNCTION,
					get_func_name(serialfn_oid));
				InvokeFunctionExecuteHook(serialfn_oid);
			}
			if (OidIsValid(deserialfn_oid))
			{
				aclresult = pg_proc_aclcheck(deserialfn_oid, aggOwner,
				ACL_EXECUTE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, OBJECT_FUNCTION,
					get_func_name(deserialfn_oid));
				InvokeFunctionExecuteHook(deserialfn_oid);
			}
		}

		/*
		 * Get actual datatypes of the (nominal) aggregate inputs. These
		 * could be different from the agg's declared input types, when the
		 * agg accepts ANY or a polymorphic type.
		 */
		numArguments = get_aggregate_argtypes(aggref, inputTypes);
		/* Count the "direct" arguments, if any */
		numDirectArgs = list_length(aggref->aggdirectargs);
		/* Detect how many arguments to pass to the finalfn */
		if (aggform->aggfinalextra)
			peragg->numFinalArgs = numArguments + 1;
		else
			peragg->numFinalArgs = numDirectArgs + 1;
		/* Initialize any direct-argument expressions */
		peragg->aggdirectargs = ExecInitExprList(aggref->aggdirectargs,
		(PlanState *) aggstate);

		/*
		 * build expression trees using actual argument & result types for the
		 * finalfn, if it exists and is required.
		 */
		if (OidIsValid(finalfn_oid))
		{
			build_aggregate_finalfn_expr(inputTypes,
			peragg->numFinalArgs,
			aggtranstype,
			aggref->aggtype,
			aggref->inputcollid,
			finalfn_oid,
			&finalfnexpr);
			fmgr_info(finalfn_oid, &peragg->finalfn);
			fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
		}

		/* get info about the output value's datatype */
		get_typlenbyval(aggref->aggtype,
		&peragg->resulttypeLen,
		&peragg->resulttypeByVal);

		/*
		 * initval is potentially null, so don't try to access it as a struct
		 * field. Must do it the hard way with SysCacheGetAttr.
		 */
		textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
		Anum_pg_aggregate_agginitval,
		&initValueIsNull);
		if (initValueIsNull)
			initValue = (Datum) 0;
		else
			initValue = GetAggInitVal(textInitVal, aggtranstype);

		/*
		 * 2. Build working state for invoking the transition function, or
		 * look up previously initialized working state, if we can share it.
		 *
		 * find_compatible_peragg() already collected a list of shareable
		 * per-Trans's with the same inputs. Check if any of them have the
		 * same transition function and initial value.
		 */
		existing_transno = find_compatible_pertrans(aggstate, aggref,
		shareable,
		transfn_oid, aggtranstype,
		serialfn_oid, deserialfn_oid,
		initValue, initValueIsNull,
		same_input_transnos);
		if (existing_transno != -1)
		{
			/*
			 * Existing compatible trans found, so just point the 'peragg' to
			 * the same per-trans struct, and mark the trans state as shared.
			 */
			pertrans = &pertransstates[existing_transno];
			pertrans->aggshared = true;
			peragg->transno = existing_transno;
		}
		else
		{
			/* nothing to share: set up a new per-trans entry */
			pertrans = &pertransstates[++transno];
			build_pertrans_for_aggref(pertrans, aggstate, estate,
			aggref, transfn_oid, aggtranstype,
			serialfn_oid, deserialfn_oid,
			initValue, initValueIsNull,
			inputTypes, numArguments);
			peragg->transno = transno;
		}
		ReleaseSysCache(aggTuple);
	}

	/*
	 * Update aggstate->numaggs to be the number of unique aggregates found.
	 * Also set numtrans to the number of unique transition states found.
	 */
	aggstate->numaggs = aggno + 1;
	aggstate->numtrans = transno + 1;

	/*
	 * Last, check whether any more aggregates got added onto the node while
	 * we processed the expressions for the aggregate arguments (including not
	 * only the regular arguments and FILTER expressions handled immediately
	 * above, but any direct arguments we might've handled earlier). If so,
	 * we have nested aggregate functions, which is semantically nonsensical,
	 * so complain. (This should have been caught by the parser, so we don't
	 * need to work hard on a helpful error message; but we defend against it
	 * here anyway, just to be sure.)
	 */
	if (numaggs != list_length(aggstate->aggs))
		ereport(ERROR,
		(errcode(ERRCODE_GROUPING_ERROR),
		errmsg("aggregate function calls cannot be nested")));

	/*
	 * Build expressions doing all the transition work at once. We build a
	 * different one for each phase, as the number of transition function
	 * invocation can differ between phases. Note this'll work both for
	 * transition and combination functions (although there'll only be one
	 * phase in the latter case).
	 */
	for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
	{
		/* NOTE: this pointer shadows the function-level int named phase */
		AggStatePerPhase phase = &aggstate->phases[phaseidx];
		bool dohash = false;
		bool dosort = false;

		/* phase 0 doesn't necessarily exist */
		if (!phase->aggnode)
			continue;
		if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 1)
		{
			/*
			 * Phase one, and only phase one, in a mixed agg performs both
			 * sorting and aggregation.
			 */
			dohash = true;
			dosort = true;
		}
		else if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 0)
		{
			/*
			 * No need to compute a transition function for an AGG_MIXED phase
			 * 0 - the contents of the hashtables will have been computed
			 * during phase 1.
			 */
			continue;
		}
		else if (phase->aggstrategy == AGG_PLAIN ||
		phase->aggstrategy == AGG_SORTED)
		{
			dohash = false;
			dosort = true;
		}
		else if (phase->aggstrategy == AGG_HASHED)
		{
			dohash = true;
			dosort = false;
		}
		else
			Assert(false);
		phase->evaltrans = ExecBuildAggTrans(aggstate, phase, dosort, dohash);
	}
	return aggstate;
}
测试脚本
//禁用并行
testdb=# set max_parallel_workers_per_gather=0;
SET
testdb=# explain verbose select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;
QUERY PLAN
---------------------------------------------------------------------------
HashAggregate (cost=13677.00..13677.06 rows=5 width=45)
Output: bh, avg(c1), min(c1), max(c2)
Group Key: t_agg.bh
-> Seq Scan on public.t_agg (cost=0.00..8677.00 rows=500000 width=13)
Output: bh, c1, c2, c3, c4, c5, c6
(5 rows)
跟踪分析
(gdb) b ExecInitAgg
Breakpoint 1 at 0x6eefc9: file nodeAgg.c, line 2096.
(gdb) c
Continuing.
Breakpoint 1, ExecInitAgg (node=0x2d903a0, estate=0x2d52428, eflags=16) at nodeAgg.c:2096
2096 Bitmapset *all_grouped_cols = NULL;
(gdb)
输入参数
(gdb) p *node
$1 = {plan = {type = T_Agg, startup_cost = 13677, total_cost = 13677.0625, plan_rows = 5, plan_width = 45,
parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x2d631f8, qual = 0x0,
lefttree = 0x2d62cb8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, aggstrategy = AGG_HASHED,
aggsplit = AGGSPLIT_SIMPLE, numCols = 1, grpColIdx = 0x2d62fa8, grpOperators = 0x2d62f88, numGroups = 5, aggParams = 0x0,
groupingSets = 0x0, chain = 0x0}
(gdb) p *estate
$2 = {type = T_EState, es_direction = ForwardScanDirection, es_snapshot = 0x2d00b80, es_crosscheck_snapshot = 0x0,
es_range_table = 0x2d62ff0, es_plannedstmt = 0x2c72530,
es_sourceText = 0x2c70d78 "select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;", es_junkFilter = 0x0,
es_output_cid = 0, es_result_relations = 0x0, es_num_result_relations = 0, es_result_relation_info = 0x0,
es_root_result_relations = 0x0, es_num_root_result_relations = 0, es_tuple_routing_result_relations = 0x0,
es_trig_target_relations = 0x0, es_trig_tuple_slot = 0x0, es_trig_oldtup_slot = 0x0, es_trig_newtup_slot = 0x0,
es_param_list_info = 0x0, es_param_exec_vals = 0x0, es_queryEnv = 0x0, es_query_cxt = 0x2d52310, es_tupleTable = 0x0,
es_rowMarks = 0x0, es_processed = 0, es_lastoid = 0, es_top_eflags = 16, es_instrument = 0, es_finished = false,
es_exprcontexts = 0x0, es_subplanstates = 0x0, es_auxmodifytables = 0x0, es_per_tuple_exprcontext = 0x0,
es_epqTuple = 0x0, es_epqTupleSet = 0x0, es_epqScanDone = 0x0, es_use_parallel_mode = false, es_query_dsa = 0x0,
es_jit_flags = 0, es_jit = 0x0, es_jit_worker_instr = 0x0}
(gdb)
使用Hash算法计算
(gdb) n
2097 int numGroupingSets = 1;
(gdb)
2100 int i = 0;
(gdb)
2101 int j = 0;
(gdb)
2102 bool use_hashing = (node->aggstrategy == AGG_HASHED ||
(gdb)
2106 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
(gdb) p use_hashing
$3 = true
(gdb)
1.初始化AggState结构体
(gdb) n
2111 aggstate = makeNode(AggState);
(gdb)
2112 aggstate->ss.ps.plan = (Plan *) node;
(gdb)
2113 aggstate->ss.ps.state = estate;
(gdb)
2114 aggstate->ss.ps.ExecProcNode = ExecAgg;
(gdb)
2116 aggstate->aggs = NIL;
(gdb)
2117 aggstate->numaggs = 0;
(gdb)
2118 aggstate->numtrans = 0;
(gdb)
2119 aggstate->aggstrategy = node->aggstrategy;
(gdb)
2120 aggstate->aggsplit = node->aggsplit;
(gdb)
2121 aggstate->maxsets = 0;
(gdb)
2122 aggstate->projected_set = -1;
(gdb)
2123 aggstate->current_set = 0;
(gdb)
2124 aggstate->peragg = NULL;
(gdb)
2125 aggstate->pertrans = NULL;
(gdb)
2126 aggstate->curperagg = NULL;
(gdb)
2127 aggstate->curpertrans = NULL;
(gdb)
2128 aggstate->input_done = false;
(gdb)
2129 aggstate->agg_done = false;
(gdb)
2130 aggstate->pergroups = NULL;
(gdb)
2131 aggstate->grp_firstTuple = NULL;
(gdb)
2132 aggstate->sort_in = NULL;
(gdb)
2133 aggstate->sort_out = NULL;
(gdb)
(gdb) p *aggstate
$4 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0,
lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0,
ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x0}, ss_currentRelation = 0x0, ss_currentScanDesc = 0x0,
ss_ScanTupleSlot = 0x0}, aggs = 0x0, numaggs = 0, numtrans = 0, aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE,
phase = 0x0, numphases = 0, current_phase = 0, peragg = 0x0, pertrans = 0x0, hashcontext = 0x0, aggcontexts = 0x0,
tmpcontext = 0x0, curaggcontext = 0x0, curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false,
projected_set = -1, current_set = 0, grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 0, phases = 0x0,
sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false,
num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0, all_pergroups = 0x0, combinedproj = 0x0}
(gdb)
2.计算分为几个阶段(Hash vs Group)
(gdb)
2138 numPhases = (use_hashing ? 1 : 2);
(gdb) p numPhases
$5 = 1
(gdb) p numHashes
$6 = 1
(gdb)
Hash只需要一个阶段,执行Hash
3.如存在grouping set,则初始化相关信息
(gdb) n
2168 aggstate->maxsets = numGroupingSets;
这里没有grouping set,不需要初始化相关信息
4.分配内存上下文
(gdb)
2169 aggstate->numphases = numPhases;
(gdb)
2172 palloc0(sizeof(ExprContext *) * numGroupingSets);
(gdb)
2171 aggstate->aggcontexts = (ExprContext **)
(gdb)
2188 ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb)
2189 aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
(gdb)
2191 for (i = 0; i < numGroupingSets; ++i)
(gdb)
2193 ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb)
2194 aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
(gdb)
2191 for (i = 0; i < numGroupingSets; ++i)
(gdb)
2197 if (use_hashing)
(gdb)
2199 ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb)
2200 aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
(gdb)
2203 ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb)
2211 if (node->aggstrategy == AGG_HASHED)
(gdb)
2212 eflags &= ~EXEC_FLAG_REWIND;
(gdb)
5.初始化outer plan子节点
(gdb)
2213 outerPlan = outerPlan(node);
(gdb) n
2214 outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);
(gdb) p *outerPlan
$7 = {type = T_SeqScan, startup_cost = 0, total_cost = 8677, plan_rows = 500000, plan_width = 13, parallel_aware = false,
parallel_safe = false, plan_node_id = 1, targetlist = 0x2d62770, qual = 0x0, lefttree = 0x0, righttree = 0x0,
initPlan = 0x0, extParam = 0x0, allParam = 0x0}
outer(左树)节点为SeqScan,顺序全表扫描.
6.初始化结果类型,slot和投影
(gdb) n
2219 ExecCreateScanSlotFromOuterPlan(estate, &aggstate->ss);
(gdb) n
2220 scanDesc = aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
(gdb) n
2221 if (node->chain)
(gdb) p *aggstate
$8 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0,
lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0,
ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0,
ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x0, numaggs = 0, numtrans = 0,
aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x0,
pertrans = 0x0, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0,
curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0,
grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 1, phases = 0x0, sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0,
pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0,
all_pergroups = 0x0, combinedproj = 0x0}
(gdb)
(gdb) p *scanDesc
$9 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
(gdb) p *aggstate->ss.ps.scandesc
$10 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
(gdb)
(gdb) n
2227 ExecInitResultTupleSlotTL(estate, &aggstate->ss.ps);
(gdb)
2228 ExecAssignProjectionInfo(&aggstate->ss.ps, NULL);
(gdb)
2244 ExecInitQual(node->plan.qual, (PlanState *) aggstate);
(gdb) p *aggstate
$11 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0,
lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0,
ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0,
ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 0,
aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x0,
pertrans = 0x0, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0,
curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0,
grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 1, phases = 0x0, sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0,
pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0,
all_pergroups = 0x0, combinedproj = 0x0}
(gdb) p *aggstate->ss.ps.scandesc
$12 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
#### 结果元组Slot
(gdb) p *aggstate->ss.ps.ps_ResultTupleSlot
$13 = {type = T_TupleTableSlot, tts_isempty = true, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false,
tts_tuple = 0x0, tts_tupleDescriptor = 0x2d53598, tts_mcxt = 0x2d52310, tts_buffer = 0, tts_nvalid = 0,
tts_values = 0x2d53810, tts_isnull = 0x2d53830, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
#### 投影信息
(gdb) p *aggstate->ss.ps.ps_ProjInfo
$14 = {type = T_ProjectionInfo, pi_state = {tag = {type = T_ExprState}, flags = 6 '\006', resnull = false, resvalue = 0,
resultslot = 0x2d537b0, steps = 0x2d53988, evalfunc = 0x6cd882 <ExecInterpExprStillValid>, expr = 0x2d631f8,
evalfunc_private = 0x6cb43e <ExecInterpExpr>, steps_len = 9, steps_alloc = 16, parent = 0x2d52640, ext_params = 0x0,
innermost_caseval = 0x0, innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0},
pi_exprContext = 0x2d52af0}
(gdb)
7.初始化子表达式
(gdb) n
2243 aggstate->ss.ps.qual =
(gdb)
2249 numaggs = aggstate->numaggs;
(gdb) p *aggstate->ss.ps.qual
Cannot access memory at address 0x0
(gdb)
(gdb) n
2250 Assert(numaggs == list_length(aggstate->aggs));
(gdb) p aggstate->numaggs
$16 = 3
表达式为NULL,一共有3个聚合函数
8.为AggStatePerPhaseData/AggStatePerHashData等结构体分配内存
(gdb) n
2256 aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
(gdb)
2258 aggstate->num_hashes = numHashes;
(gdb)
2259 if (numHashes)
(gdb)
2261 aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
(gdb)
2262 aggstate->phases[0].numsets = 0;
(gdb)
2263 aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
(gdb) n
2264 aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
(gdb)
2267 phase = 0;
(gdb)
(gdb) p aggstate->phases[0]
$17 = {aggstrategy = AGG_PLAIN, numsets = 0, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x0, sortnode = 0x0, evaltrans = 0x0}
9.循环遍历各个阶段
9.1计算分组列,存储在phasedata->grouped_cols数组和all_grouped_cols中
9.2初始化AggState->phases数组(数组元素对应的结构体为AggStatePerPhase)
9.3初始化AggState->perhash数组(对应的结构体为AggStatePerHash)
(gdb) n
2268 for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
(gdb) p list_length(node->chain)
$18 = 0
(gdb) n
2273 if (phaseidx > 0)
(gdb)
2280 aggnode = node;
(gdb) p *node
$19 = {plan = {type = T_Agg, startup_cost = 13677, total_cost = 13677.0625, plan_rows = 5, plan_width = 45,
parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x2d631f8, qual = 0x0,
lefttree = 0x2d62cb8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, aggstrategy = AGG_HASHED,
aggsplit = AGGSPLIT_SIMPLE, numCols = 1, grpColIdx = 0x2d62fa8, grpOperators = 0x2d62f88, numGroups = 5, aggParams = 0x0,
groupingSets = 0x0, chain = 0x0}
(gdb) n
2281 sortnode = NULL;
(gdb)
2284 Assert(phase <= 1 || sortnode);
(gdb)
2286 if (aggnode->aggstrategy == AGG_HASHED
(gdb)
2289 AggStatePerPhase phasedata = &aggstate->phases[0];
(gdb)
2291 Bitmapset *cols = NULL;
(gdb)
2293 Assert(phase == 0);
(gdb)
2294 i = phasedata->numsets++;
(gdb)
2295 perhash = &aggstate->perhash[i];
(gdb)
2298 phasedata->aggnode = node;
(gdb) p *phasedata
$20 = {aggstrategy = AGG_PLAIN, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x0, sortnode = 0x0, evaltrans = 0x0}
(gdb) p i
$21 = 0
(gdb) n
2299 phasedata->aggstrategy = node->aggstrategy;
(gdb)
2302 perhash->aggnode = aggnode;
(gdb)
2304 phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
(gdb)
2306 for (j = 0; j < aggnode->numCols; ++j)
(gdb) p aggnode->numCols
$22 = 1
(gdb) n
2307 cols = bms_add_member(cols, aggnode->grpColIdx[j]);
(gdb)
2306 for (j = 0; j < aggnode->numCols; ++j)
(gdb)
2309 phasedata->grouped_cols[i] = cols;
(gdb) p cols
$23 = (Bitmapset *) 0x2d54028
(gdb) p *cols
$24 = {nwords = 1, words = 0x2d5402c}
(gdb) p *cols->words
$25 = 2
(gdb) n
2311 all_grouped_cols = bms_add_members(all_grouped_cols, cols);
(gdb)
2312 continue;
(gdb) p all_grouped_cols
$26 = (Bitmapset *) 0x2d54048
(gdb) p *all_grouped_cols
$27 = {nwords = 1, words = 0x2d5404c}
(gdb) p *all_grouped_cols->words
$28 = 2
(gdb) n
2268 for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
(gdb)
2406 i = -1;
10.转换all_grouped_cols为倒序链表
2407 while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
(gdb) p *all_grouped_cols
$29 = {nwords = 1, words = 0x2d5404c}
(gdb) n
2408 aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
(gdb)
2407 while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
(gdb)
11.在输出expr上下文中设置aggregate-result存储,同时分配私有per-agg工作存储
(gdb)
2414 econtext = aggstate->ss.ps.ps_ExprContext;
(gdb)
2415 econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
(gdb)
2416 econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
(gdb)
2418 peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
(gdb)
2419 pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData) * numaggs);
(gdb)
2421 aggstate->peragg = peraggs;
(gdb)
2422 aggstate->pertrans = pertransstates;
(gdb)
2427 * (numGroupingSets + numHashes));
(gdb)
2426 (AggStatePerGroup *) palloc0(sizeof(AggStatePerGroup)
(gdb)
2425 aggstate->all_pergroups =
(gdb)
2428 pergroups = aggstate->all_pergroups;
(gdb)
2430 if (node->aggstrategy != AGG_HASHED)
(gdb)
12.如使用Hash算法,则调用find_hash_columns和build_hash_table方法初始化相关数据
(gdb)
2445 if (use_hashing)
(gdb)
2448 aggstate->hash_pergroup = pergroups;
(gdb)
2450 find_hash_columns(aggstate);
(gdb)
2451 build_hash_table(aggstate);
(gdb)
2452 aggstate->table_filled = false;
(gdb)
2461 if (node->aggstrategy == AGG_HASHED)
(gdb) p *aggstate
$30 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0,
lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0,
ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0,
ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 0,
aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x2d54770,
pertrans = 0x2d56780, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0,
curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0,
grouped_cols = 0x0, all_grouped_cols = 0x2d54090, maxsets = 1, phases = 0x2d53ef8, sort_in = 0x0, sort_out = 0x0,
sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 1, perhash = 0x2d53f50,
hash_pergroup = 0x2d54988, all_pergroups = 0x2d54988, combinedproj = 0x0}
(gdb) p *aggstate->perhash
$31 = {hashtable = 0x2d54ad8, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x2d54238, hashfunctions = 0x2d542d0,
eqfuncoids = 0x2d54a90, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x2d549f0,
hashGrpColIdxHash = 0x2d54a10, aggnode = 0x2d903a0}
(gdb) p *aggstate->hash_pergroup
$32 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->all_pergroups
$33 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->phases
$34 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb)
13.调用initialize_phase/select_current_set初始化阶段数据
(gdb) n
2463 aggstate->current_phase = 0;
(gdb)
2464 initialize_phase(aggstate, 0);
(gdb)
2465 select_current_set(aggstate, 0, true);
(gdb)
2510 aggno = -1;
(gdb) p *aggstate->phases
$35 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb)
14.检索聚合函数信息,初始化per-agg和per-trans数据不可变字段
(gdb) n
2463 aggstate->current_phase = 0;
(gdb)
2464 initialize_phase(aggstate, 0);
(gdb)
2465 select_current_set(aggstate, 0, true);
(gdb)
2510 aggno = -1;
(gdb) p *aggstate->phases
$35 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb) n
2511 transno = -1;
(gdb)
2512 foreach(l, aggstate->aggs)
(gdb)
2514 AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
(gdb)
2515 Aggref *aggref = aggrefstate->aggref;
(gdb)
2539 Assert(aggref->agglevelsup == 0);
(gdb) p aggstate->aggs
$36 = (List *) 0x2d53e00
(gdb) p *aggstate->aggs
$37 = {type = T_List, length = 3, head = 0x2d53ed0, tail = 0x2d53dd8}
(gdb) n
2541 Assert(aggref->aggsplit == aggstate->aggsplit);
(gdb)
2544 existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
(gdb)
2546 if (existing_aggno != -1)
(gdb)
2557 peragg = &peraggs[++aggno];
(gdb)
2558 peragg->aggref = aggref;
(gdb)
2559 aggrefstate->aggno = aggno;
(gdb)
2563 ObjectIdGetDatum(aggref->aggfnoid));
(gdb)
2562 aggTuple = SearchSysCache1(AGGFNOID,
(gdb) p aggref->aggfnoid
$38 = 2116
(gdb) n
2564 if (!HeapTupleIsValid(aggTuple))
(gdb) p *aggTuple
$39 = {t_len = 96, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 17}, t_tableOid = 2600, t_data = 0x7fa0c01f1630}
(gdb) p *aggTuple->t_data
$40 = {t_choice = {t_heap = {t_xmin = 1, t_xmax = 0, t_field3 = {t_cid = 0, t_xvac = 0}}, t_datum = {datum_len_ = 1,
datum_typmod = 0, datum_typeid = 0}}, t_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 17}, t_infomask2 = 22,
t_infomask = 2305, t_hoff = 32 ' ', t_bits = 0x7fa0c01f1647 "\377\377\017"}
(gdb) n
2567 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
(gdb)
2570 aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
(gdb) p *aggform
$41 = {aggfnoid = 2116, aggkind = 110 'n', aggnumdirectargs = 0, aggtransfn = 768, aggfinalfn = 0, aggcombinefn = 768,
aggserialfn = 0, aggdeserialfn = 0, aggmtransfn = 0, aggminvtransfn = 0, aggmfinalfn = 0, aggfinalextra = false,
aggmfinalextra = false, aggfinalmodify = 114 'r', aggmfinalmodify = 114 'r', aggsortop = 521, aggtranstype = 23,
aggtransspace = 0, aggmtranstype = 0, aggmtransspace = 0}
(gdb) n
2572 if (aclresult != ACLCHECK_OK)
(gdb)
2575 InvokeFunctionExecuteHook(aggref->aggfnoid);
(gdb)
2578 aggtranstype = aggref->aggtranstype;
(gdb)
2579 Assert(OidIsValid(aggtranstype));
(gdb)
2585 if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
(gdb)
2594 transfn_oid = aggform->aggtransfn;
(gdb)
2597 if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
(gdb) p transfn_oid
$42 = 768
(gdb) n
2600 peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
(gdb)
2607 shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE) ||
(gdb) p aggform->aggfinalfn
$43 = 0
(gdb) n
2609 peragg->shareable = shareable;
(gdb)
2611 serialfn_oid = InvalidOid;
(gdb) p shareable
$44 = true
(gdb) n
2612 deserialfn_oid = InvalidOid;
(gdb)
2618 if (aggtranstype == INTERNALOID)
(gdb)
2653 ObjectIdGetDatum(aggref->aggfnoid));
(gdb)
2652 procTuple = SearchSysCache1(PROCOID,
(gdb)
2654 if (!HeapTupleIsValid(procTuple))
(gdb)
2657 aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
(gdb)
2658 ReleaseSysCache(procTuple);
(gdb)
2660 aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
(gdb)
2662 if (aclresult != ACLCHECK_OK)
(gdb)
2665 InvokeFunctionExecuteHook(transfn_oid);
(gdb)
2666 if (OidIsValid(finalfn_oid))
(gdb)
2675 if (OidIsValid(serialfn_oid))
(gdb)
2684 if (OidIsValid(deserialfn_oid))
(gdb)
2700 numArguments = get_aggregate_argtypes(aggref, inputTypes);
(gdb)
2703 numDirectArgs = list_length(aggref->aggdirectargs);
(gdb)
2706 if (aggform->aggfinalextra)
(gdb)
2709 peragg->numFinalArgs = numDirectArgs + 1;
(gdb)
2712 peragg->aggdirectargs = ExecInitExprList(aggref->aggdirectargs,
(gdb)
2719 if (OidIsValid(finalfn_oid))
(gdb)
2733 get_typlenbyval(aggref->aggtype,
(gdb)
2741 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
(gdb)
2744 if (initValueIsNull)
(gdb)
2745 initValue = (Datum) 0;
(gdb)
2757 existing_transno = find_compatible_pertrans(aggstate, aggref,
(gdb)
2763 if (existing_transno != -1)
(gdb)
2775 pertrans = &pertransstates[++transno];
(gdb)
2776 build_pertrans_for_aggref(pertrans, aggstate, estate,
(gdb)
2781 peragg->transno = transno;
(gdb)
2783 ReleaseSysCache(aggTuple);
(gdb)
2512 foreach(l, aggstate->aggs)
(gdb)
########
testdb=# select oid,proname from pg_proc where oid in (2116,768);
oid | proname
------+------------
768 | int4larger
2116 | max
(2 rows)
########
下一个循环
...
(gdb) p *aggref
$45 = {xpr = {type = T_Aggref}, aggfnoid = 2132, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23,
aggargtypes = 0x2d63578, aggdirectargs = 0x0, args = 0x2d63688, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0,
aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 18}
...
(gdb) p transfn_oid
$49 = 769
...
testdb=# select oid,proname from pg_proc where oid in (2132,769);
oid | proname
------+-------------
769 | int4smaller
2132 | min
(2 rows)
第3遍循环
...
(gdb) p *aggref
$50 = {xpr = {type = T_Aggref}, aggfnoid = 2101, aggtype = 1700, aggcollid = 0, inputcollid = 0, aggtranstype = 1016,
aggargtypes = 0x2d632f0, aggdirectargs = 0x0, args = 0x2d63400, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0,
aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 10}
...
(gdb) p transfn_oid
$51 = 1963
...
2512 foreach(l, aggstate->aggs)
(gdb)
#####
testdb=# select oid,proname from pg_proc where oid in (2101,1963);
oid | proname
------+----------------
1963 | int4_avg_accum
2101 | avg
(2 rows)
#####
15.构建一次就完成所有转换工作的表达式.
(gdb)
2790 aggstate->numaggs = aggno + 1;
(gdb)
2791 aggstate->numtrans = transno + 1;
(gdb)
2803 if (numaggs != list_length(aggstate->aggs))
(gdb)
2815 for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
(gdb)
2817 AggStatePerPhase phase = &aggstate->phases[phaseidx];
(gdb)
2818 bool dohash = false;
(gdb)
2819 bool dosort = false;
(gdb)
2822 if (!phase->aggnode)
(gdb)
2825 if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 1)
(gdb)
2834 else if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 0)
(gdb)
2843 else if (phase->aggstrategy == AGG_PLAIN ||
(gdb)
2844 phase->aggstrategy == AGG_SORTED)
(gdb)
2843 else if (phase->aggstrategy == AGG_PLAIN ||
(gdb)
2849 else if (phase->aggstrategy == AGG_HASHED)
(gdb)
2851 dohash = true;
(gdb)
2852 dosort = false;
(gdb)
2857 phase->evaltrans = ExecBuildAggTrans(aggstate, phase, dosort, dohash);
(gdb)
2815 for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
(gdb)
2861 return aggstate;
(gdb)
最终结果
AggState结构体
(gdb) p *aggstate
$52 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0,
lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0,
ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0,
ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 3,
aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x2d53ef8, numphases = 1, current_phase = 0,
peragg = 0x2d54770, pertrans = 0x2d56780, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878,
curaggcontext = 0x2d52a30, curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1,
current_set = 0, grouped_cols = 0x0, all_grouped_cols = 0x2d54090, maxsets = 1, phases = 0x2d53ef8, sort_in = 0x0,
sort_out = 0x0, sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 1,
perhash = 0x2d53f50, hash_pergroup = 0x2d54988, all_pergroups = 0x2d54988, combinedproj = 0x0}
AggState->phase
(gdb) p *aggstate->phase
$53 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0,
aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x2d55e78}
AggState->peragg
(gdb) p *aggstate->peragg
$54 = {aggref = 0x2d63740, transno = 0, finalfn_oid = 0, finalfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = 4, resulttypeByVal = true, shareable = true}
(gdb) p *aggstate->peragg->aggref
$55 = {xpr = {type = T_Aggref}, aggfnoid = 2116, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23,
aggargtypes = 0x2d63800, aggdirectargs = 0x0, args = 0x2d63910, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0,
aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 26}
(gdb) p aggstate->peragg[1]
$56 = {aggref = 0x2d634b8, transno = 1, finalfn_oid = 0, finalfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = 4, resulttypeByVal = true, shareable = true}
(gdb) p *aggstate->peragg[1]->aggref
$57 = {xpr = {type = T_Aggref}, aggfnoid = 2132, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23,
aggargtypes = 0x2d63578, aggdirectargs = 0x0, args = 0x2d63688, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0,
aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 18}
(gdb) p aggstate->peragg[2]
$58 = {aggref = 0x2d63230, transno = 2, finalfn_oid = 1964, finalfn = {fn_addr = 0x978251 <int8_avg>, fn_oid = 1964,
fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310,
fn_expr = 0x2d55b80}, numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = -1, resulttypeByVal = false,
shareable = true}
(gdb) p *aggstate->peragg[2]->aggref
$59 = {xpr = {type = T_Aggref}, aggfnoid = 2101, aggtype = 1700, aggcollid = 0, inputcollid = 0, aggtranstype = 1016,
aggargtypes = 0x2d632f0, aggdirectargs = 0x0, args = 0x2d63400, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0,
aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 10}
AggState->pertrans
(gdb) p aggstate->pertrans[0]
$60 = {aggref = 0x2d63740, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 768, serialfn_oid = 0,
deserialfn_oid = 0, aggtranstype = 23, transfn = {fn_addr = 0x93e877 <int4larger>, fn_oid = 768, fn_nargs = 2,
fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55940},
serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000',
fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0,
sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false,
fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 0,
initValueIsNull = true, inputtypeLen = 0, transtypeLen = 4, inputtypeByVal = false, transtypeByVal = true,
sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d549b0, transfn_fcinfo = {flinfo = 0x2d567a8,
context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>},
argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0,
fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}},
deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
(gdb) p aggstate->pertrans[1]
$61 = {aggref = 0x2d634b8, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 769, serialfn_oid = 0,
deserialfn_oid = 0, aggtranstype = 23, transfn = {fn_addr = 0x93e8a3 <int4smaller>, fn_oid = 769, fn_nargs = 2,
fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55a90},
serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000',
fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0,
sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false,
fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 0,
initValueIsNull = true, inputtypeLen = 0, transtypeLen = 4, inputtypeByVal = false, transtypeByVal = true,
sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d549d0, transfn_fcinfo = {flinfo = 0x2d573f0,
context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>},
argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0,
fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}},
deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
(gdb) p aggstate->pertrans[2]
$62 = {aggref = 0x2d63230, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 1963, serialfn_oid = 0,
deserialfn_oid = 0, aggtranstype = 1016, transfn = {fn_addr = 0x977d8f <int4_avg_accum>, fn_oid = 1963, fn_nargs = 2,
fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55e20},
serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000',
fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0,
sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false,
fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 47537400,
initValueIsNull = false, inputtypeLen = 0, transtypeLen = -1, inputtypeByVal = false, transtypeByVal = false,
sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d55bd8, transfn_fcinfo = {flinfo = 0x2d58038,
context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>},
argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0,
fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}},
deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
AggState->pergroups、hash_pergroup、all_pergroups相关
(gdb) p *aggstate->pergroups
Cannot access memory at address 0x0
(gdb) p *aggstate->hash_pergroup
$65 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->all_pergroups
$66 = (AggStatePerGroup) 0x0
AggState->perhash
(gdb) p *aggstate->perhash
$67 = {hashtable = 0x2d54ad8, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x2d54238, hashfunctions = 0x2d542d0,
eqfuncoids = 0x2d54a90, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x2d549f0,
hashGrpColIdxHash = 0x2d54a10, aggnode = 0x2d903a0}
(gdb) p *aggstate->perhash->hashtable
$68 = {hashtab = 0x2d54b70, numCols = 1, keyColIdx = 0x2d54a10, tab_hash_funcs = 0x2d542d0, tab_eq_func = 0x2d54e90,
tablecxt = 0x2d7c450, tempcxt = 0x2d90a00, entrysize = 24, tableslot = 0x2d54df8, inputslot = 0x0, in_hash_funcs = 0x0,
cur_eq_func = 0x0, hash_iv = 0, exprcontext = 0x2d557b0}
(gdb) p *aggstate->perhash->hashfunctions
$69 = {fn_addr = 0x4c8a31 <hashtext>, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002',
fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x0}
DONE!
ExecInitAgg的初始化过程跟踪到此结束,其中尚有不少细节有待后续整理.
N/A
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。