第 16 课 PostgreSQL查询过程源码分析
时间:2019-05-23 13:10 来源:linux.it.net.cn 作者:IT
1. 查询基本流程
1.SQL语句,string
2.生成语法树,parseTree
3.生成查询树,queryTree
4.生成优化查询树列表,queryTreeList
5.生成计划语句树,planStmtTree
6.生成计划执行状态树,planStateTree
2. 各个结构切换源码分析
2.1 把用户输入的string的SQL语句转换成原始语法树(parseTree)。
例如:insert into test values(1, 'xxxx'); 是用户发过来的,由postgres服务进程通过网络获取到,如果调试的话可以从函数exec_simple_query()开始。SQL语句到语法树的转换是函数raw_parser()完成的,返回一个语法树list。
/*
 * raw_parser
 *      Run the scanner and the grammar over the SQL text in str.
 *
 * str: the SQL source text to parse.
 *
 * Returns NIL when base_yyparse() reports a nonzero result; otherwise
 * returns the parse tree list stored in the parser's extra state (per the
 * accompanying text, its elements are RawStmt nodes).
 */
List *
raw_parser(const char *str)
{
    base_yy_extra_type extra;
    core_yyscan_t scanner;
    int         parse_rc;

    /* Set up the flex scanner over the input string. */
    scanner = scanner_init(str, &extra.core_yy_extra,
                           ScanKeywords, NumScanKeywords);

    /* base_yylex() only needs this much initialization */
    extra.have_lookahead = false;

    /* Set up the bison parser. */
    parser_init(&extra);

    /* Run the grammar. */
    parse_rc = base_yyparse(scanner);

    /* Parsing is done: clean up what scanner_init() set up. */
    scanner_finish(scanner);

    /* Nonzero means the parse failed; otherwise hand back the raw parse trees. */
    return (parse_rc != 0) ? NIL : extra.parsetree;
}
返回的list中存放的元素是struct RawStmt结构体,在文件src/include/nodes/parsenodes.h中定义。在该文件中,所有*Stmt结构体都以NodeTag作为第一个成员(相当于基类),用来标识其节点类型。
/*
 * RawStmt - container for one raw parse tree, together with the
 * statement's start location and length.
 */
typedef struct RawStmt
{
/* node tag identifying this node's type */
NodeTag type;
Node *stmt; /* raw parse tree */
int stmt_location; /* start location, or -1 if unknown */
int stmt_len; /* length in bytes; 0 means "rest of string" */
} RawStmt;
例如我们举例的insert语句,其结构体如下。它的type是T_InsertStmt,该属性在后面把语法树转化成查询树时使用:整个转换过程都是通过节点(Node)的类型来指导执行逻辑的。
/*
 * InsertStmt - parse node describing an INSERT statement.
 */
typedef struct InsertStmt
{
/* node tag identifying this node's type */
NodeTag type;
RangeVar *relation; /* relation to insert into */
List *cols; /* optional: names of the target columns */
Node *selectStmt; /* the source SELECT/VALUES, or NULL */
OnConflictClause *onConflictClause; /* ON CONFLICT clause */
List *returningList; /* list of expressions to return */
WithClause *withClause; /* WITH clause */
OverridingKind override; /* OVERRIDING clause */
} InsertStmt;
2.2 原始语法树转换为查询树,queryTree
整个过程可以分成两个步骤,语法树到查询树和查询重写。
完成转换后生成查询树结构Query,如下。语法树转查询树的入口函数是transformTopLevelStmt(),该函数根据语法树,对所有节点实施深度优先遍历,最后创建一个Query对象。整个转换过程就是填充Query结构中的各个成员变量。以成员变量targetList为例:如果我们的语句是“INSERT INTO p4(num, name) VALUES(nums[1], names[1])”,其中nums、names都是数组,那么targetList保存的就是如何访问这两个数组对象的方式。
List *cteList; /* WITH list (of CommonTableExpr's) */
上面的是关于with as语句的相关信息,如果我们要增加一个新的语法特性,就需要在这里增加一个特性的相关属性。
/*
 * Query - the query tree structure.
 *
 * Only the first few members are reproduced in this excerpt; the rest of
 * the structure is omitted.
 */
typedef struct Query
{
    NodeTag     type;           /* node tag identifying this node's type */
    CmdType     commandType;    /* select|insert|update|delete|utility */
    QuerySource querySource;    /* where did I come from? */
    List       *targetList;     /* target list (of TargetEntry) */
    List       *cteList;        /* WITH list (of CommonTableExpr's) */

    /* ... remaining members omitted ... */
} Query;
查询重写过程
入口是pg_rewrite_query(query),参数就是上面返回的Query指针。具体实施是在QueryRewrite()->RewriteQuery()函数,返回一个QueryList。
/*
 * Utility commands are never rewritten: they are placed into the result
 * list unchanged.  Every other command type goes through QueryRewrite().
 */
if (query->commandType != CMD_UTILITY)
{
    /* regular query: run the rewriter */
    querytree_list = QueryRewrite(query);
}
else
{
    /* utility command: single-element list holding the query as-is */
    querytree_list = list_make1(query);
}
2.3 查询树转计划语句树,planStmtTree
循环查询树List,调用pg_plan_query()把查询树转换成PlannedStmt(数据结构往下看)。
/*
 * pg_plan_queries
 *      Turn a list of already-rewritten Query trees into a list of
 *      PlannedStmt nodes, one per input query and in the same order.
 *
 * Queries whose commandType is CMD_UTILITY are not planned: each gets a
 * new PlannedStmt that carries the utility statement, its canSetTag flag
 * and its source-location fields.  All other queries are handed to
 * pg_plan_query() together with cursorOptions and boundParams.
 *
 * Returns the resulting list (NIL when querytrees is empty).
 */
List *
pg_plan_queries(List *querytrees, int cursorOptions, ParamListInfo boundParams)
{
    List       *plans = NIL;
    ListCell   *cell;

    foreach(cell, querytrees)
    {
        Query       *cur = lfirst_node(Query, cell);
        PlannedStmt *planned;

        if (cur->commandType != CMD_UTILITY)
        {
            /* Optimizable statement: run it through the planner. */
            planned = pg_plan_query(cur, cursorOptions, boundParams);
        }
        else
        {
            /* Utility commands require no planning: just wrap the statement. */
            planned = makeNode(PlannedStmt);
            planned->commandType = CMD_UTILITY;
            planned->canSetTag = cur->canSetTag;
            planned->utilityStmt = cur->utilityStmt;
            planned->stmt_location = cur->stmt_location;
            planned->stmt_len = cur->stmt_len;
        }

        plans = lappend(plans, planned);
    }

    return plans;
}
/*
 * PlannedStmt - one planned statement: the Plan tree plus the supporting
 * lists that go with it.  When utilityStmt is non-null the node stands
 * for a utility statement instead.
 */
typedef struct PlannedStmt
{
    NodeTag     type;           /* node tag identifying this node's type */

    CmdType     commandType;    /* select|insert|update|delete|utility */

    uint64      queryId;        /* query identifier (copied from Query) */

    bool        hasReturning;   /* is it insert|update|delete RETURNING? */

    bool        hasModifyingCTE;    /* has insert|update|delete in WITH? */

    bool        canSetTag;      /* do I set the command result tag? */

    bool        transientPlan;  /* redo plan when TransactionXmin changes? */

    bool        dependsOnRole;  /* is plan specific to current role? */

    bool        parallelModeNeeded; /* parallel mode required to execute? */

    int         jitFlags;       /* which forms of JIT should be performed */

    struct Plan *planTree;      /* tree of Plan nodes */

    List       *rtable;         /* list of RangeTblEntry nodes */

    /* rtable indexes of target relations for INSERT/UPDATE/DELETE */
    List       *resultRelations;    /* integer list of RT indexes, or NIL */

    /*
     * rtable indexes of partitioned table roots that are UPDATE/DELETE
     * targets; needed for trigger firing.
     */
    List       *rootResultRelations;

    List       *subplans;       /* Plan trees for SubPlan expressions; note
                                 * that some could be NULL */

    Bitmapset  *rewindPlanIDs;  /* indices of subplans that require REWIND */

    List       *rowMarks;       /* a list of PlanRowMark's */

    List       *relationOids;   /* OIDs of relations the plan depends on */

    List       *invalItems;     /* other dependencies, as PlanInvalItems */

    List       *paramExecTypes; /* type OIDs for PARAM_EXEC Params */

    Node       *utilityStmt;    /* non-null if this is utility stmt */

    /* statement location in source string (copied from Query) */
    int         stmt_location;  /* start location, or -1 if unknown */
    int         stmt_len;       /* length in bytes; 0 means "rest of string" */
} PlannedStmt;
2.4 计划语句树转计划执行状态树,planStateTree
转换后,各个子计划(subplan)对应的PlanState依次放到queryDesc->estate->es_subplanstates链表中(下面的代码只节选了InitPlan()中初始化子计划的部分,主计划树planTree的初始化在该函数后面的代码中完成,此处未列出)。在执行器执行过程中,便是依据es_subplanstates链表和树形结构进行执行。
/*
 * InitPlan (excerpt)
 *      Create executor state for the subplans of a planned statement.
 *
 * Only the subplan-initialization loop is reproduced here; the rest of
 * the function is omitted from this excerpt.
 *
 * queryDesc: supplies the PlannedStmt to initialize and the EState whose
 *            es_subplanstates list receives the new subplan states.
 * eflags:    executor flags; of these only EXEC_FLAG_EXPLAIN_ONLY and
 *            EXEC_FLAG_WITH_NO_DATA are passed on to subplans.
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
    /* Several of these locals are used only by the omitted remainder. */
    CmdType     operation = queryDesc->operation;
    PlannedStmt *plannedstmt = queryDesc->plannedstmt;
    Plan       *plan = plannedstmt->planTree;
    List       *rangeTable = plannedstmt->rtable;
    EState     *estate = queryDesc->estate;
    PlanState  *planstate;
    TupleDesc   tupType;
    ListCell   *l;
    int         i;

    /*
     * Build a PlanState for every subplan and append it to
     * estate->es_subplanstates, in list order.
     *
     * The counter must be initialized before the loop: it is tested against
     * rewindPlanIDs on every iteration.  Subplan indices count from 1.
     */
    i = 1;
    foreach(l, plannedstmt->subplans)
    {
        Plan       *subplan = (Plan *) lfirst(l);
        PlanState  *subplanstate;
        int         sp_eflags;

        /*
         * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
         * it is a parameterless subplan (not initplan), we suggest that it be
         * prepared to handle REWIND efficiently; otherwise there is no need.
         */
        sp_eflags = eflags
            & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
        if (bms_is_member(i, plannedstmt->rewindPlanIDs))
            sp_eflags |= EXEC_FLAG_REWIND;

        subplanstate = ExecInitNode(subplan, estate, sp_eflags);

        estate->es_subplanstates = lappend(estate->es_subplanstates,
                                           subplanstate);

        i++;
    }

    /* ... remainder of InitPlan omitted in this excerpt ... */
}
(责任编辑:IT)
1. 查询基本流程 1.PLSQL语句,string 2.生成语法树,parseTree 3.生成查询树,queryTree 4.生成优化查询树列表,queryTreeList 5.生成计划语句树,planStmtTree 6.生成计划执行状态树,planStateTree 2. 各个结构切换源码分析 2.1 把用户输入的string的SQL语句转换成原始语法树(parseTree)。 例如:insert into test values(1, 'xxxx'); 是用户发过来的,由postgres服务进程通过网络获取到,如果调试的话可以从函数exec_simple_query()开始。SQL语句到语法树的转换是函数raw_parser()完成的,返回一个语法树list。 List * raw_parser(const char *str) { core_yyscan_t yyscanner; base_yy_extra_type yyextra; int yyresult; /* 初始化flex扫描器 */ yyscanner = scanner_init(str, &yyextra.core_yy_extra, ScanKeywords, NumScanKeywords); /* base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; /* 初始化bison解析器 */ parser_init(&yyextra); /* 执行语法分析 */ yyresult = base_yyparse(yyscanner); /* 解析完成后调用,在scanner_init()之后需要进行清理 */ scanner_finish(yyscanner); if (yyresult) /* error */ return NIL; /* 返回原始语法树列表, list里存储的类型是struct RawStmt */ return yyextra.parsetree; } 返回的list其实体是struct RawStmt结构体, 在文件src\include\nodes\parsenodes.h中定义。在该文件中所有*Stmt结构体都是以NodeTag为基类,用来标识其节点类型。 typedef struct RawStmt { NodeTag type; Node *stmt; /* raw parse tree */ int stmt_location; /* start location, or -1 if unknown */ int stmt_len; /* length in bytes; 0 means "rest of string" */ } RawStmt; 例如我们举例的insert语句的结构体如下, 他的type类型是T_InsertStmt,该属性用来在后面把语法树转化成查询树时使用,整个转换过程通过Node类型来指导执行逻辑。 typedef struct InsertStmt { NodeTag type; RangeVar *relation; /* relation to insert into */ List *cols; /* optional: names of the target columns */ Node *selectStmt; /* the source SELECT/VALUES, or NULL */ OnConflictClause *onConflictClause; /* ON CONFLICT clause */ List *returningList; /* list of expressions to return */ WithClause *withClause; /* WITH clause */ OverridingKind override; /* OVERRIDING clause */ } InsertStmt; 2.2 原始语法树转换为查询树,queryTree 整个过程可以分成两个步骤,语法树到查询树和查询重写。 完成转换后生成查询树结构Query,如下。语法树转查询树的入口函数是transformTopLevelStmt(),该函数根据语法树,实施深度优先遍历所有节点。最后创建一个Query对象,整个转换过程就是填充Query结构中的各个成员变量,例如成员变量targetList,如果的我们语句是:”INSERT INTO p4(num, name) VALUES(nums[1], names[1])”, 
其中nums、names都是数组,那么targetList保存的就是如何访问这两个数组对象的方式。 List cteList; / WITH list (of CommonTableExpr's) */ 上面的是关于with as语句的相关信息,如果我们要增加一个新的语法特性,就需要在这里增加一个特性的相关属性。 typedef struct Query { NodeTag type; CmdType commandType; /* select|insert|update|delete|utility */ QuerySource querySource; /* where did I come from? */ List *targetList; /* target list (of TargetEntry) */ List *cteList; /* WITH list (of CommonTableExpr's) */ 后面省略 } Query; 查询重写过程 入口是pg_rewrite_query(query),参数就是上面返回的Query指针。具体实施是在QueryRewrite()->RewriteQuery()函数,返回一个QueryList。 if (query->commandType == CMD_UTILITY) { /* don't rewrite utilities, just dump 'em into result list */ querytree_list = list_make1(query); } else { /* rewrite regular queries */ querytree_list = QueryRewrite(query); } 2.3 查询树转计划语句树,planStmtTree 循环查询树List,调用pg_plan_query()把查询树转换成PlannedStmt(数据结构往下看)。 /* * 为已经重写的查询树列表生成计划树。 * 正常可优化的语句在结果列表中生成PlannedStmt。实用语句仅由它们的语句节点表示。 * 所有的语句解析完成,return stmt_list。 */ List * pg_plan_queries(List *querytrees, int cursorOptions, ParamListInfo boundParams) { List *stmt_list = NIL; ListCell *query_list; foreach(query_list, querytrees) { Query *query = lfirst_node(Query, query_list); PlannedStmt *stmt; if (query->commandType == CMD_UTILITY) { /* Utility commands require no planning. */ stmt = makeNode(PlannedStmt); stmt->commandType = CMD_UTILITY; stmt->canSetTag = query->canSetTag; stmt->utilityStmt = query->utilityStmt; stmt->stmt_location = query->stmt_location; stmt->stmt_len = query->stmt_len; } else { stmt = pg_plan_query(query, cursorOptions, boundParams); } stmt_list = lappend(stmt_list, stmt); } return stmt_list; } typedef struct PlannedStmt typedef struct PlannedStmt { NodeTag type; CmdType commandType; /* select|insert|update|delete|utility */ uint64 queryId; /* query identifier (copied from Query) */ bool hasReturning; /* is it insert|update|delete RETURNING? */ bool hasModifyingCTE; /* has insert|update|delete in WITH? */ bool canSetTag; /* do I set the command result tag? 
*/ bool transientPlan; /* redo plan when TransactionXmin changes? */ bool dependsOnRole; /* is plan specific to current role? */ bool parallelModeNeeded; /* parallel mode required to execute? */ int jitFlags; /* which forms of JIT should be performed */ struct Plan *planTree; /* tree of Plan nodes */ List *rtable; /* list of RangeTblEntry nodes */ /* rtable indexes of target relations for INSERT/UPDATE/DELETE */ List *resultRelations; /* integer list of RT indexes, or NIL */ /* * rtable indexes of partitioned table roots that are UPDATE/DELETE * targets; needed for trigger firing. */ List *rootResultRelations; List *subplans; /* Plan trees for SubPlan expressions; note * that some could be NULL */ Bitmapset *rewindPlanIDs; /* indices of subplans that require REWIND */ List *rowMarks; /* a list of PlanRowMark's */ List *relationOids; /* OIDs of relations the plan depends on */ List *invalItems; /* other dependencies, as PlanInvalItems */ List *paramExecTypes; /* type OIDs for PARAM_EXEC Params */ Node *utilityStmt; /* non-null if this is utility stmt */ /* statement location in source string (copied from Query) */ int stmt_location; /* start location, or -1 if unknown */ int stmt_len; /* length in bytes; 0 means "rest of string" */ } PlannedStmt; 2.4 计划语句树转计划执行状态树,planStateTree 转换后的结果放到queryDesc->estate->es_subplanstates中。在执行器执行过程中,便是依据es_subplanstates链表和树形结构进行执行。 static void InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; Plan *plan = plannedstmt->planTree; List *rangeTable = plannedstmt->rtable; EState *estate = queryDesc->estate; PlanState *planstate; TupleDesc tupType; ListCell *l; int i; // 把所有子查询生成PlanState,放到estate->es_subplanstates列表中 foreach(l, plannedstmt->subplans) { Plan *subplan = (Plan *) lfirst(l); PlanState *subplanstate; int sp_eflags; /* * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. 
If * it is a parameterless subplan (not initplan), we suggest that it be * prepared to handle REWIND efficiently; otherwise there is no need. */ sp_eflags = eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); if (bms_is_member(i, plannedstmt->rewindPlanIDs)) sp_eflags |= EXEC_FLAG_REWIND; subplanstate = ExecInitNode(subplan, estate, sp_eflags); estate->es_subplanstates = lappend(estate->es_subplanstates, subplanstate); i++; } (责任编辑:IT) |