> 数据库 > PostgreSQL >

第 16 课 PostgreSQL查询过程源码分析

1. 查询基本流程
1.SQL语句,string
2.生成语法树,parseTree
3.生成查询树,queryTree
4.生成重写后的查询树列表,queryTreeList
5.生成计划语句树,planStmtTree
6.生成计划执行状态树,planStateTree

2. 各个结构切换源码分析
2.1 把用户输入的string的SQL语句转换成原始语法树(parseTree)。

例如:insert into test values(1, 'xxxx'); 是用户发过来的,由postgres服务进程通过网络获取到,如果调试的话可以从函数exec_simple_query()开始。SQL语句到语法树的转换是函数raw_parser()完成的,返回一个语法树list。

List *
raw_parser(const char *str)
{
    core_yyscan_t yyscanner;
    base_yy_extra_type yyextra;
    int         yyresult;

    /* 初始化flex扫描器 */
    yyscanner = scanner_init(str, &yyextra.core_yy_extra,
                             ScanKeywords, NumScanKeywords);

    /* base_yylex() only needs this much initialization */
    yyextra.have_lookahead = false;

    /* 初始化bison解析器 */
    parser_init(&yyextra);

    /* 执行语法分析 */
    yyresult = base_yyparse(yyscanner);

    /* 解析完成后调用,在scanner_init()之后需要进行清理 */
    scanner_finish(yyscanner);

    if (yyresult)               /* error */
        return NIL;
       
    /* 返回原始语法树列表, list里存储的类型是struct RawStmt */
    return yyextra.parsetree;
}

返回的list中,每个元素实际上都是struct RawStmt结构体,该结构体在文件src/include/nodes/parsenodes.h中定义。该文件中所有*Stmt结构体的第一个成员都是NodeTag(相当于基类),用来标识其节点类型。
/*
 * RawStmt: element type of the list returned by raw_parser().  Wraps one raw
 * parse tree together with the position of the statement's text in the
 * source string.
 */
typedef struct RawStmt
{
    NodeTag     type;
    Node       *stmt;           /* raw parse tree */
    int         stmt_location;  /* start location, or -1 if unknown */
    int         stmt_len;       /* length in bytes; 0 means "rest of string" */
} RawStmt;

例如我们举例的insert语句对应的结构体如下,它的type取值是T_InsertStmt。该字段在后面把语法树转化成查询树时使用:整个转换过程通过节点的类型(NodeTag)来指导执行逻辑。
/*
 * InsertStmt: raw parse tree node for an INSERT statement.  Holds the target
 * relation, the optional column list, the source SELECT/VALUES, and the
 * optional ON CONFLICT, RETURNING, WITH and OVERRIDING clauses.
 */
typedef struct InsertStmt
{
    NodeTag     type;
    RangeVar   *relation;       /* relation to insert into */
    List       *cols;           /* optional: names of the target columns */
    Node       *selectStmt;     /* the source SELECT/VALUES, or NULL */
    OnConflictClause *onConflictClause; /* ON CONFLICT clause */
    List       *returningList;  /* list of expressions to return */
    WithClause *withClause;     /* WITH clause */
    OverridingKind override;    /* OVERRIDING clause */
} InsertStmt;

2.2 原始语法树转换为查询树,queryTree

整个过程可以分成两个步骤,语法树到查询树和查询重写。


完成转换后生成查询树结构Query,如下。语法树转查询树的入口函数是transformTopLevelStmt(),该函数根据语法树,深度优先遍历所有节点,最后创建一个Query对象。整个转换过程就是填充Query结构中的各个成员变量,例如成员变量targetList:如果我们的语句是"INSERT INTO p4(num, name) VALUES(nums[1], names[1])",其中nums、names都是数组,那么targetList保存的就是如何访问这两个数组对象的方式。
List       *cteList;        /* WITH list (of CommonTableExpr's) */
上面的是关于with as语句的相关信息,如果我们要增加一个新的语法特性,就需要在这里增加一个特性的相关属性。

/*
 * Query: the query tree produced from a raw parse tree.
 *
 * Abridged excerpt: only a handful of members are listed here; the
 * remaining members of the structure are left out.
 */
typedef struct Query
{
    NodeTag     type;

    CmdType     commandType;    /* select|insert|update|delete|utility */

    QuerySource querySource;    /* where did I come from? */

    List       *targetList;     /* target list (of TargetEntry) */
    List       *cteList;        /* WITH list (of CommonTableExpr's) */

    /* ... remaining members omitted ... */
} Query;


查询重写过程


入口是pg_rewrite_query(query),参数就是上面返回的Query指针。具体的重写在QueryRewrite()->RewriteQuery()函数中完成,返回一个由Query组成的List(querytree_list)。

    if (query->commandType == CMD_UTILITY)
    {
        /* don't rewrite utilities, just dump 'em into result list */
        querytree_list = list_make1(query);
    }
    else
    {
        /* rewrite regular queries */
        querytree_list = QueryRewrite(query);
    }

2.3 查询树转计划语句树,planStmtTree
遍历查询树List,对其中每个查询树调用pg_plan_query(),把它转换成PlannedStmt(数据结构见下文)。
/*
 * pg_plan_queries
 *      Produce one PlannedStmt per entry of an already-rewritten list of
 *      query trees.
 *
 * Non-utility queries are passed to pg_plan_query().  A utility query is not
 * planned: it gets a fresh PlannedStmt that carries only the command type,
 * canSetTag, the utility statement node and the statement location/length
 * copied from the Query.
 *
 * Returns the list of PlannedStmt nodes, in the same order as the input.
 */
List *
pg_plan_queries(List *querytrees, int cursorOptions, ParamListInfo boundParams)
{
    List       *planned = NIL;
    ListCell   *cell;

    foreach(cell, querytrees)
    {
        Query      *tree = lfirst_node(Query, cell);
        PlannedStmt *result;

        if (tree->commandType != CMD_UTILITY)
        {
            /* Regular query: run the planner on it. */
            result = pg_plan_query(tree, cursorOptions, boundParams);
        }
        else
        {
            /* Utility commands require no planning. */
            result = makeNode(PlannedStmt);
            result->commandType = CMD_UTILITY;
            result->canSetTag = tree->canSetTag;
            result->utilityStmt = tree->utilityStmt;
            result->stmt_location = tree->stmt_location;
            result->stmt_len = tree->stmt_len;
        }

        planned = lappend(planned, result);
    }

    return planned;
}

PlannedStmt结构体的定义如下:
/*
 * PlannedStmt: one planned statement.
 *
 * Holds the command type, the tree of Plan nodes (planTree), the range
 * table, the subplans, and the dependency and bookkeeping lists described
 * on each member below.  For a utility statement, utilityStmt is non-null.
 */
typedef struct PlannedStmt
{
    NodeTag     type;

    CmdType     commandType;    /* select|insert|update|delete|utility */

    uint64      queryId;        /* query identifier (copied from Query) */

    bool        hasReturning;   /* is it insert|update|delete RETURNING? */

    bool        hasModifyingCTE;    /* has insert|update|delete in WITH? */

    bool        canSetTag;      /* do I set the command result tag? */

    bool        transientPlan;  /* redo plan when TransactionXmin changes? */

    bool        dependsOnRole;  /* is plan specific to current role? */

    bool        parallelModeNeeded; /* parallel mode required to execute? */

    int         jitFlags;       /* which forms of JIT should be performed */

    struct Plan *planTree;      /* tree of Plan nodes */

    List       *rtable;         /* list of RangeTblEntry nodes */

    /* rtable indexes of target relations for INSERT/UPDATE/DELETE */
    List       *resultRelations;    /* integer list of RT indexes, or NIL */

    /*
     * rtable indexes of partitioned table roots that are UPDATE/DELETE
     * targets; needed for trigger firing.
     */
    List       *rootResultRelations;

    List       *subplans;       /* Plan trees for SubPlan expressions; note
                                 * that some could be NULL */

    Bitmapset  *rewindPlanIDs;  /* indices of subplans that require REWIND */

    List       *rowMarks;       /* a list of PlanRowMark's */

    List       *relationOids;   /* OIDs of relations the plan depends on */

    List       *invalItems;     /* other dependencies, as PlanInvalItems */

    List       *paramExecTypes; /* type OIDs for PARAM_EXEC Params */

    Node       *utilityStmt;    /* non-null if this is utility stmt */

    /* statement location in source string (copied from Query) */
    int         stmt_location;  /* start location, or -1 if unknown */
    int         stmt_len;       /* length in bytes; 0 means "rest of string" */
} PlannedStmt;

2.4 计划语句树转计划执行状态树,planStateTree
转换时,PlannedStmt中每个子计划(subplans)对应的PlanState会依次追加到queryDesc->estate->es_subplanstates链表中。在执行器执行过程中,便是依据es_subplanstates链表和计划状态树的树形结构进行执行。
/*
 * InitPlan (excerpt)
 *
 * Only the part of InitPlan() that deals with subplans is shown: for every
 * entry of plannedstmt->subplans a PlanState is built with ExecInitNode()
 * and appended to estate->es_subplanstates.  The rest of the function is
 * not reproduced in this excerpt.
 *
 * queryDesc: supplies the PlannedStmt and the EState being initialized.
 * eflags:    executor flags; only EXEC_FLAG_EXPLAIN_ONLY and
 *            EXEC_FLAG_WITH_NO_DATA are passed on to subplans, plus
 *            EXEC_FLAG_REWIND for subplans listed in rewindPlanIDs.
 */
static void
InitPlan(QueryDesc *queryDesc, int eflags)
{
    /* Declarations are kept exactly as in the original excerpt. */
    CmdType     operation = queryDesc->operation;
    PlannedStmt *plannedstmt = queryDesc->plannedstmt;
    Plan       *plan = plannedstmt->planTree;
    List       *rangeTable = plannedstmt->rtable;
    EState     *estate = queryDesc->estate;
    PlanState  *planstate;
    TupleDesc   tupType;
    ListCell   *l;
    int         i;

    /*
     * Build a PlanState for every subplan and collect them, in order, in
     * estate->es_subplanstates.
     *
     * The counter must be set before the loop: it is tested against
     * rewindPlanIDs on every iteration, and reading it uninitialized would
     * be undefined behavior.
     */
    i = 1;                      /* subplan indices count from 1 */
    foreach(l, plannedstmt->subplans)
    {
        Plan       *subplan = (Plan *) lfirst(l);
        PlanState  *subplanstate;
        int         sp_eflags;

        /*
         * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If
         * it is a parameterless subplan (not initplan), we suggest that it be
         * prepared to handle REWIND efficiently; otherwise there is no need.
         */
        sp_eflags = eflags
            & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA);
        if (bms_is_member(i, plannedstmt->rewindPlanIDs))
            sp_eflags |= EXEC_FLAG_REWIND;

        subplanstate = ExecInitNode(subplan, estate, sp_eflags);

        estate->es_subplanstates = lappend(estate->es_subplanstates,
                                           subplanstate);

        i++;
    }

    /* ... remainder of InitPlan() omitted in this excerpt ... */
}



(责任编辑:IT)