美文网首页PostgreSQL程序猿阵线联盟-汇总各类技术干货
PostgreSQL 源码解读(38)- 查询语句#23(que

PostgreSQL 源码解读(38)- 查询语句#23(que

作者: EthanHe | 来源:发表于2018-09-10 17:09 被阅读8次

    上一小节已介绍了grouping_planner函数,该函数通过调用query_planner函数为扫描/连接部分生成最优的未排序和预排序(presorted)路径,本节主要介绍query_planner函数对简单SQL如SELECT 2+2;的处理逻辑。

    一、重要的数据结构

    RelOptInfo
    查询语句经过查询重写/表达式简化/外连接消除等处理后,查询树Query已完成规范化,查询树中的RangeTblEntry(RTE)数据结构已完成其历史使命,由此可进入逻辑优化处理,在此阶段使用RelOptInfo数据结构.

     typedef enum RelOptKind
     {
         RELOPT_BASEREL,//基本关系(如基表/子查询等)
         RELOPT_JOINREL,//连接产生的关系,要注意的是通过连接等方式产生的结果亦可以视为关系
         RELOPT_OTHER_MEMBER_REL,
         RELOPT_OTHER_JOINREL,
         RELOPT_UPPER_REL,//上层的关系
         RELOPT_OTHER_UPPER_REL,
         RELOPT_DEADREL
     } RelOptKind;
     
     /*
      * Is the given relation a simple relation i.e a base or "other" member
      * relation?
      */
     #define IS_SIMPLE_REL(rel) \
         ((rel)->reloptkind == RELOPT_BASEREL || \
          (rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
     
     /* Is the given relation a join relation? */
     #define IS_JOIN_REL(rel)    \
         ((rel)->reloptkind == RELOPT_JOINREL || \
          (rel)->reloptkind == RELOPT_OTHER_JOINREL)
     
     /* Is the given relation an upper relation? */
     #define IS_UPPER_REL(rel)   \
         ((rel)->reloptkind == RELOPT_UPPER_REL || \
          (rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
     
     /* Is the given relation an "other" relation? */
     #define IS_OTHER_REL(rel) \
         ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
          (rel)->reloptkind == RELOPT_OTHER_JOINREL || \
          (rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
     
     typedef struct RelOptInfo
     {
         NodeTag     type;//节点标识
     
         RelOptKind  reloptkind;//RelOpt类型
     
         /* all relations included in this RelOptInfo */
         Relids      relids;         /*Relids(rtindex)集合 set of base relids (rangetable indexes) */
     
         /* size estimates generated by planner */
         double      rows;           /*结果元组的估算数量 estimated number of result tuples */
     
         /* per-relation planner control flags */
         bool        consider_startup;   /*是否考虑启动成本?是,需要保留启动成本低的路径 keep cheap-startup-cost paths? */
         bool        consider_param_startup; /*是否考虑参数化?的路径 ditto, for parameterized paths? */
         bool        consider_parallel;  /*是否考虑并行处理路径 consider parallel paths? */
     
         /* default result targetlist for Paths scanning this relation */
         struct PathTarget *reltarget;   /*扫描该Relation时默认的结果 list of Vars/Exprs, cost, width */
     
         /* materialization information */
         List       *pathlist;       /*访问路径链表 Path structures */
         List       *ppilist;        /*路径链表中使用参数化路径进行 ParamPathInfos used in pathlist */
         List       *partial_pathlist;   /* partial Paths */
         struct Path *cheapest_startup_path;//代价最低的启动路径
         struct Path *cheapest_total_path;//代价最低的整体路径
         struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
         List       *cheapest_parameterized_paths;//代价最低的参数化?路径链表
     
         /* parameterization information needed for both base rels and join rels */
         /* (see also lateral_vars and lateral_referencers) */
         Relids      direct_lateral_relids;  /*使用lateral语法,需依赖的Relids rels directly laterally referenced */
         Relids      lateral_relids; /* minimum parameterization of rel */
     
         /* information about a base rel (not set for join rels!) */
       //reloptkind=RELOPT_BASEREL时使用的数据结构
         Index       relid;      /* Relation ID */
         Oid         reltablespace;  /* 表空间 containing tablespace */
         RTEKind     rtekind;        /* 基表?子查询?还是函数等等?RELATION, SUBQUERY, FUNCTION, etc */
         AttrNumber  min_attr;       /* 最小的属性编号 smallest attrno of rel (often <0) */
         AttrNumber  max_attr;       /* 最大的属性编号 largest attrno of rel */
         Relids     *attr_needed;    /* 数组 array indexed [min_attr .. max_attr] */
         int32      *attr_widths;    /* 属性宽度 array indexed [min_attr .. max_attr] */
         List       *lateral_vars;   /* 关系依赖的Vars/PHVs LATERAL Vars and PHVs referenced by rel */
         Relids      lateral_referencers;    /*依赖该关系的Relids rels that reference me laterally */
         List       *indexlist;      /* 该关系的IndexOptInfo链表 list of IndexOptInfo */
         List       *statlist;       /* 统计信息链表 list of StatisticExtInfo */
         BlockNumber pages;          /* 块数 size estimates derived from pg_class */
         double      tuples;     /* 元组数 */
         double      allvisfrac;   /* ? */
         PlannerInfo *subroot;       /* 如为子查询,存储子查询的root if subquery */
         List       *subplan_params; /* 如为子查询,存储子查询的参数 if subquery */
         int         rel_parallel_workers;   /* 并行执行,需要多少个workers? wanted number of parallel workers */
     
         /* Information about foreign tables and foreign joins */
       //FWD相关信息
         Oid         serverid;       /* identifies server for the table or join */
         Oid         userid;         /* identifies user to check access as */
         bool        useridiscurrent;    /* join is only valid for current user */
         /* use "struct FdwRoutine" to avoid including fdwapi.h here */
         struct FdwRoutine *fdwroutine;
         void       *fdw_private;
     
         /* cache space for remembering if we have proven this relation unique */
       //已知的,可保证唯一的Relids链表
         List       *unique_for_rels;    /* known unique for these other relid
                                          * set(s) */
         List       *non_unique_for_rels;    /* 已知的,不唯一的Relids链表 known not unique for these set(s) */
     
         /* used by various scans and joins: */
         List       *baserestrictinfo;   /* 如为基本关系,存储约束条件 RestrictInfo structures (if base rel) */
         QualCost    baserestrictcost;   /* 解析约束表达式的成本? cost of evaluating the above */
         Index       baserestrict_min_security;  /* 最低安全等级 min security_level found in
                                                  * baserestrictinfo */
         List       *joininfo;       /* 连接语句的约束条件信息 RestrictInfo structures for join clauses
                                      * involving this rel */
         bool        has_eclass_joins;   /* 是否存在等价类连接? T means joininfo is incomplete */
     
         /* used by partitionwise joins: */
         bool        consider_partitionwise_join;    /* 分区? consider partitionwise
                                                      * join paths? (if
                                                      * partitioned rel) */
         Relids      top_parent_relids;  /* Relids of topmost parents (if "other"
                                          * rel) */
     
         /* used for partitioned relations */
       //分区表使用
         PartitionScheme part_scheme;    /* 分区的schema Partitioning scheme. */
         int         nparts;         /* 分区数 number of partitions */
         struct PartitionBoundInfoData *boundinfo;   /* 分区边界信息 Partition bounds */
         List       *partition_qual; /* 分区约束 partition constraint */
         struct RelOptInfo **part_rels;  /* 分区的RelOptInfo数组 Array of RelOptInfos of partitions,
                                          * stored in the same order of bounds */
         List      **partexprs;      /* 非空分区键表达式 Non-nullable partition key expressions. */
         List      **nullable_partexprs; /* 可为空的分区键表达式 Nullable partition key expressions. */
         List       *partitioned_child_rels; /* RT Indexes链表 List of RT indexes. */
     } RelOptInfo;
    
    

    PathCostComparison

     typedef enum
     {
         COSTS_EQUAL,                /* 近似相等 path costs are fuzzily equal */
         COSTS_BETTER1,              /* 第一个路径成本较低 first path is cheaper than second */
         COSTS_BETTER2,              /* 第二个相对较低 second path is cheaper than first */
         COSTS_DIFFERENT             /* 不管是哪个路径,成本上都不占优势 neither path dominates the other on cost */
     } PathCostComparison;
     
    

    ResultPath

     /*
      * ResultPath represents use of a Result plan node to compute a variable-free
      * targetlist with no underlying tables (a "SELECT expressions" query).
      * The query could have a WHERE clause, too, represented by "quals".
      *
      * Note that quals is a list of bare clauses, not RestrictInfos.
      */
     typedef struct ResultPath //表示无基础表的结果计划节点
     {
         Path        path;//扫描路径
         List       *quals;//where语句表达式,bare clauses, not RestrictInfos
     } ResultPath;
    

    二、源码解读

     /*
      * query_planner
      *    Generate a path (that is, a simplified plan) for a basic query,
      *    which may involve joins but not any fancier features.
      *
      * 为一个基本的查询(可能涉及连接)生成访问路径(也可以视为一个简化的计划).
      *
      * Since query_planner does not handle the toplevel processing (grouping,
      * sorting, etc) it cannot select the best path by itself.  Instead, it
      * returns the RelOptInfo for the top level of joining, and the caller
      * (grouping_planner) can choose among the surviving paths for the rel.
      *
      * query_planner不会处理顶层的处理过程(如最后的分组/排序等操作),因此,不能选择最优的访问路径
      * 该函数会返回RelOptInfo给最高层的连接,grouping_planner可以在剩下的路径中进行选择
      *
      * root describes the query to plan
      * tlist is the target list the query should produce
      *      (this is NOT necessarily root->parse->targetList!)
      * qp_callback is a function to compute query_pathkeys once it's safe to do so
      * qp_extra is optional extra data to pass to qp_callback
      *
      * root是计划信息/tlist是投影列
      * qp_callback是计算query_pathkeys的函数/qp_extra是传递给qp_callback的函数
      *
      * Note: the PlannerInfo node also includes a query_pathkeys field, which
      * tells query_planner the sort order that is desired in the final output
      * plan.  This value is *not* available at call time, but is computed by
      * qp_callback once we have completed merging the query's equivalence classes.
      * (We cannot construct canonical pathkeys until that's done.)
      */
     RelOptInfo *
     query_planner(PlannerInfo *root, List *tlist,
                   query_pathkeys_callback qp_callback, void *qp_extra)
     {
         Query      *parse = root->parse;//查询树
         List       *joinlist;
         RelOptInfo *final_rel;//结果
         Index       rti;//RTE的index
         double      total_pages;//总pages数
     
         /*
          * If the query has an empty join tree, then it's something easy like
          * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
          */
         if (parse->jointree->fromlist == NIL)//简单SQL,无FROM/WHERE语句
         {
             /* We need a dummy joinrel to describe the empty set of baserels */
             final_rel = build_empty_join_rel(root);//创建返回结果
     
             /*
              * If query allows parallelism in general, check whether the quals are
              * parallel-restricted.  (We need not check final_rel->reltarget
              * because it's empty at this point.  Anything parallel-restricted in
              * the query tlist will be dealt with later.)
              */
             if (root->glob->parallelModeOK)//并行模式?
                 final_rel->consider_parallel =
                     is_parallel_safe(root, parse->jointree->quals);
     
             /* The only path for it is a trivial Result path */
             add_path(final_rel, (Path *)
                      create_result_path(root, final_rel,
                                         final_rel->reltarget,
                                         (List *) parse->jointree->quals));//添加访问路径
     
             /* Select cheapest path (pretty easy in this case...) */
             set_cheapest(final_rel);//选择最优的访问路径
     
             /*
              * We still are required to call qp_callback, in case it's something
              * like "SELECT 2+2 ORDER BY 1".
              */
             root->canon_pathkeys = NIL;
             (*qp_callback) (root, qp_extra);//回调函数
     
             return final_rel;//返回
         }
     
         //其他代码
         ...
     }
    

    add_path/set_cheapest
    后续再行介绍

    create_result_path

     /*
      * create_result_path
      *    Creates a path representing a Result-and-nothing-else plan.
      *
      * This is only used for degenerate cases, such as a query with an empty
      * jointree.
      */
     ResultPath *
     create_result_path(PlannerInfo *root, RelOptInfo *rel,
                        PathTarget *target, List *resconstantqual)
     {
         ResultPath *pathnode = makeNode(ResultPath);//结果
     
         pathnode->path.pathtype = T_Result;//扫描路径类型
         pathnode->path.parent = rel;//路径的partentbuild_empty_join_rel
         pathnode->path.pathtarget = target;//目标列
         pathnode->path.param_info = NULL;   /* there are no other rels... */
         pathnode->path.parallel_aware = false;
         pathnode->path.parallel_safe = rel->consider_parallel;
         pathnode->path.parallel_workers = 0;//并行workers数目,设置为0
         pathnode->path.pathkeys = NIL;//
         pathnode->quals = resconstantqual;//表达式
     
         /* Hardly worth defining a cost_result() function ... just do it */
         pathnode->path.rows = 1;//行数为1
         pathnode->path.startup_cost = target->cost.startup;
         pathnode->path.total_cost = target->cost.startup +
             cpu_tuple_cost + target->cost.per_tuple;
     
         /*
          * Add cost of qual, if any --- but we ignore its selectivity, since our
          * rowcount estimate should be 1 no matter what the qual is.
          */
         if (resconstantqual)
         {
             QualCost    qual_cost;
     
             cost_qual_eval(&qual_cost, resconstantqual, root);
             /* resconstantqual is evaluated once at startup */
             pathnode->path.startup_cost += qual_cost.startup + qual_cost.per_tuple;
             pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
         }
     
         return pathnode;
     }
    

    build_empty_join_rel

    /*
      * build_empty_join_rel
      *      Build a dummy join relation describing an empty set of base rels.
      *
      * This is used for queries with empty FROM clauses, such as "SELECT 2+2" or
      * "INSERT INTO foo VALUES(...)".  We don't try very hard to make the empty
      * joinrel completely valid, since no real planning will be done with it ---
      * we just need it to carry a simple Result path out of query_planner().
      */
     RelOptInfo *
     build_empty_join_rel(PlannerInfo *root)
     {
         RelOptInfo *joinrel;
     
         /* The dummy join relation should be the only one ... */
         Assert(root->join_rel_list == NIL);
     
         joinrel = makeNode(RelOptInfo);
         joinrel->reloptkind = RELOPT_JOINREL;
         joinrel->relids = NULL;     /* empty set */
         joinrel->rows = 1;          /* we produce one row for such cases */
         joinrel->rtekind = RTE_JOIN;
         joinrel->reltarget = create_empty_pathtarget();
     
         root->join_rel_list = lappend(root->join_rel_list, joinrel);
     
         return joinrel;
     }
    

    三、跟踪分析

    (gdb) b query_planner
    Breakpoint 1 at 0x76942c: file planmain.c, line 57.
    (gdb) c
    Continuing.
    
    Breakpoint 1, query_planner (root=0x275c878, tlist=0x277fd78, qp_callback=0x76e97d <standard_qp_callback>, 
        qp_extra=0x7ffdd435d490) at planmain.c:57
    57    Query    *parse = root->parse;
    (gdb) n
    67    if (parse->jointree->fromlist == NIL)
    (gdb) 
    70      final_rel = build_empty_join_rel(root);
    (gdb) 
    78      if (root->glob->parallelModeOK)
    #创建的空RELOPT_JOINREL
    (gdb) p *final_rel
    $4 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x0, rows = 1, consider_startup = false, 
      consider_param_startup = false, consider_parallel = false, reltarget = 0x277fda8, pathlist = 0x0, ppilist = 0x0, 
      partial_pathlist = 0x0, cheapest_startup_path = 0x0, cheapest_total_path = 0x0, cheapest_unique_path = 0x0, 
      cheapest_parameterized_paths = 0x0, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0, reltablespace = 0, 
      rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0, lateral_vars = 0x0, 
      lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, subroot = 0x0, 
      subplan_params = 0x0, rel_parallel_workers = 0, serverid = 0, userid = 0, useridiscurrent = false, fdwroutine = 0x0, 
      fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0, baserestrictcost = {
        startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = false, 
      top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, 
      partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
    ...
    (gdb) step
    add_path (parent_rel=0x275cc88, new_path=0x275c498) at pathnode.c:424
    424   bool    accept_new = true;  /* unless we find a superior old path */
    #创建的path(ResultPath)
    (gdb) p *new_path
    $6 = {type = T_ResultPath, pathtype = T_Result, parent = 0x275cc88, pathtarget = 0x277fda8, param_info = 0x0, 
      parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0, total_cost = 0.01, 
      pathkeys = 0x0}
    (gdb) finish
    Run till exit from #0  add_path (parent_rel=0x275cc88, new_path=0x275c498) at pathnode.c:425
    query_planner (root=0x275c878, tlist=0x277fd78, qp_callback=0x76e97d <standard_qp_callback>, qp_extra=0x7ffdd435d490)
        at planmain.c:89
    89      set_cheapest(final_rel);
    ...
    98      return final_rel;
    (gdb) 
    267 }
    #返回值
    (gdb) p *final_rel
    $8 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x0, rows = 1, consider_startup = false, 
      consider_param_startup = false, consider_parallel = true, reltarget = 0x277fda8, pathlist = 0x277fe68, ppilist = 0x0, 
      partial_pathlist = 0x0, cheapest_startup_path = 0x275c498, cheapest_total_path = 0x275c498, cheapest_unique_path = 0x0, 
      cheapest_parameterized_paths = 0x277feb8, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0, 
      reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0, 
      lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, 
      subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = 0, serverid = 0, userid = 0, useridiscurrent = false, 
      fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0, 
      baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = false, 
      top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, 
      partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
    (gdb) p *final_rel->cheapest_total_path
    $9 = {type = T_ResultPath, pathtype = T_Result, parent = 0x275cc88, pathtarget = 0x277fda8, param_info = 0x0, 
      parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0, total_cost = 0.01, 
      pathkeys = 0x0}
    (gdb) 
    #DONE!
    

    四、参考资料

    planmain.c

    相关文章

      网友评论

        本文标题:PostgreSQL 源码解读(38)- 查询语句#23(que

        本文链接:https://www.haomeiwen.com/subject/djasgftx.html