美文网首页PostgreSQL
PostgreSQL 源码解读(55)- 查询语句#40(mak

PostgreSQL 源码解读(55)- 查询语句#40(mak

作者: EthanHe | 来源:发表于2018-09-26 16:18 被阅读9次

    本节继续介绍make_one_rel函数中的set_base_rel_pathlists函数及其子函数。set_base_rel_pathlists函数的目的是为每一个base rel找出所有可用的访问路径(包括顺序扫描和所有可用的索引),每一个可用的路径都会添加到pathlist链表中。这一小节主要介绍索引扫描成本估算的第一部分(验证限制/约束条件是否与索引匹配)。

    一、数据结构

    RelOptInfo

     typedef struct RelOptInfo
     {
         NodeTag     type;//节点标识
     
         RelOptKind  reloptkind;//RelOpt类型
     
         /* all relations included in this RelOptInfo */
         Relids      relids;         /*Relids(rtindex)集合 set of base relids (rangetable indexes) */
     
         /* size estimates generated by planner */
         double      rows;           /*结果元组的估算数量 estimated number of result tuples */
     
         /* per-relation planner control flags */
         bool        consider_startup;   /*是否考虑启动成本?是,需要保留启动成本低的路径 keep cheap-startup-cost paths? */
         bool        consider_param_startup; /*是否考虑参数化?的路径 ditto, for parameterized paths? */
         bool        consider_parallel;  /*是否考虑并行处理路径 consider parallel paths? */
     
         /* default result targetlist for Paths scanning this relation */
         struct PathTarget *reltarget;   /*扫描该Relation时默认的结果 list of Vars/Exprs, cost, width */
     
         /* materialization information */
         List       *pathlist;       /*访问路径链表 Path structures */
         List       *ppilist;        /*路径链表中使用参数化路径进行 ParamPathInfos used in pathlist */
         List       *partial_pathlist;   /* partial Paths */
         struct Path *cheapest_startup_path;//代价最低的启动路径
         struct Path *cheapest_total_path;//代价最低的整体路径
         struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
         List       *cheapest_parameterized_paths;//代价最低的参数化路径链表
     
         /* parameterization information needed for both base rels and join rels */
         /* (see also lateral_vars and lateral_referencers) */
         Relids      direct_lateral_relids;  /*使用lateral语法,需依赖的Relids rels directly laterally referenced */
         Relids      lateral_relids; /* minimum parameterization of rel */
     
         /* information about a base rel (not set for join rels!) */
         //reloptkind=RELOPT_BASEREL时使用的数据结构
         Index       relid;          /* Relation ID */
         Oid         reltablespace;  /* 表空间 containing tablespace */
         RTEKind     rtekind;        /* 基表?子查询?还是函数等等?RELATION, SUBQUERY, FUNCTION, etc */
         AttrNumber  min_attr;       /* 最小的属性编号 smallest attrno of rel (often <0) */
         AttrNumber  max_attr;       /* 最大的属性编号 largest attrno of rel */
         Relids     *attr_needed;    /* 数组 array indexed [min_attr .. max_attr] */
         int32      *attr_widths;    /* 属性宽度 array indexed [min_attr .. max_attr] */
         List       *lateral_vars;   /* 关系依赖的Vars/PHVs LATERAL Vars and PHVs referenced by rel */
         Relids      lateral_referencers;    /*依赖该关系的Relids rels that reference me laterally */
         List       *indexlist;      /* 该关系的IndexOptInfo链表 list of IndexOptInfo */
         List       *statlist;       /* 统计信息链表 list of StatisticExtInfo */
         BlockNumber pages;          /* 块数 size estimates derived from pg_class */
         double      tuples;         /* 元组数 */
         double      allvisfrac;     /* ? */
         PlannerInfo *subroot;       /* 如为子查询,存储子查询的root if subquery */
         List       *subplan_params; /* 如为子查询,存储子查询的参数 if subquery */
         int         rel_parallel_workers;   /* 并行执行,需要多少个workers? wanted number of parallel workers */
     
         /* Information about foreign tables and foreign joins */
         //FDW相关信息
         Oid         serverid;       /* identifies server for the table or join */
         Oid         userid;         /* identifies user to check access as */
         bool        useridiscurrent;    /* join is only valid for current user */
         /* use "struct FdwRoutine" to avoid including fdwapi.h here */
         struct FdwRoutine *fdwroutine;
         void       *fdw_private;
     
         /* cache space for remembering if we have proven this relation unique */
         //已知的,可保证唯一元组返回的Relids链表
         List       *unique_for_rels;    /* known unique for these other relid
                                          * set(s) */
         List       *non_unique_for_rels;    /* 已知的,返回的数据不唯一的Relids链表 known not unique for these set(s) */
     
         /* used by various scans and joins: */
         List       *baserestrictinfo;   /* 如为基本关系,则存储约束条件 RestrictInfo structures (if base rel) */
         QualCost    baserestrictcost;   /* 解析约束表达式的成本? cost of evaluating the above */
         Index       baserestrict_min_security;  /* 最低安全等级 min security_level found in
                                                  * baserestrictinfo */
         List       *joininfo;       /* 连接语句的约束条件信息 RestrictInfo structures for join clauses
                                      * involving this rel */
         bool        has_eclass_joins;   /* 是否存在等价类连接? True意味着joininfo并不完整,,T means joininfo is incomplete */
     
         /* used by partitionwise joins: */
           //是否尝试partitionwise连接,这是PG 11的一个新特性.
         bool        consider_partitionwise_join;    /* consider partitionwise
                                                      * join paths? (if
                                                      * partitioned rel) */
         Relids      top_parent_relids;  /* Relids of topmost parents (if "other"
                                          * rel) */
     
         /* used for partitioned relations */
         //分区表使用
         PartitionScheme part_scheme;    /* 分区的schema Partitioning scheme. */
         int         nparts;         /* 分区数 number of partitions */
         struct PartitionBoundInfoData *boundinfo;   /* 分区边界信息 Partition bounds */
         List       *partition_qual; /* 分区约束 partition constraint */
         struct RelOptInfo **part_rels;  /* 分区的RelOptInfo数组 Array of RelOptInfos of partitions,
                                          * stored in the same order of bounds */
         List      **partexprs;      /* 非空分区键表达式 Non-nullable partition key expressions. */
         List      **nullable_partexprs; /* 可为空的分区键表达式 Nullable partition key expressions. */
         List       *partitioned_child_rels; /* RT Indexes链表 List of RT indexes. */
     } RelOptInfo;
    
    

    Cost相关
    注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!

    /*
      * The cost estimate produced by cost_qual_eval() includes both a one-time
      * (startup) cost, and a per-tuple cost.
      */
     typedef struct QualCost
     {
         Cost        startup;        /* 启动成本,one-time cost */
         Cost        per_tuple;      /* 每个元组的成本,per-evaluation cost */
     } QualCost;
     
    typedef double Cost; /* execution cost (in page-access units) */
    
     /* defaults for costsize.c's Cost parameters */
     /* NB: cost-estimation code should use the variables, not these constants! */
     /* 注意:实际值通过系统配置文件定义,而不是这里的常量定义! */
     /* If you change these, update backend/utils/misc/postgresql.sample.conf */
     #define DEFAULT_SEQ_PAGE_COST  1.0       //顺序扫描page的成本
     #define DEFAULT_RANDOM_PAGE_COST  4.0      //随机扫描page的成本
     #define DEFAULT_CPU_TUPLE_COST  0.01     //处理一个元组的CPU成本
     #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005   //处理一个索引元组的CPU成本
     #define DEFAULT_CPU_OPERATOR_COST  0.0025    //执行一次操作或函数的CPU成本
     #define DEFAULT_PARALLEL_TUPLE_COST 0.1    //并行执行,从一个worker传输一个元组到另一个worker的成本
     #define DEFAULT_PARALLEL_SETUP_COST  1000.0  //构建并行执行环境的成本
     
     #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288    /*先前已有介绍, measured in pages */
    
     double      seq_page_cost = DEFAULT_SEQ_PAGE_COST;
     double      random_page_cost = DEFAULT_RANDOM_PAGE_COST;
     double      cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
     double      cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
     double      cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
     double      parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
     double      parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
     
     int         effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
     
     Cost        disable_cost = 1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径
     
     int         max_parallel_workers_per_gather = 2;//每次gather使用的worker数
     
    

    IndexClauseSet
    用于收集匹配索引的的条件语句

     /* Data structure for collecting qual clauses that match an index */
     typedef struct
     {
         bool        nonempty;       /* True if lists are not all empty */
         /* Lists of RestrictInfos, one per index column */
         List       *indexclauses[INDEX_MAX_KEYS];
     } IndexClauseSet;
    

    二、源码解读

    set_base_rel_pathlists函数遍历RelOptInfo数组,为每一个Rel构造访问路径,先前已介绍了顺序扫描的成本估算,本节介绍索引扫描的成本估算(函数:create_index_paths),通过调用set_plain_rel_pathlist->create_index_paths函数实现.

     /*
      * set_plain_rel_pathlist
      *    Build access paths for a plain relation (no subquery, no inheritance)
      */
     static void
     set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
     {
         Relids      required_outer;
     
         //...
     
         /* 索引扫描,Consider index scans */
         create_index_paths(root, rel);
     
         /* TID扫描,Consider TID scans */
         create_tidscan_paths(root, rel);
     }
    

    create_index_paths
    create_index_paths函数生成Relation所有可能被选中的索引访问路径,详见源码注释.

     /*
      * create_index_paths()
      *    Generate all interesting index paths for the given relation.
      *    Candidate paths are added to the rel's pathlist (using add_path).
      *    生成Relation所有可能被选中的索引访问路径.
      *    Paths通过add_path方法加入到RelOptInfo的pathlist链表中.
      *
      * To be considered for an index scan, an index must match one or more
      * restriction clauses or join clauses from the query's qual condition,
      * or match the query's ORDER BY condition, or have a predicate that
      * matches the query's qual condition.
      * 使用索引扫描的前提是:1.索引必须匹配一个或多个限制条件或连接条件,或者
      * 2.匹配查询的ORDER BY排序条件,或者3.匹配查询条件的谓词(部分/条件索引)
      *
      * There are two basic kinds of index scans.  A "plain" index scan uses
      * only restriction clauses (possibly none at all) in its indexqual,
      * so it can be applied in any context.  A "parameterized" index scan uses
      * join clauses (plus restriction clauses, if available) in its indexqual.
      * When joining such a scan to one of the relations supplying the other
      * variables used in its indexqual, the parameterized scan must appear as
      * the inner relation of a nestloop join; it can't be used on the outer side,
      * nor in a merge or hash join.  In that context, values for the other rels'
      * attributes are available and fixed during any one scan of the indexpath.
      * 有两种基本的索引扫描类型,一种是"plain"索引扫描,只使用限制条件(或者什么都
      *  没有),这种扫描方法适用于任何场景.另外一种是"parameterized"扫描,使用连接条件
      *  (可能的话,加上限制条件)."parameterized"扫描只能出现在嵌套循环中的内关系中,
      *  因为参数由外关系提供.
      *
      * An IndexPath is generated and submitted to add_path() for each plain or
      * parameterized index scan this routine deems potentially interesting for
      * the current query.
      * IndexPath访问路径通过函数add_path生成并提交.
      *
      * 输入参数:
      * 'rel' is the relation for which we want to generate index paths
      * rel是待生成索引范围路径的关系
      *
      * Note: check_index_predicates() must have been run previously for this rel.
      * 注意:函数check_index_predicates在调用此函数前调用
      *
      * Note: in cases involving LATERAL references in the relation's tlist, it's
      * possible that rel->lateral_relids is nonempty.  Currently, we include
      * lateral_relids into the parameterization reported for each path, but don't
      * take it into account otherwise.  The fact that any such rels *must* be
      * available as parameter sources perhaps should influence our choices of
      * index quals ... but for now, it doesn't seem worth troubling over.
      * In particular, comments below about "unparameterized" paths should be read
      * as meaning "unparameterized so far as the indexquals are concerned".
      */
     void
     create_index_paths(PlannerInfo *root, RelOptInfo *rel)
     {
         List       *indexpaths;//索引访问路径链表
         List       *bitindexpaths;//
         List       *bitjoinpaths;
         List       *joinorclauses;
         IndexClauseSet rclauseset;
         IndexClauseSet jclauseset;
         IndexClauseSet eclauseset;
         ListCell   *lc;
     
         /* Skip the whole mess if no indexes */
         if (rel->indexlist == NIL)//不存在索引,退出
             return;
     
         /* Bitmap paths are collected and then dealt with at the end */
         bitindexpaths = bitjoinpaths = joinorclauses = NIL;//初始赋值
     
         /* Examine each index in turn */
         foreach(lc, rel->indexlist)//遍历索引链表
         {
             IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);//索引信息
     
             /* Protect limited-size array in IndexClauseSets */
             Assert(index->ncolumns <= INDEX_MAX_KEYS);
     
             /*
              * Ignore partial indexes that do not match the query.
              * (generate_bitmap_or_paths() might be able to do something with
              * them, but that's of no concern here.)
              */
             if (index->indpred != NIL && !index->predOK)//部分索引,而且不能使用,不使用此索引
                 continue;
     
             /*
              * Identify the restriction clauses that can match the index.
              * 验证索引和条件是否匹配
              */
             MemSet(&rclauseset, 0, sizeof(rclauseset));
             match_restriction_clauses_to_index(rel, index, &rclauseset);
     
             /*
              * Build index paths from the restriction clauses.  These will be
              * non-parameterized paths.  Plain paths go directly to add_path(),
              * bitmap paths are added to bitindexpaths to be handled below.
              * 通过限制条件创建非参数化索引访问路径.Plain访问路径通过函数add_path直接添加到RelOptInfo中
              * 位图访问路径添加到bitindexpaths链表中,后续再处理
              */
             get_index_paths(root, rel, index, &rclauseset,
                             &bitindexpaths);
     
             /*
              * Identify the join clauses that can match the index.  For the moment
              * we keep them separate from the restriction clauses.  Note that this
              * step finds only "loose" join clauses that have not been merged into
              * EquivalenceClasses.  Also, collect join OR clauses for later.
              * 验证索引是否与连接条件匹配(连接条件与限制条件相互独立).
              * 这一步只是发现未被合并到EC中的"loose"连接条件,在此之后会收集连接中的OR条件
              */
             MemSet(&jclauseset, 0, sizeof(jclauseset));
             match_join_clauses_to_index(root, rel, index,
                                         &jclauseset, &joinorclauses);
     
             /*
              * Look for EquivalenceClasses that can generate joinclauses matching
              * the index.
              * 通过EC(等价类)匹配索引,结果存储在eclauseset链表中
              */
             MemSet(&eclauseset, 0, sizeof(eclauseset));
             match_eclass_clauses_to_index(root, index,
                                           &eclauseset);
     
             /*
              * If we found any plain or eclass join clauses, build parameterized
              * index paths using them.
              * 如果存在plain或者eclass连接条件,创建参数化索引访问路径
              */
             if (jclauseset.nonempty || eclauseset.nonempty)
                 consider_index_join_clauses(root, rel, index,
                                             &rclauseset,
                                             &jclauseset,
                                             &eclauseset,
                                             &bitjoinpaths);
         }
     
         /*
          * Generate BitmapOrPaths for any suitable OR-clauses present in the
          * restriction list.  Add these to bitindexpaths.
          * 基于RelOptInfo中的限制条件生成BitmapOrPaths访问路径
          */
         indexpaths = generate_bitmap_or_paths(root, rel,
                                               rel->baserestrictinfo, NIL);
         bitindexpaths = list_concat(bitindexpaths, indexpaths);//合并到bitindexpaths链表中
     
         /*
          * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in
          * the joinclause list.  Add these to bitjoinpaths.
          * 同样的,基于连接条件joinorclause中的OR语句生成BitmapOrPaths访问路径
          */
         indexpaths = generate_bitmap_or_paths(root, rel,
                                               joinorclauses, rel->baserestrictinfo);
         bitjoinpaths = list_concat(bitjoinpaths, indexpaths);//合并到bitjoinpaths链表中
     
         /*
          * If we found anything usable, generate a BitmapHeapPath for the most
          * promising combination of restriction bitmap index paths.  Note there
          * will be only one such path no matter how many indexes exist.  This
          * should be sufficient since there's basically only one figure of merit
          * (total cost) for such a path.
          */
         if (bitindexpaths != NIL)//存在位图索引访问路径
         {
             Path       *bitmapqual;//访问路径
             BitmapHeapPath *bpath;//BitmapHeapPath访问路径
     
             bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);//位图表达式路径
             bpath = create_bitmap_heap_path(root, rel, bitmapqual,
                                             rel->lateral_relids, 1.0, 0);//BitmapHeapPath访问路径
             add_path(rel, (Path *) bpath);//添加到RelOptInfo中
     
             /* create a partial bitmap heap path */
             if (rel->consider_parallel && rel->lateral_relids == NULL)
                 create_partial_bitmap_paths(root, rel, bitmapqual);//创建并行访问路径
         }
     
         /*
          * Likewise, if we found anything usable, generate BitmapHeapPaths for the
          * most promising combinations of join bitmap index paths.  Our strategy
          * is to generate one such path for each distinct parameterization seen
          * among the available bitmap index paths.  This may look pretty
          * expensive, but usually there won't be very many distinct
          * parameterizations.  (This logic is quite similar to that in
          * consider_index_join_clauses, but we're working with whole paths not
          * individual clauses.)
          */
         if (bitjoinpaths != NIL)//bitjoinpaths位图连接访问路径
         {
             List       *path_outer;//依赖的外部Relids链表
             List       *all_path_outers;//依赖的外部路径Relids链表
             ListCell   *lc;//临时变量
     
             /*
              * path_outer holds the parameterization of each path in bitjoinpaths
              * (to save recalculating that several times), while all_path_outers
              * holds all distinct parameterization sets.
              */
             path_outer = all_path_outers = NIL;//初始化变量
             foreach(lc, bitjoinpaths)//遍历bitjoinpaths
             {
                 Path       *path = (Path *) lfirst(lc);//访问路径
                 Relids      required_outer;//依赖的外部Relids
     
                 required_outer = get_bitmap_tree_required_outer(path);//
                 path_outer = lappend(path_outer, required_outer);//添加到链表中
                 if (!bms_equal_any(required_outer, all_path_outers))//不等,则添加到all_path_outers中
                     all_path_outers = lappend(all_path_outers, required_outer);
             }
     
             /* Now, for each distinct parameterization set ... */
             //对每一个唯一的参数化集合进行处理
             foreach(lc, all_path_outers)//遍历all_path_outers
             {
                 Relids      max_outers = (Relids) lfirst(lc);
                 List       *this_path_set;
                 Path       *bitmapqual;
                 Relids      required_outer;
                 double      loop_count;
                 BitmapHeapPath *bpath;
                 ListCell   *lcp;
                 ListCell   *lco;
     
                 /* Identify all the bitmap join paths needing no more than that */
                 this_path_set = NIL;
                 forboth(lcp, bitjoinpaths, lco, path_outer)//遍历
                 {
                     Path       *path = (Path *) lfirst(lcp);
                     Relids      p_outers = (Relids) lfirst(lco);
     
                     if (bms_is_subset(p_outers, max_outers))//无需依赖其他Relids,添加到this_path_set中
                         this_path_set = lappend(this_path_set, path);
                 }
     
                 /*
                  * Add in restriction bitmap paths, since they can be used
                  * together with any join paths.
                  */
                 this_path_set = list_concat(this_path_set, bitindexpaths);//合并bitindexpaths访问路径
     
                 /* Select best AND combination for this parameterization */
                 bitmapqual = choose_bitmap_and(root, rel, this_path_set);//为此参数化处理选择最好的AND组合
     
                 /* And push that path into the mix */
                 required_outer = get_bitmap_tree_required_outer(bitmapqual);
                 loop_count = get_loop_count(root, rel->relid, required_outer);
                 bpath = create_bitmap_heap_path(root, rel, bitmapqual,
                                                 required_outer, loop_count, 0);//创建索引访问路径
                 add_path(rel, (Path *) bpath);
             }
         }
     }
    
    

    match_XXX
    match_restriction_clauses_to_index函数验证限制条件是否与Index匹配,匹配的条件添加到clauseset中.
    match_join_clauses_to_index函数验证连接条件是否与Index匹配,同样的,匹配的条件添加到clauseset中.
    match_eclass_clauses_to_index函数验证EC连接条件是否与Index匹配,匹配的条件添加到clauseset中.

    
    //--------------------------------------------------- match_restriction_clauses_to_index
     /*
      * match_restriction_clauses_to_index
      *    Identify restriction clauses for the rel that match the index.
      *    Matching clauses are added to *clauseset.
      *    验证限制条件是否与Index匹配,匹配的条件加入到clauseset中
      */
     static void
     match_restriction_clauses_to_index(RelOptInfo *rel, IndexOptInfo *index,
                                        IndexClauseSet *clauseset)
     {
         /* We can ignore clauses that are implied by the index predicate */
         //忽略部分(条件)索引,直接调用match_clauses_to_index
         match_clauses_to_index(index, index->indrestrictinfo, clauseset);
     }
     
    //------------------------------- match_clauses_to_index
    
     /*
      * match_clauses_to_index
      *    Perform match_clause_to_index() for each clause in a list.
      *    Matching clauses are added to *clauseset.
      */
     static void
     match_clauses_to_index(IndexOptInfo *index,
                            List *clauses,
                            IndexClauseSet *clauseset)
     {
         ListCell   *lc;//临时变量
     
         foreach(lc, clauses)//遍历限制条件
         {
             RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
     
             match_clause_to_index(index, rinfo, clauseset);
         }
     }
    
    //--------------------------------------------------- match_join_clauses_to_index
    
     /*
      * match_join_clauses_to_index
      *    Identify join clauses for the rel that match the index.
      *    Matching clauses are added to *clauseset.
      *    Also, add any potentially usable join OR clauses to *joinorclauses.
      *    验证连接条件是否与Index匹配,匹配的条件添加到clauseset中
      *    另外,在joinorclauses中添加可能有用的连接条件OR子句
      */
     static void
     match_join_clauses_to_index(PlannerInfo *root,
                                 RelOptInfo *rel, IndexOptInfo *index,
                                 IndexClauseSet *clauseset,
                                 List **joinorclauses)
     {
         ListCell   *lc;//临时变量
     
         /* Scan the rel's join clauses */
         foreach(lc, rel->joininfo)//遍历连接条件
         {
             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     
             /* Check if clause can be moved to this rel */
             if (!join_clause_is_movable_to(rinfo, rel))
                 continue;
     
             /* Potentially usable, so see if it matches the index or is an OR */
             if (restriction_is_or_clause(rinfo))
                 *joinorclauses = lappend(*joinorclauses, rinfo);
             else
                 match_clause_to_index(index, rinfo, clauseset);
         }
     }
    
    
    //--------------------------------------------------- match_eclass_clauses_to_index
    
     /*
      * match_eclass_clauses_to_index
      *    Identify EquivalenceClass join clauses for the rel that match the index.
      *    Matching clauses are added to *clauseset.
      *    验证EC连接条件是否与Index匹配,相匹配的子句加入到clauseset中
      */
     static void
     match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index,
                                   IndexClauseSet *clauseset)
     {
         int         indexcol;
     
         /* No work if rel is not in any such ECs */
         if (!index->rel->has_eclass_joins)//没有ECs,返回
             return;
     
         for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)//遍历索引列
         {
             ec_member_matches_arg arg;
             List       *clauses;
     
             /* Generate clauses, skipping any that join to lateral_referencers */
             //生成条件子句链表
             arg.index = index;
             arg.indexcol = indexcol;
             clauses = generate_implied_equalities_for_column(root,
                                                              index->rel,
                                                              ec_member_matches_indexcol,
                                                              (void *) &arg,
                                                              index->rel->lateral_referencers);
     
             /*
              * We have to check whether the results actually do match the index,
              * since for non-btree indexes the EC's equality operators might not
              * be in the index opclass (cf ec_member_matches_indexcol).
              */
             match_clauses_to_index(index, clauses, clauseset);
         }
     }
    
    //---------------------------- generate_implied_equalities_for_column
    
     /*
      * generate_implied_equalities_for_column
      *    Create EC-derived joinclauses usable with a specific column.
      *    创建可用于特定列的EC衍生连接条件
      *
      * This is used by indxpath.c to extract potentially indexable joinclauses
      * from ECs, and can be used by foreign data wrappers for similar purposes.
      * We assume that only expressions in Vars of a single table are of interest,
      * but the caller provides a callback function to identify exactly which
      * such expressions it would like to know about.
      *
      * We assume that any given table/index column could appear in only one EC.
      * (This should be true in all but the most pathological cases, and if it
      * isn't, we stop on the first match anyway.)  Therefore, what we return
      * is a redundant list of clauses equating the table/index column to each of
      * the other-relation values it is known to be equal to.  Any one of
      * these clauses can be used to create a parameterized path, and there
      * is no value in using more than one.  (But it *is* worthwhile to create
      * a separate parameterized path for each one, since that leads to different
      * join orders.)
      *
      * The caller can pass a Relids set of rels we aren't interested in joining
      * to, so as to save the work of creating useless clauses.
      */
     List *
     generate_implied_equalities_for_column(PlannerInfo *root,
                                            RelOptInfo *rel,
                                            ec_matches_callback_type callback,
                                            void *callback_arg,
                                            Relids prohibited_rels)
     {
         List       *result = NIL;//结果链表
         bool        is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);//是否子Relation
         Relids      parent_relids;//父Relids
         ListCell   *lc1;//变量
     
         /* Indexes are available only on base or "other" member relations. */
         Assert(IS_SIMPLE_REL(rel));
     
         /* If it's a child rel, we'll need to know what its parent(s) are */
         if (is_child_rel)
             parent_relids = find_childrel_parents(root, rel);
         else
             parent_relids = NULL;   /* not used, but keep compiler quiet */
     
         foreach(lc1, root->eq_classes)//遍历EC
         {
             EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);//当前的EC
             EquivalenceMember *cur_em;//EC成员
             ListCell   *lc2;//链表成员
     
             /*
              * Won't generate joinclauses if const or single-member (the latter
              * test covers the volatile case too)
              */
             if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
                 continue;
     
             /*
              * No point in searching if rel not mentioned in eclass (but we can't
              * tell that for a child rel).
              */
             if (!is_child_rel &&
                 !bms_is_subset(rel->relids, cur_ec->ec_relids))
                 continue;
     
             /*
              * Scan members, looking for a match to the target column.  Note that
              * child EC members are considered, but only when they belong to the
              * target relation.  (Unlike regular members, the same expression
              * could be a child member of more than one EC.  Therefore, it's
              * potentially order-dependent which EC a child relation's target
              * column gets matched to.  This is annoying but it only happens in
              * corner cases, so for now we live with just reporting the first
              * match.  See also get_eclass_for_sort_expr.)
              */
             cur_em = NULL;
             foreach(lc2, cur_ec->ec_members)//遍历EC的成员
             {
                 cur_em = (EquivalenceMember *) lfirst(lc2);//当前成员
                 if (bms_equal(cur_em->em_relids, rel->relids) &&
                     callback(root, rel, cur_ec, cur_em, callback_arg))//调用ec_member_matches_indexcol函数
                     break;//找到匹配的成员,跳出
                 cur_em = NULL;
             }
     
             if (!cur_em)
                 continue;
     
             /*
              * Found our match.  Scan the other EC members and attempt to generate
              * joinclauses.
              */
             foreach(lc2, cur_ec->ec_members)
             {
                 EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
                 Oid         eq_op;
                 RestrictInfo *rinfo;
     
                 if (other_em->em_is_child)//
                     continue;       /* 忽略子成员,ignore children here */
     
                 /* Make sure it'll be a join to a different rel */
                 if (other_em == cur_em ||
                     bms_overlap(other_em->em_relids, rel->relids))//过滤cur_em
                     continue;
     
                 /* Forget it if caller doesn't want joins to this rel */
                 if (bms_overlap(other_em->em_relids, prohibited_rels))
                     continue;
     
                 /*
                  * Also, if this is a child rel, avoid generating a useless join
                  * to its parent rel(s).
                  */
                 if (is_child_rel &&
                     bms_overlap(parent_relids, other_em->em_relids))
                     continue;
     
                 eq_op = select_equality_operator(cur_ec,
                                                  cur_em->em_datatype,
                                                  other_em->em_datatype);
                 if (!OidIsValid(eq_op))
                     continue;
     
                 /* set parent_ec to mark as redundant with other joinclauses */
                 rinfo = create_join_clause(root, cur_ec, eq_op,
                                            cur_em, other_em,
                                            cur_ec);//创建连接条件语句
     
                 result = lappend(result, rinfo);
             }
     
             /*
              * If somehow we failed to create any join clauses, we might as well
              * keep scanning the ECs for another match.  But if we did make any,
              * we're done, because we don't want to return non-redundant clauses.
              */
             if (result)
                 break;
         }
     
         return result;
     }
    
    //---------------------------- match_clause_to_index
    
     /*
      * match_clause_to_index
      *    Test whether a qual clause can be used with an index.
      *
      * If the clause is usable, add it to the appropriate list in *clauseset.
      * *clauseset must be initialized to zeroes before first call.
      *
      * Note: in some circumstances we may find the same RestrictInfos coming from
      * multiple places.  Defend against redundant outputs by refusing to add a
      * clause twice (pointer equality should be a good enough check for this).
      *
      * Note: it's possible that a badly-defined index could have multiple matching
      * columns.  We always select the first match if so; this avoids scenarios
      * wherein we get an inflated idea of the index's selectivity by using the
      * same clause multiple times with different index columns.
      */
     static void
     match_clause_to_index(IndexOptInfo *index,
                           RestrictInfo *rinfo,
                           IndexClauseSet *clauseset)
     {
         int         indexcol;
     
         /*
          * Never match pseudoconstants to indexes.  (Normally a match could not
          * happen anyway, since a pseudoconstant clause couldn't contain a Var,
          * but what if someone builds an expression index on a constant? It's not
          * totally unreasonable to do so with a partial index, either.)
          */
         if (rinfo->pseudoconstant)
             return;
     
         /*
          * If clause can't be used as an indexqual because it must wait till after
          * some lower-security-level restriction clause, reject it.
          */
         if (!restriction_is_securely_promotable(rinfo, index->rel))
             return;
     
         /* OK, check each index key column for a match */
         for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
         {
             if (match_clause_to_indexcol(index,
                                          indexcol,
                                          rinfo))
             {
                 clauseset->indexclauses[indexcol] =
                     list_append_unique_ptr(clauseset->indexclauses[indexcol],
                                            rinfo);//赋值
                 clauseset->nonempty = true;//设置标记
                 return;
             }
         }
     }
    
    //------------------- match_clause_to_indexcol
    
     /*
      * match_clause_to_indexcol()
      *    Determines whether a restriction clause matches a column of an index.
      *    判断约束条件是否与索引中的某一列匹配
      *
      *    To match an index normally, the clause:
      *    通常来说,匹配索引,子句必须:
      *    (1)  must be in the form (indexkey op const) or (const op indexkey);
      *         and
      *         满足格式:(索引键 操作符 常量) 或者 (常量 操作符 索引键),而且
      *    (2)  must contain an operator which is in the same family as the index
      *         operator for this column, or is a "special" operator as recognized
      *         by match_special_index_operator();
      *         and
      *         包含一种与索引列同一family的操作符,或者是一种通过
                match_special_index_operator方法认定的特殊操作符
      *    (3)  must match the collation of the index, if collation is relevant.
      *         与索引的排序规则collation匹配
      * 
      *    Our definition of "const" is exceedingly liberal: we allow anything that
      *    doesn't involve a volatile function or a Var of the index's relation.
      *    In particular, Vars belonging to other relations of the query are
      *    accepted here, since a clause of that form can be used in a
      *    parameterized indexscan.  It's the responsibility of higher code levels
      *    to manage restriction and join clauses appropriately.
      *    这里"const"常量的定义非常自由:除了易变函数或索引关系的Var之外的,均视为"const"
      *    由于存在参数化索引扫描的可能,因此查询中属于其他Relations的Vars也可以在此出现.
      *    调用此函数的代码有责任"合适"的管理限制条件和连接条件.
      *
      *    Note: we do need to check for Vars of the index's relation on the
      *    "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3))
      *    are not processable by a parameterized indexscan on a.f1, whereas
      *    something like (a.f1 OP (b.f2 OP c.f3)) is.
      *    注意:需要在子句的const部分检查索引关系的Vars,因为子句
      *    如(a.f1 OP (b.f2 OP a.f3)不能通过a上的参数化索引扫描进行处理
      *
      *    Presently, the executor can only deal with indexquals that have the
      *    indexkey on the left, so we can only use clauses that have the indexkey
      *    on the right if we can commute the clause to put the key on the left.
      *    We do not actually do the commuting here, but we check whether a
      *    suitable commutator operator is available.
      *    目前为止,执行器只能处理索引键在左边的索引表达式,因此只能使用那些可以
      *    把索引键变换到左边的条件表达式.在这个函数中不执行变换,但会执行相应的检查.
      *
      *    If the index has a collation, the clause must have the same collation.
      *    For collation-less indexes, we assume it doesn't matter; this is
      *    necessary for cases like "hstore ? text", wherein hstore's operators
      *    don't care about collation but the clause will get marked with a
      *    collation anyway because of the text argument.  (This logic is
      *    embodied in the macro IndexCollMatchesExprColl.)
      *    如果索引含有排序规则(collation),条件子句必须包含相同的排序规则.
      *    对于无collation的索引,假定collation没有任何影响.
      *
      *    It is also possible to match RowCompareExpr clauses to indexes (but
      *    currently, only btree indexes handle this).  In this routine we will
      *    report a match if the first column of the row comparison matches the
      *    target index column.  This is sufficient to guarantee that some index
      *    condition can be constructed from the RowCompareExpr --- whether the
      *    remaining columns match the index too is considered in
      *    adjust_rowcompare_for_index().
      *    RowCompareExpr有可能与索引进行匹配,在这个处理过程中,如果行对比的第一个列
      *    与目标索引匹配,那么可以认为是匹配的.
      *
      *    It is also possible to match ScalarArrayOpExpr clauses to indexes, when
      *    the clause is of the form "indexkey op ANY (arrayconst)".
      *    如果子句的格式是"indexkey op ANY (arrayconst)",那么匹配ScalarArrayOpExpr
      *    也是可能的.
      *
      *    For boolean indexes, it is also possible to match the clause directly
      *    to the indexkey; or perhaps the clause is (NOT indexkey).
      *    对于布尔索引,可以直接与索引键进行匹配
      *
      * 输入参数:
      * 'index' is the index of interest.
      * index-正在处理的索引
      * 'indexcol' is a column number of 'index' (counting from 0).
      * indexcol-索引列(从0起算)
      * 'rinfo' is the clause to be tested (as a RestrictInfo node).
      * rinfo-RestrictInfo Node
      *
      * Returns true if the clause can be used with this index key.
      * 如可以使用索引,则返回T
      *
      * NOTE:  returns false if clause is an OR or AND clause; it is the
      * responsibility of higher-level routines to cope with those.
      * 注意:如果条件语句是OR/AND语句,则返回F,由上层处理逻辑处理
      */
     static bool
     match_clause_to_indexcol(IndexOptInfo *index,
                              int indexcol,
                              RestrictInfo *rinfo)
     {
         Expr       *clause = rinfo->clause;//条件语句
         Index       index_relid = index->rel->relid;//Index的Relid
         Oid         opfamily;//操作符种类
         Oid         idxcollation;//索引排序规则
         Node       *leftop,//左节点
                    *rightop;//右节点
         Relids      left_relids;//左节点相关Relids
         Relids      right_relids;//右节点相关Relids
         Oid         expr_op;//表达式操作符的Oid
         Oid         expr_coll;//表达式Collation的Oid
         bool        plain_op;//是否Plain操作符
     
         Assert(indexcol < index->nkeycolumns);
     
         opfamily = index->opfamily[indexcol];//获取操作符种类
         idxcollation = index->indexcollations[indexcol];//获取索引排序规则
     
         /* First check for boolean-index cases. */
         if (IsBooleanOpfamily(opfamily))//是否布尔类
         {
             if (match_boolean_index_clause((Node *) clause, indexcol, index))//是否匹配
                 return true;//如匹配,返回T
         }
     
         /*
          * Clause must be a binary opclause, or possibly a ScalarArrayOpExpr
          * (which is always binary, by definition).  Or it could be a
          * RowCompareExpr, which we pass off to match_rowcompare_to_indexcol().
          * Or, if the index supports it, we can handle IS NULL/NOT NULL clauses.
          */
         if (is_opclause(clause))//OpExpr
         {
             leftop = get_leftop(clause);
             rightop = get_rightop(clause);
             if (!leftop || !rightop)
                 return false;
             left_relids = rinfo->left_relids;
             right_relids = rinfo->right_relids;
             expr_op = ((OpExpr *) clause)->opno;
             expr_coll = ((OpExpr *) clause)->inputcollid;
             plain_op = true;
         }
         else if (clause && IsA(clause, ScalarArrayOpExpr))//ScalarArrayOpExpr
         {
             ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
     
             /* We only accept ANY clauses, not ALL */
             if (!saop->useOr)
                 return false;
             leftop = (Node *) linitial(saop->args);
             rightop = (Node *) lsecond(saop->args);
             left_relids = NULL;     /* not actually needed */
             right_relids = pull_varnos(rightop);
             expr_op = saop->opno;
             expr_coll = saop->inputcollid;
             plain_op = false;
         }
         else if (clause && IsA(clause, RowCompareExpr))//RowCompareExpr
         {
             return match_rowcompare_to_indexcol(index, indexcol,
                                                 opfamily, idxcollation,
                                                 (RowCompareExpr *) clause);
         }
         else if (index->amsearchnulls && IsA(clause, NullTest))//NullTest
         {
             NullTest   *nt = (NullTest *) clause;
     
             if (!nt->argisrow &&
                 match_index_to_operand((Node *) nt->arg, indexcol, index))
                 return true;
             return false;
         }
         else
             return false;
     
         /*
          * Check for clauses of the form: (indexkey operator constant) or
          * (constant operator indexkey).  See above notes about const-ness.
          * (indexkey operator constant)和(constant operator indexkey)格式的语句
          */
         //处理:(indexkey operator constant)
         if (match_index_to_operand(leftop, indexcol, index) &&
             !bms_is_member(index_relid, right_relids) &&
             !contain_volatile_functions(rightop))
         {
             if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
                 is_indexable_operator(expr_op, opfamily, true))//排序规则&操作符种类匹配
                 return true;//返回T
     
             /*
              * If we didn't find a member of the index's opfamily, see whether it
              * is a "special" indexable operator.
              */
             if (plain_op &&
                 match_special_index_operator(clause, opfamily,
                                              idxcollation, true))//Plain操作&特殊操作符,返回T
                 return true;
             return false;//否则,返回F
         }
     
         //处理(constant operator indexkey)
         if (plain_op &&
             match_index_to_operand(rightop, indexcol, index) &&
             !bms_is_member(index_relid, left_relids) &&
             !contain_volatile_functions(leftop))
         {
             if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
                 is_indexable_operator(expr_op, opfamily, false))
                 return true;
     
             /*
              * If we didn't find a member of the index's opfamily, see whether it
              * is a "special" indexable operator.
              */
             if (match_special_index_operator(clause, opfamily,
                                              idxcollation, false))
                 return true;
             return false;
         }
     
         return false;
     }
    
    

    三、跟踪分析

    测试脚本如下

    select a.*,b.grbh,b.je 
    from t_dwxx a,
        lateral (select t1.dwbh,t1.grbh,t2.je 
         from t_grxx t1 
              inner join t_jfxx t2 on t1.dwbh = a.dwbh and t1.grbh = t2.grbh) b
    where a.dwbh = '1001'
    order by b.dwbh;
    
    

    注意:按先前的分析,SQL语句存在等价类{t_dwxx.dwbh t_grxx.dwbh '1001'}和{t_grxx.grbh t_jfxx.grbh},在构造t_grxx的索引访问路径时,使用等价类构造.

    启动gdb,第一个RelOptInfo(对应t_dwxx)有3个Index,第二个RelOptInfo(对应t_grxx)有2个Index(分别是在dwbh和grbh上的索引),第三个RelOptInfo(对应t_jfxx)有1个Index(grbh上的索引),本节以t_jfxx和t_grxx为例进行跟踪分析

    ...
    (gdb) c
    Continuing.
    
    Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:242
    242   if (rel->indexlist == NIL)
    (gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
    $38 = {type = T_IndexOptInfo, indexoid = 16750, reltablespace = 0, rel = 0x2729530, pages = 276, tuples = 100000, 
      tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729998, indexcollations = 0x27299b0, opfamily = 0x27299c8, 
      opcintype = 0x27299e0, sortopfamily = 0x27299c8, reverse_sort = 0x2729a10, nulls_first = 0x2729a28, 
      canreturn = 0x27299f8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x2729ae0, indrestrictinfo = 0x0, 
      predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
      amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
      amcostestimate = 0x94f0ad <btcostestimate>}
    

    输入信息是已熟知的root(PlannerInfo)和rel(RelOptInfo).首先进行索引遍历循环

    (gdb) c
    Continuing.
    
    Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:242
    242   if (rel->indexlist == NIL)
    (gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
    $38 = {type = T_IndexOptInfo, indexoid = 16750, reltablespace = 0, rel = 0x2729530, pages = 276, tuples = 100000, 
      tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729998, indexcollations = 0x27299b0, opfamily = 0x27299c8, 
      opcintype = 0x27299e0, sortopfamily = 0x27299c8, reverse_sort = 0x2729a10, nulls_first = 0x2729a28, 
      canreturn = 0x27299f8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x2729ae0, indrestrictinfo = 0x0, 
      predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
      amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
      amcostestimate = 0x94f0ad <btcostestimate>}
    

    查询数据字典pg_class,oid=16750相应的索引是idx_t_jfxx_grbh

    testdb=# select relname from pg_class where oid=16750;
         relname     
    -----------------
     idx_t_jfxx_grbh
    (1 row)
    

    调用match_restriction_clauses_to_index和match_join_clauses_to_index,子句集合均为NULL

    (gdb) 
    match_restriction_clauses_to_index (rel=0x2729530, index=0x2729888, clauseset=0x7fff69cf0890) at indxpath.c:2117
    2117  }
    (gdb) 
    create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:275
    275     get_index_paths(root, rel, index, &rclauseset,
    (gdb) 
    284     MemSet(&jclauseset, 0, sizeof(jclauseset));
    (gdb) 
    285     match_join_clauses_to_index(root, rel, index,
    (gdb) 
    292     MemSet(&eclauseset, 0, sizeof(eclauseset));
    (gdb) 
    293     match_eclass_clauses_to_index(root, index,
    (gdb) p rclauseset
    $2 = {nonempty = false, indexclauses = {0x0 <repeats 32 times>}}
    (gdb) p joinorclauses
    $3 = (List *) 0x0
    (gdb) p jclauseset
    $4 = {nonempty = false, indexclauses = {0x0 <repeats 32 times>}}
    

    进入match_eclass_clauses_to_index

    ...
    268     match_restriction_clauses_to_index(rel, index, &rclauseset);
    (gdb) step
    match_restriction_clauses_to_index (rel=0x2724c88, index=0x27254d8, clauseset=0x7fff69cf0890) at indxpath.c:2116
    2116    match_clauses_to_index(index, index->indrestrictinfo, clauseset);
    

    进入generate_implied_equalities_for_column

    ...
    (gdb) step
    generate_implied_equalities_for_column (root=0x2714c50, rel=0x2729530, callback=0x7509b0 <ec_member_matches_indexcol>, 
        callback_arg=0x7fff69cf0620, prohibited_rels=0x0) at equivclass.c:2219
    2219    List     *result = NIL;
    

    等价类信息

    ...
    2235      EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
    (gdb) 
    2243      if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
    (gdb) p *cur_ec
    $6 = {type = T_EquivalenceClass, ec_opfamilies = 0x272a268, ec_collation = 100, ec_members = 0x272a4a8, 
      ec_sources = 0x272a3f0, ec_derives = 0x272d2f0, ec_relids = 0x272a470, ec_has_const = false, ec_has_volatile = false, 
      ec_below_outer_join = false, ec_broken = false, ec_sortref = 0, ec_min_security = 0, ec_max_security = 0, ec_merged = 0x0}
    

    遍历EC的成员后,cur_em不为NULL,查看cur_em内存结构(匹配的成员,即t_jfxx.grbh)

    2281      foreach(lc2, cur_ec->ec_members)
    (gdb) p *cur_em
    $7 = {type = T_EquivalenceMember, em_expr = 0x2722890, em_relids = 0x272a238, em_nullable_relids = 0x0, 
      em_is_const = false, em_is_child = false, em_datatype = 25}
    (gdb) p *cur_em->em_expr
    $8 = {type = T_RelabelType}
    (gdb) p *(RelabelType *)cur_em->em_expr
    $9 = {xpr = {type = T_RelabelType}, arg = 0x2722840, resulttype = 25, resulttypmod = -1, resultcollid = 100, 
      relabelformat = COERCE_IMPLICIT_CAST, location = -1}
    (gdb) p *((RelabelType *)cur_em->em_expr)->arg
    $10 = {type = T_Var}
    (gdb) p *(Var *)((RelabelType *)cur_em->em_expr)->arg
    $11 = {xpr = {type = T_Var}, varno = 4, varattno = 1, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
      varnoold = 4, varoattno = 1, location = 168}
    

    再次遍历等价类的成员,得到第一个约束条件(t_jfxx.grbh=t_grxx.grbh)

    (gdb) n
    2314        rinfo = create_join_clause(root, cur_ec, eq_op,
    (gdb) 
    2318        result = lappend(result, rinfo);
    (gdb) p *rinfo
    $18 = {type = T_RestrictInfo, clause = 0x272d910, is_pushed_down = true, outerjoin_delayed = false, can_join = true, 
      pseudoconstant = false, leakproof = false, security_level = 0, clause_relids = 0x272db10, required_relids = 0x272d5f0, 
      outer_relids = 0x0, nullable_relids = 0x0, left_relids = 0x272dae0, right_relids = 0x272daf8, orclause = 0x0, 
      parent_ec = 0x272a340, eval_cost = {startup = 0, per_tuple = 0.0025000000000000001}, norm_selec = -1, outer_selec = -1, 
      mergeopfamilies = 0x272db48, left_ec = 0x272a340, right_ec = 0x272a340, left_em = 0x272a4d8, right_em = 0x272a420, 
      scansel_cache = 0x0, outer_is_left = false, hashjoinoperator = 98, left_bucketsize = -1, right_bucketsize = -1, 
      left_mcvfreq = -1, right_mcvfreq = -1}
    (gdb) set $tmp1=(RelabelType *)((OpExpr *)rinfo->clause)->args->head->data.ptr_value
    (gdb) set $tmp2=(RelabelType *)((OpExpr *)rinfo->clause)->args->head->next->data.ptr_value
    (gdb) p *(Var *)$tmp1->arg
    $31 = {xpr = {type = T_Var}, varno = 4, varattno = 1, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
      varnoold = 4, varoattno = 1, location = 168}
    (gdb) p *(Var *)$tmp2->arg
    $32 = {xpr = {type = T_Var}, varno = 3, varattno = 2, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
      varnoold = 3, varoattno = 2, location = 158}
    

    获得了结果,返回到match_eclass_clauses_to_index

    2281      foreach(lc2, cur_ec->ec_members)
    (gdb) 
    2326      if (result)
    (gdb) 
    2327        break;
    (gdb) 
    2330    return result;
    (gdb) 
    2331  }
    (gdb) 
    match_eclass_clauses_to_index (root=0x2714c50, index=0x2729888, clauseset=0x7fff69cf0670) at indxpath.c:2184
    2184      match_clauses_to_index(index, clauses, clauseset);
    ...
    

    下面再考察t_grxx.dwbh上的索引为例,分析match_clause_to_index

    (gdb) c
    Continuing.
    
    Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2728c38) at indxpath.c:242
    242   if (rel->indexlist == NIL)
    (gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
    $39 = {type = T_IndexOptInfo, indexoid = 16752, reltablespace = 0, rel = 0x2728c38, pages = 276, tuples = 100000, 
      tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729378, indexcollations = 0x2729390, opfamily = 0x27293a8, 
      opcintype = 0x27293c0, sortopfamily = 0x27293a8, reverse_sort = 0x27293f0, nulls_first = 0x2729408, 
      canreturn = 0x27293d8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x27294e0, indrestrictinfo = 0x272b040, 
      predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
      amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
      amcostestimate = 0x94f0ad <btcostestimate>}
    

    oid=16752,对应的object为idx_t_grxx_dwbh

    testdb=# select relname from pg_class where oid=16752;
         relname     
    -----------------
     idx_t_grxx_dwbh
    (1 row)
    

    进入IndexOptInfo循环,第一个元素对应的IndexOptInfo为idx_t_grxx_dwbh

    249   foreach(lc, rel->indexlist)
    (gdb) p *rel->indexlist
    $40 = {type = T_List, length = 2, head = 0x2729510, tail = 0x2729218}
    (gdb) p *(IndexOptInfo *)rel->indexlist->head->data->ptr_value
    $42 = {type = T_IndexOptInfo, indexoid = 16752, reltablespace = 0, rel = 0x2728c38, pages = 276, tuples = 100000, 
      tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729378, indexcollations = 0x2729390, opfamily = 0x27293a8, 
      opcintype = 0x27293c0, sortopfamily = 0x27293a8, reverse_sort = 0x27293f0, nulls_first = 0x2729408, 
      canreturn = 0x27293d8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x27294e0, indrestrictinfo = 0x272b040, 
      predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
      amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
      amcostestimate = 0x94f0ad <btcostestimate>}
    

    一路小跑,进入match_clause_to_indexcol

    ...
    (gdb) step
    match_clause_to_indexcol (index=0x2729268, indexcol=0, rinfo=0x272ae58) at indxpath.c:2330
    2330    Expr     *clause = rinfo->clause;
    (gdb) n
    2331    Index   index_relid = index->rel->relid;
    (gdb) n
    2344    opfamily = index->opfamily[indexcol];
    (gdb) 
    2345    idxcollation = index->indexcollations[indexcol];
    (gdb) p index_relid
    $47 = 3
    (gdb) p opfamily
    $48 = 1994
    (gdb) 
    

    根据opfamily查询数据字典

    testdb=# select * from pg_opfamily where oid=1994;
     opfmethod | opfname  | opfnamespace | opfowner 
    -----------+----------+--------------+----------
           403 | text_ops |           11 |       10
    (1 row)
    -- 索引访问方法(btree)
    testdb=# select * from pg_am where oid=403;
     amname | amhandler | amtype 
    --------+-----------+--------
     btree  | bthandler | i
    (1 row)
    

    下面进入is_opclause判断分支

    (gdb) p idxcollation
    $49 = 100
    (gdb) n
    2360    if (is_opclause(clause))
    (gdb) 
    2362      leftop = get_leftop(clause);
    (gdb) 
    2363      rightop = get_rightop(clause);
    (gdb) 
    2364      if (!leftop || !rightop)
    (gdb) p *leftop
    $50 = {type = T_RelabelType}
    (gdb) p *rightop
    $51 = {type = T_Const}
    

    限制条件下推后,形成限制条件t_grxx.dwbh = '1001'

    #Var:t_grxx.dwbh
    (gdb) p *(RelabelType *)leftop
    $56 = {xpr = {type = T_RelabelType}, arg = 0x272ad80, resulttype = 25, resulttypmod = -1, resultcollid = 100, 
      relabelformat = COERCE_IMPLICIT_CAST, location = -1}
    #常量:'1001'
    (gdb) p *(Const *)rightop
    $57 = {xpr = {type = T_Const}, consttype = 25, consttypmod = -1, constcollid = 100, constlen = -1, constvalue = 41069848, 
      constisnull = false, constbyval = false, location = 194}
    

    执行相关判断,返回T

    (gdb) n
    2366      left_relids = rinfo->left_relids;
    (gdb) 
    2367      right_relids = rinfo->right_relids;
    (gdb) 
    2368      expr_op = ((OpExpr *) clause)->opno;
    (gdb) 
    2369      expr_coll = ((OpExpr *) clause)->inputcollid;
    (gdb) 
    2370      plain_op = true;
    (gdb) 
    2409    if (match_index_to_operand(leftop, indexcol, index) &&
    (gdb) 
    2410      !bms_is_member(index_relid, right_relids) &&
    (gdb) 
    2409    if (match_index_to_operand(leftop, indexcol, index) &&
    (gdb) 
    2411      !contain_volatile_functions(rightop))
    (gdb) 
    2410      !bms_is_member(index_relid, right_relids) &&
    (gdb) 
    2413      if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
    (gdb) 
    2414        is_indexable_operator(expr_op, opfamily, true))
    (gdb) 
    2413      if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
    (gdb) 
    2415        return true;
    

    给clauseset变量赋值

    (gdb) 
    match_clause_to_index (index=0x2729268, rinfo=0x272ae58, clauseset=0x7fff69cf0890) at indxpath.c:2255
    2255          list_append_unique_ptr(clauseset->indexclauses[indexcol],
    (gdb) 
    2254        clauseset->indexclauses[indexcol] =
    (gdb) 
    2257        clauseset->nonempty = true;
    (gdb) 
    2258        return;
    (gdb) 
    2261  }
    

    返回到match_clauses_to_index

    (gdb) 
    match_clauses_to_index (index=0x2729268, clauses=0x272b040, clauseset=0x7fff69cf0890) at indxpath.c:2200
    2200    foreach(lc, clauses)
    

    至此,match_XXX的跟踪完毕,下一小节介绍get_index_paths.

    四、参考资料

    allpaths.c
    cost.h
    costsize.c
    PG Document:Query Planning

    相关文章

      网友评论

        本文标题:PostgreSQL 源码解读(55)- 查询语句#40(mak

        本文链接:https://www.haomeiwen.com/subject/nmqjoftx.html