PostgreSQL 源码解读（55）- 查询语句#40（mak

作者: EthanHe | 来源:发表于2018-09-26 16:18 被阅读9次

本节继续介绍make_one_rel函数中的set_base_rel_pathlists函数及其子函数。set_base_rel_pathlists函数的目的是为每一个base rel找出所有可用的访问路径(包括顺序扫描和所有可用的索引)，每一个可用的路径都会添加到pathlist链表中。这一小节主要介绍索引扫描成本估算的第一部分(验证限制/约束条件是否与索引匹配)。

一、数据结构

RelOptInfo

 typedef struct RelOptInfo
 {
     NodeTag     type;//节点标识
 
     RelOptKind  reloptkind;//RelOpt类型
 
     /* all relations included in this RelOptInfo */
     Relids      relids;         /*Relids(rtindex)集合 set of base relids (rangetable indexes) */
 
     /* size estimates generated by planner */
     double      rows;           /*结果元组的估算数量 estimated number of result tuples */
 
     /* per-relation planner control flags */
     bool        consider_startup;   /*是否考虑启动成本?是,需要保留启动成本低的路径 keep cheap-startup-cost paths? */
     bool        consider_param_startup; /*是否考虑参数化?的路径 ditto, for parameterized paths? */
     bool        consider_parallel;  /*是否考虑并行处理路径 consider parallel paths? */
 
     /* default result targetlist for Paths scanning this relation */
     struct PathTarget *reltarget;   /*扫描该Relation时默认的结果 list of Vars/Exprs, cost, width */
 
     /* materialization information */
     List       *pathlist;       /*访问路径链表 Path structures */
     List       *ppilist;        /*路径链表中使用参数化路径进行 ParamPathInfos used in pathlist */
     List       *partial_pathlist;   /* partial Paths */
     struct Path *cheapest_startup_path;//代价最低的启动路径
     struct Path *cheapest_total_path;//代价最低的整体路径
     struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
     List       *cheapest_parameterized_paths;//代价最低的参数化路径链表
 
     /* parameterization information needed for both base rels and join rels */
     /* (see also lateral_vars and lateral_referencers) */
     Relids      direct_lateral_relids;  /*使用lateral语法,需依赖的Relids rels directly laterally referenced */
     Relids      lateral_relids; /* minimum parameterization of rel */
 
     /* information about a base rel (not set for join rels!) */
     //reloptkind=RELOPT_BASEREL时使用的数据结构
     Index       relid;          /* Relation ID */
     Oid         reltablespace;  /* 表空间 containing tablespace */
     RTEKind     rtekind;        /* 基表?子查询?还是函数等等?RELATION, SUBQUERY, FUNCTION, etc */
     AttrNumber  min_attr;       /* 最小的属性编号 smallest attrno of rel (often <0) */
     AttrNumber  max_attr;       /* 最大的属性编号 largest attrno of rel */
     Relids     *attr_needed;    /* 数组 array indexed [min_attr .. max_attr] */
     int32      *attr_widths;    /* 属性宽度 array indexed [min_attr .. max_attr] */
     List       *lateral_vars;   /* 关系依赖的Vars/PHVs LATERAL Vars and PHVs referenced by rel */
     Relids      lateral_referencers;    /*依赖该关系的Relids rels that reference me laterally */
     List       *indexlist;      /* 该关系的IndexOptInfo链表 list of IndexOptInfo */
     List       *statlist;       /* 统计信息链表 list of StatisticExtInfo */
     BlockNumber pages;          /* 块数 size estimates derived from pg_class */
     double      tuples;         /* 元组数 */
     double      allvisfrac;     /* ? */
     PlannerInfo *subroot;       /* 如为子查询,存储子查询的root if subquery */
     List       *subplan_params; /* 如为子查询,存储子查询的参数 if subquery */
     int         rel_parallel_workers;   /* 并行执行,需要多少个workers? wanted number of parallel workers */
 
     /* Information about foreign tables and foreign joins */
     //FDW相关信息
     Oid         serverid;       /* identifies server for the table or join */
     Oid         userid;         /* identifies user to check access as */
     bool        useridiscurrent;    /* join is only valid for current user */
     /* use "struct FdwRoutine" to avoid including fdwapi.h here */
     struct FdwRoutine *fdwroutine;
     void       *fdw_private;
 
     /* cache space for remembering if we have proven this relation unique */
     //已知的,可保证唯一元组返回的Relids链表
     List       *unique_for_rels;    /* known unique for these other relid
                                      * set(s) */
     List       *non_unique_for_rels;    /* 已知的,返回的数据不唯一的Relids链表 known not unique for these set(s) */
 
     /* used by various scans and joins: */
     List       *baserestrictinfo;   /* 如为基本关系,则存储约束条件 RestrictInfo structures (if base rel) */
     QualCost    baserestrictcost;   /* 解析约束表达式的成本? cost of evaluating the above */
     Index       baserestrict_min_security;  /* 最低安全等级 min security_level found in
                                              * baserestrictinfo */
     List       *joininfo;       /* 连接语句的约束条件信息 RestrictInfo structures for join clauses
                                  * involving this rel */
     bool        has_eclass_joins;   /* 是否存在等价类连接? True意味着joininfo并不完整,,T means joininfo is incomplete */
 
     /* used by partitionwise joins: */
       //是否尝试partitionwise连接,这是PG 11的一个新特性.
     bool        consider_partitionwise_join;    /* consider partitionwise
                                                  * join paths? (if
                                                  * partitioned rel) */
     Relids      top_parent_relids;  /* Relids of topmost parents (if "other"
                                      * rel) */
 
     /* used for partitioned relations */
     //分区表使用
     PartitionScheme part_scheme;    /* 分区的schema Partitioning scheme. */
     int         nparts;         /* 分区数 number of partitions */
     struct PartitionBoundInfoData *boundinfo;   /* 分区边界信息 Partition bounds */
     List       *partition_qual; /* 分区约束 partition constraint */
     struct RelOptInfo **part_rels;  /* 分区的RelOptInfo数组 Array of RelOptInfos of partitions,
                                      * stored in the same order of bounds */
     List      **partexprs;      /* 非空分区键表达式 Non-nullable partition key expressions. */
     List      **nullable_partexprs; /* 可为空的分区键表达式 Nullable partition key expressions. */
     List       *partitioned_child_rels; /* RT Indexes链表 List of RT indexes. */
 } RelOptInfo;

Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!

/*
  * The cost estimate produced by cost_qual_eval() includes both a one-time
  * (startup) cost, and a per-tuple cost.
  */
 typedef struct QualCost
 {
     Cost        startup;        /* 启动成本,one-time cost */
     Cost        per_tuple;      /* 每个元组的成本,per-evaluation cost */
 } QualCost;
 
typedef double Cost; /* execution cost (in page-access units) */

 /* defaults for costsize.c's Cost parameters */
 /* NB: cost-estimation code should use the variables, not these constants! */
 /* 注意:实际值通过系统配置文件定义,而不是这里的常量定义! */
 /* If you change these, update backend/utils/misc/postgresql.sample.conf */
 #define DEFAULT_SEQ_PAGE_COST  1.0       //顺序扫描page的成本
 #define DEFAULT_RANDOM_PAGE_COST  4.0      //随机扫描page的成本
 #define DEFAULT_CPU_TUPLE_COST  0.01     //处理一个元组的CPU成本
 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005   //处理一个索引元组的CPU成本
 #define DEFAULT_CPU_OPERATOR_COST  0.0025    //执行一次操作或函数的CPU成本
 #define DEFAULT_PARALLEL_TUPLE_COST 0.1    //并行执行,从一个worker传输一个元组到另一个worker的成本
 #define DEFAULT_PARALLEL_SETUP_COST  1000.0  //构建并行执行环境的成本
 
 #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288    /*先前已有介绍, measured in pages */

 double      seq_page_cost = DEFAULT_SEQ_PAGE_COST;
 double      random_page_cost = DEFAULT_RANDOM_PAGE_COST;
 double      cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double      cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double      cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 double      parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
 double      parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
 
 int         effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 
 Cost        disable_cost = 1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径
 
 int         max_parallel_workers_per_gather = 2;//每次gather使用的worker数

IndexClauseSet
用于收集匹配索引的的条件语句

 /* Data structure for collecting qual clauses that match an index */
 typedef struct
 {
     bool        nonempty;       /* True if lists are not all empty */
     /* Lists of RestrictInfos, one per index column */
     List       *indexclauses[INDEX_MAX_KEYS];
 } IndexClauseSet;

二、源码解读

set_base_rel_pathlists函数遍历RelOptInfo数组,为每一个Rel构造访问路径,先前已介绍了顺序扫描的成本估算,本节介绍索引扫描的成本估算(函数:create_index_paths),通过调用set_plain_rel_pathlist->create_index_paths函数实现.

 /*
  * set_plain_rel_pathlist
  *    Build access paths for a plain relation (no subquery, no inheritance)
  */
 static void
 set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
     Relids      required_outer;
 
     //...
 
     /* 索引扫描,Consider index scans */
     create_index_paths(root, rel);
 
     /* TID扫描,Consider TID scans */
     create_tidscan_paths(root, rel);
 }

create_index_paths
create_index_paths函数生成Relation所有可能被选中的索引访问路径,详见源码注释.

 /*
  * create_index_paths()
  *    Generate all interesting index paths for the given relation.
  *    Candidate paths are added to the rel's pathlist (using add_path).
  *    生成Relation所有可能被选中的索引访问路径.
  *    Paths通过add_path方法加入到RelOptInfo的pathlist链表中.
  *
  * To be considered for an index scan, an index must match one or more
  * restriction clauses or join clauses from the query's qual condition,
  * or match the query's ORDER BY condition, or have a predicate that
  * matches the query's qual condition.
  * 使用索引扫描的前提是:1.索引必须匹配一个或多个限制条件或连接条件,或者
  * 2.匹配查询的ORDER BY排序条件,或者3.匹配查询条件的谓词(部分/条件索引)
  *
  * There are two basic kinds of index scans.  A "plain" index scan uses
  * only restriction clauses (possibly none at all) in its indexqual,
  * so it can be applied in any context.  A "parameterized" index scan uses
  * join clauses (plus restriction clauses, if available) in its indexqual.
  * When joining such a scan to one of the relations supplying the other
  * variables used in its indexqual, the parameterized scan must appear as
  * the inner relation of a nestloop join; it can't be used on the outer side,
  * nor in a merge or hash join.  In that context, values for the other rels'
  * attributes are available and fixed during any one scan of the indexpath.
  * 有两种基本的索引扫描类型,一种是"plain"索引扫描,只使用限制条件(或者什么都
  *  没有),这种扫描方法适用于任何场景.另外一种是"parameterized"扫描,使用连接条件
  *  (可能的话,加上限制条件)."parameterized"扫描只能出现在嵌套循环中的内关系中,
  *  因为参数由外关系提供.
  *
  * An IndexPath is generated and submitted to add_path() for each plain or
  * parameterized index scan this routine deems potentially interesting for
  * the current query.
  * IndexPath访问路径通过函数add_path生成并提交.
  *
  * 输入参数:
  * 'rel' is the relation for which we want to generate index paths
  * rel是待生成索引范围路径的关系
  *
  * Note: check_index_predicates() must have been run previously for this rel.
  * 注意:函数check_index_predicates在调用此函数前调用
  *
  * Note: in cases involving LATERAL references in the relation's tlist, it's
  * possible that rel->lateral_relids is nonempty.  Currently, we include
  * lateral_relids into the parameterization reported for each path, but don't
  * take it into account otherwise.  The fact that any such rels *must* be
  * available as parameter sources perhaps should influence our choices of
  * index quals ... but for now, it doesn't seem worth troubling over.
  * In particular, comments below about "unparameterized" paths should be read
  * as meaning "unparameterized so far as the indexquals are concerned".
  */
 void
 create_index_paths(PlannerInfo *root, RelOptInfo *rel)
 {
     List       *indexpaths;//索引访问路径链表
     List       *bitindexpaths;//
     List       *bitjoinpaths;
     List       *joinorclauses;
     IndexClauseSet rclauseset;
     IndexClauseSet jclauseset;
     IndexClauseSet eclauseset;
     ListCell   *lc;
 
     /* Skip the whole mess if no indexes */
     if (rel->indexlist == NIL)//不存在索引,退出
         return;
 
     /* Bitmap paths are collected and then dealt with at the end */
     bitindexpaths = bitjoinpaths = joinorclauses = NIL;//初始赋值
 
     /* Examine each index in turn */
     foreach(lc, rel->indexlist)//遍历索引链表
     {
         IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);//索引信息
 
         /* Protect limited-size array in IndexClauseSets */
         Assert(index->ncolumns <= INDEX_MAX_KEYS);
 
         /*
          * Ignore partial indexes that do not match the query.
          * (generate_bitmap_or_paths() might be able to do something with
          * them, but that's of no concern here.)
          */
         if (index->indpred != NIL && !index->predOK)//部分索引,而且不能使用,不使用此索引
             continue;
 
         /*
          * Identify the restriction clauses that can match the index.
          * 验证索引和条件是否匹配
          */
         MemSet(&rclauseset, 0, sizeof(rclauseset));
         match_restriction_clauses_to_index(rel, index, &rclauseset);
 
         /*
          * Build index paths from the restriction clauses.  These will be
          * non-parameterized paths.  Plain paths go directly to add_path(),
          * bitmap paths are added to bitindexpaths to be handled below.
          * 通过限制条件创建非参数化索引访问路径.Plain访问路径通过函数add_path直接添加到RelOptInfo中
          * 位图访问路径添加到bitindexpaths链表中,后续再处理
          */
         get_index_paths(root, rel, index, &rclauseset,
                         &bitindexpaths);
 
         /*
          * Identify the join clauses that can match the index.  For the moment
          * we keep them separate from the restriction clauses.  Note that this
          * step finds only "loose" join clauses that have not been merged into
          * EquivalenceClasses.  Also, collect join OR clauses for later.
          * 验证索引是否与连接条件匹配(连接条件与限制条件相互独立).
          * 这一步只是发现未被合并到EC中的"loose"连接条件,在此之后会收集连接中的OR条件
          */
         MemSet(&jclauseset, 0, sizeof(jclauseset));
         match_join_clauses_to_index(root, rel, index,
                                     &jclauseset, &joinorclauses);
 
         /*
          * Look for EquivalenceClasses that can generate joinclauses matching
          * the index.
          * 通过EC(等价类)匹配索引,结果存储在eclauseset链表中
          */
         MemSet(&eclauseset, 0, sizeof(eclauseset));
         match_eclass_clauses_to_index(root, index,
                                       &eclauseset);
 
         /*
          * If we found any plain or eclass join clauses, build parameterized
          * index paths using them.
          * 如果存在plain或者eclass连接条件,创建参数化索引访问路径
          */
         if (jclauseset.nonempty || eclauseset.nonempty)
             consider_index_join_clauses(root, rel, index,
                                         &rclauseset,
                                         &jclauseset,
                                         &eclauseset,
                                         &bitjoinpaths);
     }
 
     /*
      * Generate BitmapOrPaths for any suitable OR-clauses present in the
      * restriction list.  Add these to bitindexpaths.
      * 基于RelOptInfo中的限制条件生成BitmapOrPaths访问路径
      */
     indexpaths = generate_bitmap_or_paths(root, rel,
                                           rel->baserestrictinfo, NIL);
     bitindexpaths = list_concat(bitindexpaths, indexpaths);//合并到bitindexpaths链表中
 
     /*
      * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in
      * the joinclause list.  Add these to bitjoinpaths.
      * 同样的,基于连接条件joinorclause中的OR语句生成BitmapOrPaths访问路径
      */
     indexpaths = generate_bitmap_or_paths(root, rel,
                                           joinorclauses, rel->baserestrictinfo);
     bitjoinpaths = list_concat(bitjoinpaths, indexpaths);//合并到bitjoinpaths链表中
 
     /*
      * If we found anything usable, generate a BitmapHeapPath for the most
      * promising combination of restriction bitmap index paths.  Note there
      * will be only one such path no matter how many indexes exist.  This
      * should be sufficient since there's basically only one figure of merit
      * (total cost) for such a path.
      */
     if (bitindexpaths != NIL)//存在位图索引访问路径
     {
         Path       *bitmapqual;//访问路径
         BitmapHeapPath *bpath;//BitmapHeapPath访问路径
 
         bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);//位图表达式路径
         bpath = create_bitmap_heap_path(root, rel, bitmapqual,
                                         rel->lateral_relids, 1.0, 0);//BitmapHeapPath访问路径
         add_path(rel, (Path *) bpath);//添加到RelOptInfo中
 
         /* create a partial bitmap heap path */
         if (rel->consider_parallel && rel->lateral_relids == NULL)
             create_partial_bitmap_paths(root, rel, bitmapqual);//创建并行访问路径
     }
 
     /*
      * Likewise, if we found anything usable, generate BitmapHeapPaths for the
      * most promising combinations of join bitmap index paths.  Our strategy
      * is to generate one such path for each distinct parameterization seen
      * among the available bitmap index paths.  This may look pretty
      * expensive, but usually there won't be very many distinct
      * parameterizations.  (This logic is quite similar to that in
      * consider_index_join_clauses, but we're working with whole paths not
      * individual clauses.)
      */
     if (bitjoinpaths != NIL)//bitjoinpaths位图连接访问路径
     {
         List       *path_outer;//依赖的外部Relids链表
         List       *all_path_outers;//依赖的外部路径Relids链表
         ListCell   *lc;//临时变量
 
         /*
          * path_outer holds the parameterization of each path in bitjoinpaths
          * (to save recalculating that several times), while all_path_outers
          * holds all distinct parameterization sets.
          */
         path_outer = all_path_outers = NIL;//初始化变量
         foreach(lc, bitjoinpaths)//遍历bitjoinpaths
         {
             Path       *path = (Path *) lfirst(lc);//访问路径
             Relids      required_outer;//依赖的外部Relids
 
             required_outer = get_bitmap_tree_required_outer(path);//
             path_outer = lappend(path_outer, required_outer);//添加到链表中
             if (!bms_equal_any(required_outer, all_path_outers))//不等,则添加到all_path_outers中
                 all_path_outers = lappend(all_path_outers, required_outer);
         }
 
         /* Now, for each distinct parameterization set ... */
         //对每一个唯一的参数化集合进行处理
         foreach(lc, all_path_outers)//遍历all_path_outers
         {
             Relids      max_outers = (Relids) lfirst(lc);
             List       *this_path_set;
             Path       *bitmapqual;
             Relids      required_outer;
             double      loop_count;
             BitmapHeapPath *bpath;
             ListCell   *lcp;
             ListCell   *lco;
 
             /* Identify all the bitmap join paths needing no more than that */
             this_path_set = NIL;
             forboth(lcp, bitjoinpaths, lco, path_outer)//遍历
             {
                 Path       *path = (Path *) lfirst(lcp);
                 Relids      p_outers = (Relids) lfirst(lco);
 
                 if (bms_is_subset(p_outers, max_outers))//无需依赖其他Relids,添加到this_path_set中
                     this_path_set = lappend(this_path_set, path);
             }
 
             /*
              * Add in restriction bitmap paths, since they can be used
              * together with any join paths.
              */
             this_path_set = list_concat(this_path_set, bitindexpaths);//合并bitindexpaths访问路径
 
             /* Select best AND combination for this parameterization */
             bitmapqual = choose_bitmap_and(root, rel, this_path_set);//为此参数化处理选择最好的AND组合
 
             /* And push that path into the mix */
             required_outer = get_bitmap_tree_required_outer(bitmapqual);
             loop_count = get_loop_count(root, rel->relid, required_outer);
             bpath = create_bitmap_heap_path(root, rel, bitmapqual,
                                             required_outer, loop_count, 0);//创建索引访问路径
             add_path(rel, (Path *) bpath);
         }
     }
 }

match_XXX
match_restriction_clauses_to_index函数验证限制条件是否与Index匹配,匹配的条件添加到clauseset中.
match_join_clauses_to_index函数验证连接条件是否与Index匹配,同样的,匹配的条件添加到clauseset中.
match_eclass_clauses_to_index函数验证EC连接条件是否与Index匹配,匹配的条件添加到clauseset中.


//--------------------------------------------------- match_restriction_clauses_to_index
 /*
  * match_restriction_clauses_to_index
  *    Identify restriction clauses for the rel that match the index.
  *    Matching clauses are added to *clauseset.
  *    验证限制条件是否与Index匹配,匹配的条件加入到clauseset中
  */
 static void
 match_restriction_clauses_to_index(RelOptInfo *rel, IndexOptInfo *index,
                                    IndexClauseSet *clauseset)
 {
     /* We can ignore clauses that are implied by the index predicate */
     //忽略部分(条件)索引,直接调用match_clauses_to_index
     match_clauses_to_index(index, index->indrestrictinfo, clauseset);
 }
 
//------------------------------- match_clauses_to_index

 /*
  * match_clauses_to_index
  *    Perform match_clause_to_index() for each clause in a list.
  *    Matching clauses are added to *clauseset.
  */
 static void
 match_clauses_to_index(IndexOptInfo *index,
                        List *clauses,
                        IndexClauseSet *clauseset)
 {
     ListCell   *lc;//临时变量
 
     foreach(lc, clauses)//遍历限制条件
     {
         RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
 
         match_clause_to_index(index, rinfo, clauseset);
     }
 }

//--------------------------------------------------- match_join_clauses_to_index

 /*
  * match_join_clauses_to_index
  *    Identify join clauses for the rel that match the index.
  *    Matching clauses are added to *clauseset.
  *    Also, add any potentially usable join OR clauses to *joinorclauses.
  *    验证连接条件是否与Index匹配,匹配的条件添加到clauseset中
  *    另外,在joinorclauses中添加可能有用的连接条件OR子句
  */
 static void
 match_join_clauses_to_index(PlannerInfo *root,
                             RelOptInfo *rel, IndexOptInfo *index,
                             IndexClauseSet *clauseset,
                             List **joinorclauses)
 {
     ListCell   *lc;//临时变量
 
     /* Scan the rel's join clauses */
     foreach(lc, rel->joininfo)//遍历连接条件
     {
         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
 
         /* Check if clause can be moved to this rel */
         if (!join_clause_is_movable_to(rinfo, rel))
             continue;
 
         /* Potentially usable, so see if it matches the index or is an OR */
         if (restriction_is_or_clause(rinfo))
             *joinorclauses = lappend(*joinorclauses, rinfo);
         else
             match_clause_to_index(index, rinfo, clauseset);
     }
 }


//--------------------------------------------------- match_eclass_clauses_to_index

 /*
  * match_eclass_clauses_to_index
  *    Identify EquivalenceClass join clauses for the rel that match the index.
  *    Matching clauses are added to *clauseset.
  *    验证EC连接条件是否与Index匹配,相匹配的子句加入到clauseset中
  */
 static void
 match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index,
                               IndexClauseSet *clauseset)
 {
     int         indexcol;
 
     /* No work if rel is not in any such ECs */
     if (!index->rel->has_eclass_joins)//没有ECs,返回
         return;
 
     for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)//遍历索引列
     {
         ec_member_matches_arg arg;
         List       *clauses;
 
         /* Generate clauses, skipping any that join to lateral_referencers */
         //生成条件子句链表
         arg.index = index;
         arg.indexcol = indexcol;
         clauses = generate_implied_equalities_for_column(root,
                                                          index->rel,
                                                          ec_member_matches_indexcol,
                                                          (void *) &arg,
                                                          index->rel->lateral_referencers);
 
         /*
          * We have to check whether the results actually do match the index,
          * since for non-btree indexes the EC's equality operators might not
          * be in the index opclass (cf ec_member_matches_indexcol).
          */
         match_clauses_to_index(index, clauses, clauseset);
     }
 }

//---------------------------- generate_implied_equalities_for_column

 /*
  * generate_implied_equalities_for_column
  *    Create EC-derived joinclauses usable with a specific column.
  *    创建可用于特定列的EC衍生连接条件
  *
  * This is used by indxpath.c to extract potentially indexable joinclauses
  * from ECs, and can be used by foreign data wrappers for similar purposes.
  * We assume that only expressions in Vars of a single table are of interest,
  * but the caller provides a callback function to identify exactly which
  * such expressions it would like to know about.
  *
  * We assume that any given table/index column could appear in only one EC.
  * (This should be true in all but the most pathological cases, and if it
  * isn't, we stop on the first match anyway.)  Therefore, what we return
  * is a redundant list of clauses equating the table/index column to each of
  * the other-relation values it is known to be equal to.  Any one of
  * these clauses can be used to create a parameterized path, and there
  * is no value in using more than one.  (But it *is* worthwhile to create
  * a separate parameterized path for each one, since that leads to different
  * join orders.)
  *
  * The caller can pass a Relids set of rels we aren't interested in joining
  * to, so as to save the work of creating useless clauses.
  */
 List *
 generate_implied_equalities_for_column(PlannerInfo *root,
                                        RelOptInfo *rel,
                                        ec_matches_callback_type callback,
                                        void *callback_arg,
                                        Relids prohibited_rels)
 {
     List       *result = NIL;//结果链表
     bool        is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);//是否子Relation
     Relids      parent_relids;//父Relids
     ListCell   *lc1;//变量
 
     /* Indexes are available only on base or "other" member relations. */
     Assert(IS_SIMPLE_REL(rel));
 
     /* If it's a child rel, we'll need to know what its parent(s) are */
     if (is_child_rel)
         parent_relids = find_childrel_parents(root, rel);
     else
         parent_relids = NULL;   /* not used, but keep compiler quiet */
 
     foreach(lc1, root->eq_classes)//遍历EC
     {
         EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);//当前的EC
         EquivalenceMember *cur_em;//EC成员
         ListCell   *lc2;//链表成员
 
         /*
          * Won't generate joinclauses if const or single-member (the latter
          * test covers the volatile case too)
          */
         if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
             continue;
 
         /*
          * No point in searching if rel not mentioned in eclass (but we can't
          * tell that for a child rel).
          */
         if (!is_child_rel &&
             !bms_is_subset(rel->relids, cur_ec->ec_relids))
             continue;
 
         /*
          * Scan members, looking for a match to the target column.  Note that
          * child EC members are considered, but only when they belong to the
          * target relation.  (Unlike regular members, the same expression
          * could be a child member of more than one EC.  Therefore, it's
          * potentially order-dependent which EC a child relation's target
          * column gets matched to.  This is annoying but it only happens in
          * corner cases, so for now we live with just reporting the first
          * match.  See also get_eclass_for_sort_expr.)
          */
         cur_em = NULL;
         foreach(lc2, cur_ec->ec_members)//遍历EC的成员
         {
             cur_em = (EquivalenceMember *) lfirst(lc2);//当前成员
             if (bms_equal(cur_em->em_relids, rel->relids) &&
                 callback(root, rel, cur_ec, cur_em, callback_arg))//调用ec_member_matches_indexcol函数
                 break;//找到匹配的成员,跳出
             cur_em = NULL;
         }
 
         if (!cur_em)
             continue;
 
         /*
          * Found our match.  Scan the other EC members and attempt to generate
          * joinclauses.
          */
         foreach(lc2, cur_ec->ec_members)
         {
             EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
             Oid         eq_op;
             RestrictInfo *rinfo;
 
             if (other_em->em_is_child)//
                 continue;       /* 忽略子成员,ignore children here */
 
             /* Make sure it'll be a join to a different rel */
             if (other_em == cur_em ||
                 bms_overlap(other_em->em_relids, rel->relids))//过滤cur_em
                 continue;
 
             /* Forget it if caller doesn't want joins to this rel */
             if (bms_overlap(other_em->em_relids, prohibited_rels))
                 continue;
 
             /*
              * Also, if this is a child rel, avoid generating a useless join
              * to its parent rel(s).
              */
             if (is_child_rel &&
                 bms_overlap(parent_relids, other_em->em_relids))
                 continue;
 
             eq_op = select_equality_operator(cur_ec,
                                              cur_em->em_datatype,
                                              other_em->em_datatype);
             if (!OidIsValid(eq_op))
                 continue;
 
             /* set parent_ec to mark as redundant with other joinclauses */
             rinfo = create_join_clause(root, cur_ec, eq_op,
                                        cur_em, other_em,
                                        cur_ec);//创建连接条件语句
 
             result = lappend(result, rinfo);
         }
 
         /*
          * If somehow we failed to create any join clauses, we might as well
          * keep scanning the ECs for another match.  But if we did make any,
          * we're done, because we don't want to return non-redundant clauses.
          */
         if (result)
             break;
     }
 
     return result;
 }

//---------------------------- match_clause_to_index

 /*
  * match_clause_to_index
  *    Test whether a qual clause can be used with an index.
  *
  * If the clause is usable, add it to the appropriate list in *clauseset.
  * *clauseset must be initialized to zeroes before first call.
  *
  * Note: in some circumstances we may find the same RestrictInfos coming from
  * multiple places.  Defend against redundant outputs by refusing to add a
  * clause twice (pointer equality should be a good enough check for this).
  *
  * Note: it's possible that a badly-defined index could have multiple matching
  * columns.  We always select the first match if so; this avoids scenarios
  * wherein we get an inflated idea of the index's selectivity by using the
  * same clause multiple times with different index columns.
  */
 static void
 match_clause_to_index(IndexOptInfo *index,
                       RestrictInfo *rinfo,
                       IndexClauseSet *clauseset)
 {
     int         indexcol;
 
     /*
      * Never match pseudoconstants to indexes.  (Normally a match could not
      * happen anyway, since a pseudoconstant clause couldn't contain a Var,
      * but what if someone builds an expression index on a constant? It's not
      * totally unreasonable to do so with a partial index, either.)
      */
     if (rinfo->pseudoconstant)
         return;
 
     /*
      * If clause can't be used as an indexqual because it must wait till after
      * some lower-security-level restriction clause, reject it.
      */
     if (!restriction_is_securely_promotable(rinfo, index->rel))
         return;
 
     /* OK, check each index key column for a match */
     for (indexcol = 0; indexcol < index->nkeycolumns; indexcol++)
     {
         if (match_clause_to_indexcol(index,
                                      indexcol,
                                      rinfo))
         {
             clauseset->indexclauses[indexcol] =
                 list_append_unique_ptr(clauseset->indexclauses[indexcol],
                                        rinfo);//赋值
             clauseset->nonempty = true;//设置标记
             return;
         }
     }
 }

//------------------- match_clause_to_indexcol

 /*
  * match_clause_to_indexcol()
  *    Determines whether a restriction clause matches a column of an index.
  *    判断约束条件是否与索引中的某一列匹配
  *
  *    To match an index normally, the clause:
  *    通常来说,匹配索引,子句必须:
  *    (1)  must be in the form (indexkey op const) or (const op indexkey);
  *         and
  *         满足格式:(索引键 操作符 常量) 或者 (常量 操作符 索引键),而且
  *    (2)  must contain an operator which is in the same family as the index
  *         operator for this column, or is a "special" operator as recognized
  *         by match_special_index_operator();
  *         and
  *         包含一种与索引列同一family的操作符,或者是一种通过
            match_special_index_operator方法认定的特殊操作符
  *    (3)  must match the collation of the index, if collation is relevant.
  *         与索引的排序规则collation匹配
  * 
  *    Our definition of "const" is exceedingly liberal: we allow anything that
  *    doesn't involve a volatile function or a Var of the index's relation.
  *    In particular, Vars belonging to other relations of the query are
  *    accepted here, since a clause of that form can be used in a
  *    parameterized indexscan.  It's the responsibility of higher code levels
  *    to manage restriction and join clauses appropriately.
  *    这里"const"常量的定义非常自由:除了易变函数或索引关系的Var之外的,均视为"const"
  *    由于存在参数化索引扫描的可能,因此查询中属于其他Relations的Vars也可以在此出现.
  *    调用此函数的代码有责任"合适"的管理限制条件和连接条件.
  *
  *    Note: we do need to check for Vars of the index's relation on the
  *    "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3))
  *    are not processable by a parameterized indexscan on a.f1, whereas
  *    something like (a.f1 OP (b.f2 OP c.f3)) is.
  *    注意:需要在子句的const部分检查索引关系的Vars，因为子句
  *    如(a.f1 OP (b.f2 OP a.f3)不能通过a上的参数化索引扫描进行处理
  *
  *    Presently, the executor can only deal with indexquals that have the
  *    indexkey on the left, so we can only use clauses that have the indexkey
  *    on the right if we can commute the clause to put the key on the left.
  *    We do not actually do the commuting here, but we check whether a
  *    suitable commutator operator is available.
  *    目前为止,执行器只能处理索引键在左边的索引表达式,因此只能使用那些可以
  *    把索引键变换到左边的条件表达式.在这个函数中不执行变换,但会执行相应的检查.
  *
  *    If the index has a collation, the clause must have the same collation.
  *    For collation-less indexes, we assume it doesn't matter; this is
  *    necessary for cases like "hstore ? text", wherein hstore's operators
  *    don't care about collation but the clause will get marked with a
  *    collation anyway because of the text argument.  (This logic is
  *    embodied in the macro IndexCollMatchesExprColl.)
  *    如果索引含有排序规则(collation),条件子句必须包含相同的排序规则.
  *    对于无collation的索引,假定collation没有任何影响.
  *
  *    It is also possible to match RowCompareExpr clauses to indexes (but
  *    currently, only btree indexes handle this).  In this routine we will
  *    report a match if the first column of the row comparison matches the
  *    target index column.  This is sufficient to guarantee that some index
  *    condition can be constructed from the RowCompareExpr --- whether the
  *    remaining columns match the index too is considered in
  *    adjust_rowcompare_for_index().
  *    RowCompareExpr有可能与索引进行匹配,在这个处理过程中,如果行对比的第一个列
  *    与目标索引匹配,那么可以认为是匹配的.
  *
  *    It is also possible to match ScalarArrayOpExpr clauses to indexes, when
  *    the clause is of the form "indexkey op ANY (arrayconst)".
  *    如果子句的格式是"indexkey op ANY (arrayconst)",那么匹配ScalarArrayOpExpr
  *    也是可能的.
  *
  *    For boolean indexes, it is also possible to match the clause directly
  *    to the indexkey; or perhaps the clause is (NOT indexkey).
  *    对于布尔索引,可以直接与索引键进行匹配
  *
  * 输入参数:
  * 'index' is the index of interest.
  * index-正在处理的索引
  * 'indexcol' is a column number of 'index' (counting from 0).
  * indexcol-索引列(从0起算)
  * 'rinfo' is the clause to be tested (as a RestrictInfo node).
  * rinfo-RestrictInfo Node
  *
  * Returns true if the clause can be used with this index key.
  * 如可以使用索引,则返回T
  *
  * NOTE:  returns false if clause is an OR or AND clause; it is the
  * responsibility of higher-level routines to cope with those.
  * 注意:如果条件语句是OR/AND语句,则返回F,由上层处理逻辑处理
  */
 static bool
 match_clause_to_indexcol(IndexOptInfo *index,
                          int indexcol,
                          RestrictInfo *rinfo)
 {
     Expr       *clause = rinfo->clause;//条件语句
     Index       index_relid = index->rel->relid;//Index的Relid
     Oid         opfamily;//操作符种类
     Oid         idxcollation;//索引排序规则
     Node       *leftop,//左节点
                *rightop;//右节点
     Relids      left_relids;//左节点相关Relids
     Relids      right_relids;//右节点相关Relids
     Oid         expr_op;//表达式操作符的Oid
     Oid         expr_coll;//表达式Collation的Oid
     bool        plain_op;//是否Plain操作符
 
     Assert(indexcol < index->nkeycolumns);
 
     opfamily = index->opfamily[indexcol];//获取操作符种类
     idxcollation = index->indexcollations[indexcol];//获取索引排序规则
 
     /* First check for boolean-index cases. */
     if (IsBooleanOpfamily(opfamily))//是否布尔类
     {
         if (match_boolean_index_clause((Node *) clause, indexcol, index))//是否匹配
             return true;//如匹配,返回T
     }
 
     /*
      * Clause must be a binary opclause, or possibly a ScalarArrayOpExpr
      * (which is always binary, by definition).  Or it could be a
      * RowCompareExpr, which we pass off to match_rowcompare_to_indexcol().
      * Or, if the index supports it, we can handle IS NULL/NOT NULL clauses.
      */
     if (is_opclause(clause))//OpExpr
     {
         leftop = get_leftop(clause);
         rightop = get_rightop(clause);
         if (!leftop || !rightop)
             return false;
         left_relids = rinfo->left_relids;
         right_relids = rinfo->right_relids;
         expr_op = ((OpExpr *) clause)->opno;
         expr_coll = ((OpExpr *) clause)->inputcollid;
         plain_op = true;
     }
     else if (clause && IsA(clause, ScalarArrayOpExpr))//ScalarArrayOpExpr
     {
         ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
 
         /* We only accept ANY clauses, not ALL */
         if (!saop->useOr)
             return false;
         leftop = (Node *) linitial(saop->args);
         rightop = (Node *) lsecond(saop->args);
         left_relids = NULL;     /* not actually needed */
         right_relids = pull_varnos(rightop);
         expr_op = saop->opno;
         expr_coll = saop->inputcollid;
         plain_op = false;
     }
     else if (clause && IsA(clause, RowCompareExpr))//RowCompareExpr
     {
         return match_rowcompare_to_indexcol(index, indexcol,
                                             opfamily, idxcollation,
                                             (RowCompareExpr *) clause);
     }
     else if (index->amsearchnulls && IsA(clause, NullTest))//NullTest
     {
         NullTest   *nt = (NullTest *) clause;
 
         if (!nt->argisrow &&
             match_index_to_operand((Node *) nt->arg, indexcol, index))
             return true;
         return false;
     }
     else
         return false;
 
     /*
      * Check for clauses of the form: (indexkey operator constant) or
      * (constant operator indexkey).  See above notes about const-ness.
      * (indexkey operator constant)和(constant operator indexkey)格式的语句
      */
     //处理:(indexkey operator constant)
     if (match_index_to_operand(leftop, indexcol, index) &&
         !bms_is_member(index_relid, right_relids) &&
         !contain_volatile_functions(rightop))
     {
         if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
             is_indexable_operator(expr_op, opfamily, true))//排序规则&操作符种类匹配
             return true;//返回T
 
         /*
          * If we didn't find a member of the index's opfamily, see whether it
          * is a "special" indexable operator.
          */
         if (plain_op &&
             match_special_index_operator(clause, opfamily,
                                          idxcollation, true))//Plain操作&特殊操作符,返回T
             return true;
         return false;//否则,返回F
     }
 
     //处理(constant operator indexkey)
     if (plain_op &&
         match_index_to_operand(rightop, indexcol, index) &&
         !bms_is_member(index_relid, left_relids) &&
         !contain_volatile_functions(leftop))
     {
         if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
             is_indexable_operator(expr_op, opfamily, false))
             return true;
 
         /*
          * If we didn't find a member of the index's opfamily, see whether it
          * is a "special" indexable operator.
          */
         if (match_special_index_operator(clause, opfamily,
                                          idxcollation, false))
             return true;
         return false;
     }
 
     return false;
 }

三、跟踪分析

测试脚本如下

select a.*,b.grbh,b.je 
from t_dwxx a,
    lateral (select t1.dwbh,t1.grbh,t2.je 
     from t_grxx t1 
          inner join t_jfxx t2 on t1.dwbh = a.dwbh and t1.grbh = t2.grbh) b
where a.dwbh = '1001'
order by b.dwbh;

注意:按先前的分析,SQL语句存在等价类{t_dwxx.dwbh t_grxx.dwbh '1001'}和{t_grxx.grbh t_jfxx.grbh},在构造t_grxx的索引访问路径时,使用等价类构造.

启动gdb,第一个RelOptInfo(对应t_dwxx)有3个Index,第二个RelOptInfo(对应t_grxx)有2个Index(分别是在dwbh和grbh上的索引),第三个RelOptInfo(对应t_jfxx)有1个Index(grbh上的索引),本节以t_jfxx和t_grxx为例进行跟踪分析

...
(gdb) c
Continuing.

Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:242
242   if (rel->indexlist == NIL)
(gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
$38 = {type = T_IndexOptInfo, indexoid = 16750, reltablespace = 0, rel = 0x2729530, pages = 276, tuples = 100000, 
  tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729998, indexcollations = 0x27299b0, opfamily = 0x27299c8, 
  opcintype = 0x27299e0, sortopfamily = 0x27299c8, reverse_sort = 0x2729a10, nulls_first = 0x2729a28, 
  canreturn = 0x27299f8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x2729ae0, indrestrictinfo = 0x0, 
  predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
  amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
  amcostestimate = 0x94f0ad <btcostestimate>}

输入信息是已熟知的root(PlannerInfo)和rel(RelOptInfo).首先进行索引遍历循环

(gdb) c
Continuing.

Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:242
242   if (rel->indexlist == NIL)
(gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
$38 = {type = T_IndexOptInfo, indexoid = 16750, reltablespace = 0, rel = 0x2729530, pages = 276, tuples = 100000, 
  tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729998, indexcollations = 0x27299b0, opfamily = 0x27299c8, 
  opcintype = 0x27299e0, sortopfamily = 0x27299c8, reverse_sort = 0x2729a10, nulls_first = 0x2729a28, 
  canreturn = 0x27299f8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x2729ae0, indrestrictinfo = 0x0, 
  predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
  amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
  amcostestimate = 0x94f0ad <btcostestimate>}

查询数据字典pg_class,oid=16750相应的索引是idx_t_jfxx_grbh

testdb=# select relname from pg_class where oid=16750;
     relname     
-----------------
 idx_t_jfxx_grbh
(1 row)

调用match_restriction_clauses_to_index和match_join_clauses_to_index,子句集合均为NULL

(gdb) 
match_restriction_clauses_to_index (rel=0x2729530, index=0x2729888, clauseset=0x7fff69cf0890) at indxpath.c:2117
2117  }
(gdb) 
create_index_paths (root=0x2714c50, rel=0x2729530) at indxpath.c:275
275     get_index_paths(root, rel, index, &rclauseset,
(gdb) 
284     MemSet(&jclauseset, 0, sizeof(jclauseset));
(gdb) 
285     match_join_clauses_to_index(root, rel, index,
(gdb) 
292     MemSet(&eclauseset, 0, sizeof(eclauseset));
(gdb) 
293     match_eclass_clauses_to_index(root, index,
(gdb) p rclauseset
$2 = {nonempty = false, indexclauses = {0x0 <repeats 32 times>}}
(gdb) p joinorclauses
$3 = (List *) 0x0
(gdb) p jclauseset
$4 = {nonempty = false, indexclauses = {0x0 <repeats 32 times>}}

进入match_eclass_clauses_to_index

...
268     match_restriction_clauses_to_index(rel, index, &rclauseset);
(gdb) step
match_restriction_clauses_to_index (rel=0x2724c88, index=0x27254d8, clauseset=0x7fff69cf0890) at indxpath.c:2116
2116    match_clauses_to_index(index, index->indrestrictinfo, clauseset);

进入generate_implied_equalities_for_column

...
(gdb) step
generate_implied_equalities_for_column (root=0x2714c50, rel=0x2729530, callback=0x7509b0 <ec_member_matches_indexcol>, 
    callback_arg=0x7fff69cf0620, prohibited_rels=0x0) at equivclass.c:2219
2219    List     *result = NIL;

等价类信息

...
2235      EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
(gdb) 
2243      if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
(gdb) p *cur_ec
$6 = {type = T_EquivalenceClass, ec_opfamilies = 0x272a268, ec_collation = 100, ec_members = 0x272a4a8, 
  ec_sources = 0x272a3f0, ec_derives = 0x272d2f0, ec_relids = 0x272a470, ec_has_const = false, ec_has_volatile = false, 
  ec_below_outer_join = false, ec_broken = false, ec_sortref = 0, ec_min_security = 0, ec_max_security = 0, ec_merged = 0x0}

遍历EC的成员后,cur_em不为NULL,查看cur_em内存结构(匹配的成员,即t_jfxx.grbh)

2281      foreach(lc2, cur_ec->ec_members)
(gdb) p *cur_em
$7 = {type = T_EquivalenceMember, em_expr = 0x2722890, em_relids = 0x272a238, em_nullable_relids = 0x0, 
  em_is_const = false, em_is_child = false, em_datatype = 25}
(gdb) p *cur_em->em_expr
$8 = {type = T_RelabelType}
(gdb) p *(RelabelType *)cur_em->em_expr
$9 = {xpr = {type = T_RelabelType}, arg = 0x2722840, resulttype = 25, resulttypmod = -1, resultcollid = 100, 
  relabelformat = COERCE_IMPLICIT_CAST, location = -1}
(gdb) p *((RelabelType *)cur_em->em_expr)->arg
$10 = {type = T_Var}
(gdb) p *(Var *)((RelabelType *)cur_em->em_expr)->arg
$11 = {xpr = {type = T_Var}, varno = 4, varattno = 1, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
  varnoold = 4, varoattno = 1, location = 168}

再次遍历等价类的成员,得到第一个约束条件(t_jfxx.grbh=t_grxx.grbh)

(gdb) n
2314        rinfo = create_join_clause(root, cur_ec, eq_op,
(gdb) 
2318        result = lappend(result, rinfo);
(gdb) p *rinfo
$18 = {type = T_RestrictInfo, clause = 0x272d910, is_pushed_down = true, outerjoin_delayed = false, can_join = true, 
  pseudoconstant = false, leakproof = false, security_level = 0, clause_relids = 0x272db10, required_relids = 0x272d5f0, 
  outer_relids = 0x0, nullable_relids = 0x0, left_relids = 0x272dae0, right_relids = 0x272daf8, orclause = 0x0, 
  parent_ec = 0x272a340, eval_cost = {startup = 0, per_tuple = 0.0025000000000000001}, norm_selec = -1, outer_selec = -1, 
  mergeopfamilies = 0x272db48, left_ec = 0x272a340, right_ec = 0x272a340, left_em = 0x272a4d8, right_em = 0x272a420, 
  scansel_cache = 0x0, outer_is_left = false, hashjoinoperator = 98, left_bucketsize = -1, right_bucketsize = -1, 
  left_mcvfreq = -1, right_mcvfreq = -1}
(gdb) set $tmp1=(RelabelType *)((OpExpr *)rinfo->clause)->args->head->data.ptr_value
(gdb) set $tmp2=(RelabelType *)((OpExpr *)rinfo->clause)->args->head->next->data.ptr_value
(gdb) p *(Var *)$tmp1->arg
$31 = {xpr = {type = T_Var}, varno = 4, varattno = 1, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
  varnoold = 4, varoattno = 1, location = 168}
(gdb) p *(Var *)$tmp2->arg
$32 = {xpr = {type = T_Var}, varno = 3, varattno = 2, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
  varnoold = 3, varoattno = 2, location = 158}

获得了结果,返回到match_eclass_clauses_to_index

2281      foreach(lc2, cur_ec->ec_members)
(gdb) 
2326      if (result)
(gdb) 
2327        break;
(gdb) 
2330    return result;
(gdb) 
2331  }
(gdb) 
match_eclass_clauses_to_index (root=0x2714c50, index=0x2729888, clauseset=0x7fff69cf0670) at indxpath.c:2184
2184      match_clauses_to_index(index, clauses, clauseset);
...

下面再考察t_grxx.dwbh上的索引为例,分析match_clause_to_index

(gdb) c
Continuing.

Breakpoint 1, create_index_paths (root=0x2714c50, rel=0x2728c38) at indxpath.c:242
242   if (rel->indexlist == NIL)
(gdb) p *(IndexOptInfo *)rel->indexlist->head->data.ptr_value
$39 = {type = T_IndexOptInfo, indexoid = 16752, reltablespace = 0, rel = 0x2728c38, pages = 276, tuples = 100000, 
  tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729378, indexcollations = 0x2729390, opfamily = 0x27293a8, 
  opcintype = 0x27293c0, sortopfamily = 0x27293a8, reverse_sort = 0x27293f0, nulls_first = 0x2729408, 
  canreturn = 0x27293d8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x27294e0, indrestrictinfo = 0x272b040, 
  predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
  amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
  amcostestimate = 0x94f0ad <btcostestimate>}

oid=16752,对应的object为idx_t_grxx_dwbh

testdb=# select relname from pg_class where oid=16752;
     relname     
-----------------
 idx_t_grxx_dwbh
(1 row)

进入IndexOptInfo循环,第一个元素对应的IndexOptInfo为idx_t_grxx_dwbh

249   foreach(lc, rel->indexlist)
(gdb) p *rel->indexlist
$40 = {type = T_List, length = 2, head = 0x2729510, tail = 0x2729218}
(gdb) p *(IndexOptInfo *)rel->indexlist->head->data->ptr_value
$42 = {type = T_IndexOptInfo, indexoid = 16752, reltablespace = 0, rel = 0x2728c38, pages = 276, tuples = 100000, 
  tree_height = 1, ncolumns = 1, nkeycolumns = 1, indexkeys = 0x2729378, indexcollations = 0x2729390, opfamily = 0x27293a8, 
  opcintype = 0x27293c0, sortopfamily = 0x27293a8, reverse_sort = 0x27293f0, nulls_first = 0x2729408, 
  canreturn = 0x27293d8, relam = 403, indexprs = 0x0, indpred = 0x0, indextlist = 0x27294e0, indrestrictinfo = 0x272b040, 
  predOK = false, unique = false, immediate = true, hypothetical = false, amcanorderbyop = false, amoptionalkey = true, 
  amsearcharray = true, amsearchnulls = true, amhasgettuple = true, amhasgetbitmap = true, amcanparallel = true, 
  amcostestimate = 0x94f0ad <btcostestimate>}

一路小跑,进入match_clause_to_indexcol

...
(gdb) step
match_clause_to_indexcol (index=0x2729268, indexcol=0, rinfo=0x272ae58) at indxpath.c:2330
2330    Expr     *clause = rinfo->clause;
(gdb) n
2331    Index   index_relid = index->rel->relid;
(gdb) n
2344    opfamily = index->opfamily[indexcol];
(gdb) 
2345    idxcollation = index->indexcollations[indexcol];
(gdb) p index_relid
$47 = 3
(gdb) p opfamily
$48 = 1994
(gdb)

根据opfamily查询数据字典

testdb=# select * from pg_opfamily where oid=1994;
 opfmethod | opfname  | opfnamespace | opfowner 
-----------+----------+--------------+----------
       403 | text_ops |           11 |       10
(1 row)
-- 索引访问方法(btree)
testdb=# select * from pg_am where oid=403;
 amname | amhandler | amtype 
--------+-----------+--------
 btree  | bthandler | i
(1 row)

下面进入is_opclause判断分支

(gdb) p idxcollation
$49 = 100
(gdb) n
2360    if (is_opclause(clause))
(gdb) 
2362      leftop = get_leftop(clause);
(gdb) 
2363      rightop = get_rightop(clause);
(gdb) 
2364      if (!leftop || !rightop)
(gdb) p *leftop
$50 = {type = T_RelabelType}
(gdb) p *rightop
$51 = {type = T_Const}

限制条件下推后,形成限制条件t_grxx.dwbh = '1001'

#Var:t_grxx.dwbh
(gdb) p *(RelabelType *)leftop
$56 = {xpr = {type = T_RelabelType}, arg = 0x272ad80, resulttype = 25, resulttypmod = -1, resultcollid = 100, 
  relabelformat = COERCE_IMPLICIT_CAST, location = -1}
#常量:'1001'
(gdb) p *(Const *)rightop
$57 = {xpr = {type = T_Const}, consttype = 25, consttypmod = -1, constcollid = 100, constlen = -1, constvalue = 41069848, 
  constisnull = false, constbyval = false, location = 194}

执行相关判断,返回T

(gdb) n
2366      left_relids = rinfo->left_relids;
(gdb) 
2367      right_relids = rinfo->right_relids;
(gdb) 
2368      expr_op = ((OpExpr *) clause)->opno;
(gdb) 
2369      expr_coll = ((OpExpr *) clause)->inputcollid;
(gdb) 
2370      plain_op = true;
(gdb) 
2409    if (match_index_to_operand(leftop, indexcol, index) &&
(gdb) 
2410      !bms_is_member(index_relid, right_relids) &&
(gdb) 
2409    if (match_index_to_operand(leftop, indexcol, index) &&
(gdb) 
2411      !contain_volatile_functions(rightop))
(gdb) 
2410      !bms_is_member(index_relid, right_relids) &&
(gdb) 
2413      if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
(gdb) 
2414        is_indexable_operator(expr_op, opfamily, true))
(gdb) 
2413      if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
(gdb) 
2415        return true;

给clauseset变量赋值

(gdb) 
match_clause_to_index (index=0x2729268, rinfo=0x272ae58, clauseset=0x7fff69cf0890) at indxpath.c:2255
2255          list_append_unique_ptr(clauseset->indexclauses[indexcol],
(gdb) 
2254        clauseset->indexclauses[indexcol] =
(gdb) 
2257        clauseset->nonempty = true;
(gdb) 
2258        return;
(gdb) 
2261  }

返回到match_clauses_to_index

(gdb) 
match_clauses_to_index (index=0x2729268, clauses=0x272b040, clauseset=0x7fff69cf0890) at indxpath.c:2200
2200    foreach(lc, clauses)

至此,match_XXX的跟踪完毕,下一小节介绍get_index_paths.

四、参考资料

allpaths.c
cost.h
costsize.c
PG Document:Query Planning

PostgreSQL 源码解读（55）- 查询语句#40（mak

一、数据结构

二、源码解读

三、跟踪分析

四、参考资料

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

PostgreSQL