PostgreSQL查询优化之子查询优化

子查询优化

上拉子连接

上拉子连接主要是把ANY和EXIST子句转换为半连接

void
pull_up_sublinks(PlannerInfo *root)
{
    Node       *jtnode; //子连接上拉生成的结果
    Relids      relids;
    
    /* Begin recursion through the jointree ,jointree代表From和join子句*/
    jtnode = pull_up_sublinks_jointree_recurse(root,
                                           (Node *) root->parse->jointree,
                                           &relids);

    //把生成的结果jtnode,放入jointree表达式中
    if (IsA(jtnode, FromExpr))
        root->parse->jointree = (FromExpr *) jtnode;
    else
        root->parse->jointree = makeFromExpr(list_make1(jtnode), NULL);
}

pull_up_sublinks_jointree_recurse递归上拉子连接函数

static Node *
pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
                                  Relids *relids)
{
    if (jtnode == NULL)
    else if (IsA(jtnode, RangeTblRef))//如果是范围表,直接合并到表示关系的relid中
    //处理FromExpr
    else if (IsA(jtnode, FromExpr))
    {
        /* 递归上拉From中所有子连接 */
        foreach(l, f->fromlist)
        {
            newchild = pull_up_sublinks_jointree_recurse(root,
                            lfirst(l),
                            &childrelids);
            //合并上拉的子连接到本层
            newfromlist = lappend(newfromlist, newchild);
            frelids = bms_join(frelids, childrelids);
        }
        //递归上拉子连接中的条件
        newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
                        &jtlink, frelids,
                        NULL, NULL);
    }
    else if (IsA(jtnode, JoinExpr))
    {
        /* 上拉左右连接中的子连接 */
        j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
                            &leftrelids);
        j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
                            &rightrelids);
        //递归上拉子连接中的条件
        switch (j->jointype)
        {
            case JOIN_INNER:
                j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                                     &jtlink,
                                     bms_union(leftrelids,
                                            rightrelids),
                                     NULL, NULL);
                break;
            case JOIN_LEFT:
                j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                             &j->rarg,
                             rightrelids,
                             NULL, NULL);
                break;
            case JOIN_FULL:
                /* can't do anything with full-join quals */
                break;
            case JOIN_RIGHT:
                j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                             &j->larg,
                             leftrelids,
                             NULL, NULL);
                break;
            default:
                elog(ERROR, "unrecognized join type: %d",
                     (int) j->jointype);
                break;
        }
    
    }
    else
        elog(ERROR, "unrecognized node type: %d",
             (int) nodeTag(jtnode));
    return jtnode;
}

pull_up_sublinks_qual_recurse上拉限制条件中的子连接

用于递归上拉限制条件中存在的子连接(ANY,EXISTS),经过convert_ANY_sublink_to_join转按ANY子连接,经过convert_EXISTS_sublink_to_join消除EXISTS递归消除

static Node *
//node为子连接节点,jtlink1和available_rels1都是上拉后的返回结果
pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
              Node **jtlink1, Relids available_rels1,
              Node **jtlink2, Relids available_rels2)
{
    if (node == NULL)
        return NULL;
    if (IsA(node, SubLink))
    {
        /* 上拉ANY_SUBLINK类型子连接*/
        if (sublink->subLinkType == ANY_SUBLINK)
        {
            if ((j = convert_ANY_sublink_to_join(root, sublink,
                        available_rels1)) != NULL)//上拉ANY子连接成功
            {
                //处理刚上拉的右关系
                j->rarg = pull_up_sublinks_jointree_recurse(root,
                                    j->rarg,
                                    &child_rels);
        
                //处理刚刚上拉的子连接
                j->quals = pull_up_sublinks_qual_recurse(root,
                                 j->quals,
                                 &j->larg,
                                 available_rels1,
                                 &j->rarg,
                                 child_rels);
                /* Return NULL representing constant TRUE */
                return NULL;
            }
            //处理available_rels2对应项
            if (available_rels2 != NULL &&
                (j = convert_ANY_sublink_to_join(root, sublink,
                         available_rels2)) != NULL)

        }
        //上拉EXIST_SUBLINKS对应项
        else if (sublink->subLinkType == EXISTS_SUBLINK)
        {
            if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
                        available_rels1)) != NULL)
            {
                //基本逻辑和ANY块相同
            }
            if (available_rels2 != NULL &&
                (j = convert_EXISTS_sublink_to_join(root, sublink, false,
                            available_rels2)) != NULL)
            {
                //基本逻辑和ANY块相同
            }
        }
        /* Else return it unmodified */
        return node;
    }
    //处理NOT_EXISTS
    if (not_clause(node))
    {
    }
    if (and_clause(node)) //处理AND每一个子项
    {
        /* Recurse into AND clause */
        List       *newclauses = NIL;
        ListCell   *l;
    
        foreach(l, ((BoolExpr *) node)->args)
        {
            Node       *oldclause = (Node *) lfirst(l);
            Node       *newclause;
    
            newclause = pull_up_sublinks_qual_recurse(root,
                              oldclause,
                              jtlink1,
                              available_rels1,
                              jtlink2,
                              available_rels2);
            if (newclause)
                newclauses = lappend(newclauses, newclause);
        }
        /* We might have got back fewer clauses than we started with */
        if (newclauses == NIL)
            return NULL;
        else if (list_length(newclauses) == 1)
            return (Node *) linitial(newclauses);
        else
            return (Node *) make_andclause(newclauses);
    }
    /* Stop if not an AND */
    return node;
}

pull_up_sublinks_qual_recurse和pull_up_sublinks_qual_recurse交替调用,穷尽所有ANY/EXISTS类型子连接

convert_ANY_sublink_to_join

ANY(类型包括ANY,NOT ANY,SOME, NOT SOME, IN, NOT IN)

JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
                            Relids available_rels)
{
    JoinExpr   *result;//子链接转换为连接关系
    
    Assert(sublink->subLinkType == ANY_SUBLINK);
    
    //子连接右操作数:不能出现包含上层任何Var对象
    if (contain_vars_of_level((Node *) subselect, 1))
        return NULL;
    
    /*子连接左操作数
     *a. 一定与上层出现的Var结构体表示的对象有相同,如果没有,可以直接求解,不用和上层关联
     *b. 不能引用上层出现的关系
     *c. 不能出现易失函数
     */
    upper_varnos = pull_varnos(sublink->testexpr);//情况a
    if (bms_is_empty(upper_varnos))
        return NULL;
    
    /*
     * However, it can't refer to anything outside available_rels.
     */
    if (!bms_is_subset(upper_varnos, available_rels))//情况b
        return NULL;
    
    /*
     * The combining operators and left-hand expressions mustn't be volatile.
     */
    if (contain_volatile_functions(sublink->testexpr))情况c
        return NULL;
    
    /* Create a dummy ParseState for addRangeTableEntryForSubquery */
    pstate = make_parsestate(NULL);
    
    /*上拉子链接到上层范围表中,作为未来连接的对象 */
    rte = addRangeTableEntryForSubquery(pstate,
                subselect,
                makeAlias("ANY_subquery", NIL),
                false,
                false);
    parse->rtable = lappend(parse->rtable, rte);
    rtindex = list_length(parse->rtable);
    
    /*
     * Form a RangeTblRef for the pulled-up sub-select.
     */
    rtr = makeNode(RangeTblRef);
    rtr->rtindex = rtindex;
    
    /*
     * Build a list of Vars representing the subselect outputs.
     */
    subquery_vars = generate_subquery_vars(root,
               subselect->targetList,
               rtindex);
    
    /* 连接条件*/
    quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
    
    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = JOIN_SEMI;//半连接
    
    return result;
}

有几种情况会上拉失败:

  1. 右操作数不能包含上层操作对象(包含就是关联子查询了)
  2. 左边操作符
    1. 与上层对象无关
    2. 不能引用上层的关系?
    3. 含有易失函数

convert_EXISTS_sublink_to_join

将EXISTS,NOT EXISTS上拉

JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
       bool under_not, Relids available_rels)
{
    JoinExpr   *result;
    Query      *parse = root->parse;
    
    Assert(sublink->subLinkType == EXISTS_SUBLINK);
    
    /*对于右操作数
     * a. 不支持带有WITH子句的格式
     * b. 不支持集合操作或者带有CTE,聚合函数, HAVING,LIMIT等格式
     * c. 不支持FROM或者WHERE子句为空
     */
    if (subselect->cteList)//情况a
        return NULL;
    

    if (!simplify_EXISTS_query(root, subselect))//情况b
        return NULL;

    if (subselect->jointree->fromlist == NIL)//情况c
        return NULL;
    
    whereClause = subselect->jointree->quals;//子查询条件保存
    subselect->jointree->quals = NULL;
    
    //右操作树的子查询不能包含上层中出现的任何Var对象
    if (contain_vars_of_level((Node *) subselect, 1))
        return NULL;
    //右操作数的where需要有山层的Var,这样才能够成功连接
    if (!contain_vars_of_level(whereClause, 1))
        return NULL;
    //易失函数不能优化
    if (contain_volatile_functions(whereClause))
        return NULL;
    
    //上拉成功,到顶层范围表
    rtoffset = list_length(parse->rtable);
    OffsetVarNodes((Node *) subselect, rtoffset, 0);
    OffsetVarNodes(whereClause, rtoffset, 0);
    
    /* 从技术上看来,上拉子查询就是把子查询中范围表拉到From子句,并把Where条件合并 */
    parse->rtable = list_concat(parse->rtable, subselect->rtable);
    
    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
    return result;
}

不能上拉的条件:

  1. 右操作数:
    1. 不支持带有WITH子句的格式
    2. 不支持集合操作或者带有CTE(定义临时表的存在只是一个查询/withi子句),聚合函数, HAVING,LIMIT等格式
    3. 不支持FROM或者WHERE子句为空
    4. 不支持关联子查询,但是Where必须有上层Var
    5. 易失函数不能优化

上拉子查询

搜索From子句中的范围表,上拉其中子查询

void
pull_up_subqueries(PlannerInfo *root)
{
    /* Recursion starts with no containing join nor appendrel */
    root->parse->jointree = (FromExpr *)
        pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
                            NULL, NULL, NULL, false);
}

真正执行上拉

static Node *
pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
                           JoinExpr *lowest_outer_join,
                           JoinExpr *lowest_nulling_outer_join,
                           AppendRelInfo *containing_appendrel,
                           bool deletion_ok)
{
    Assert(jtnode != NULL);
    if (IsA(jtnode, RangeTblRef))//一个范围表
    {
        if (rte->rtekind == RTE_SUBQUERY &&
            is_simple_subquery(rte->subquery, rte,
               lowest_outer_join, deletion_ok) &&//如果是简单子查询,上拉
            (containing_appendrel == NULL ||
             is_safe_append_member(rte->subquery)))
            return pull_up_simple_subquery(root, jtnode, rte,
                   lowest_outer_join,
                   lowest_nulling_outer_join,
                   containing_appendrel,
                   deletion_ok);
    

        if (rte->rtekind == RTE_SUBQUERY &&
            is_simple_union_all(rte->subquery))//如果是简单UNION操作,上拉
            return pull_up_simple_union_all(root, jtnode, rte);
    
        //普通Value,上拉
        if (rte->rtekind == RTE_VALUES &&
            lowest_outer_join == NULL &&
            containing_appendrel == NULL &&
            is_simple_values(root, rte, deletion_ok))
            return pull_up_simple_values(root, jtnode, rte);
    
        /* Otherwise, do nothing at this node. */
    }
    else if (IsA(jtnode, FromExpr))
    {
        FromExpr   *f = (FromExpr *) jtnode;

        foreach(l, f->fromlist)
        {
            lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
                           lowest_outer_join,
                           lowest_nulling_outer_join,
                           NULL,
                           sub_deletion_ok);
        }
    }
    else if (IsA(jtnode, JoinExpr))
    {
        JoinExpr   *j = (JoinExpr *) jtnode;
    
        /* Recurse, being careful to tell myself when inside outer join */
        switch (j->jointype)
        {
            //对各种JOIN子句进行上拉
        }
    }
    else
        elog(ERROR, "unrecognized node type: %d",
             (int) nodeTag(jtnode));
    return jtnode;
}

可以上拉的:

  1. 简单子查询,Union ALL,普通Value
  2. 可以递归多个From子句,也可以递归上拉join左右操作数

优化顶层Union all

void
flatten_simple_union_all(PlannerInfo *root)
{
    Query      *parse = root->parse;

    /* 存在Union嵌套,就是WITH子句 */
    if (root->hasRecursion)
        return;

    /* UNION ALL的列不同*/
    if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
        return;
    //构造AppendRefInfo节点优化UNION ALL
    pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
}

UNION和UNION ALL和OR

  1. UNION和UNION ALL都是使用AppendRefInfo替代,但是UNION会排序和去重
  2. OR里面会有OrFilter
    原文作者:PostgreSQL
    原文地址: https://www.cnblogs.com/biterror/p/7161666.html
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞