189 8069 5689

PostgreSQL源码解读(202)-查询#115(类型转换)

本节简单介绍了PostgreSQL中的类型转换实现.
首先来看看几个示例SQL:


testdb=# 
testdb=# drop table if exists t_conv;
om t_conv where id = '1'::text;
explain verbose select * from t_conv where id > '13'::text;
DROP TABLE
testdb=# create table t_conv(id int);
CREATE TABLE
testdb=# 
testdb=# insert into t_conv values(1);
INSERT 0 1
testdb=# insert into t_conv values(2);
INSERT 0 1
testdb=#

查询条件为id = ‘1’,’1’转换为int进行比较.

成都创新互联公司是一家专业提供大箐山企业网站建设,专注与网站建设、做网站HTML5、小程序制作等业务。10年已为大箐山众多企业、政府机构等服务。创新互联专业网络公司优惠进行中。


testdb=# explain verbose select * from t_conv where id = '1';
                          QUERY PLAN                           
---------------------------------------------------------------
 Seq Scan on public.t_conv  (cost=0.00..41.88 rows=13 width=4)
   Output: id
   Filter: (t_conv.id = 1)
(3 rows)

查询条件为id = ‘1.1’,出错(1.1无法转换为整型)


testdb=# explain verbose select * from t_conv where id = '1.1';
psql: ERROR:  invalid input syntax for type integer: "1.1"
LINE 1: explain verbose select * from t_conv where id = '1.1';
                                                        ^

查询条件为id = ‘1’::text,出错(text和int4没有转换规则)


testdb=# explain verbose select * from t_conv where id = '1'::text;
psql: ERROR:  operator does not exist: integer = text
LINE 1: explain verbose select * from t_conv where id = '1'::text;
                                                      ^
HINT:  No operator matches the given name and argument types. You might need to add explicit type casts.
testdb=#

查询条件为id > ‘13’::text,转换为numeric进行比较


testdb=# explain verbose select * from t_conv where id > '13'::text;
                           QUERY PLAN                           
----------------------------------------------------------------
 Seq Scan on public.t_conv  (cost=0.00..48.25 rows=850 width=4)
   Output: id
   Filter: ((t_conv.id)::numeric > '13'::numeric)
(3 rows)
testdb=#

系统转换规则定义,”>”操作符有定义text&int的比较,而”=”操作符没有定义,因此”select * from t_conv where id = ‘1’::text”语句会出错.


testdb=# select oid,typname from pg_type where typname in ('int4','text');
 oid | typname 
-----+---------
  23 | int4
  25 | text
(2 rows)
testdb=# select oid,oprname,oprleft,oprright,oprresult,oprcode from pg_operator where oprname = '>' and oprleft = 23 and oprright = 25;
  oid  | oprname | oprleft | oprright | oprresult |     oprcode      
-------+---------+---------+----------+-----------+------------------
 16407 | >       |      23 |       25 |        16 | int_greater_text
(1 row)
testdb=# select oid,oprname,oprleft,oprright,oprresult,oprcode from pg_operator where oprname = '=' and oprleft = 23 and oprright = 25;
 oid | oprname | oprleft | oprright | oprresult | oprcode 
-----+---------+---------+----------+-----------+---------
(0 rows)

总结一下:
解析表达式,涉及不同数据类型时:
1.如有相应类型的Operator定义(pg_operator),则尝试进行类型转换,否则报错;
2.如有相应类型的转换规则,转换为目标类型后解析,否则报错.

一、数据结构

Form_pg_operator
pg_operator中的定义,代码会其中的定义转换为FormData_pg_operator结构体


/* ----------------
 *    pg_operator definition.  cpp turns this into
 *    typedef struct FormData_pg_operator
 * ----------------
 */
CATALOG(pg_operator,2617,OperatorRelationId)
{
  Oid     oid;      /* oid */
  /* name of operator */
  NameData  oprname;
  /* OID of namespace containing this oper */
  Oid     oprnamespace BKI_DEFAULT(PGNSP);
  /* operator owner */
  Oid     oprowner BKI_DEFAULT(PGUID);
  /* 'l', 'r', or 'b' */
  char    oprkind BKI_DEFAULT(b);
  /* can be used in merge join? */
  bool    oprcanmerge BKI_DEFAULT(f);
  /* can be used in hash join? */
  bool    oprcanhash BKI_DEFAULT(f);
  /* left arg type, or 0 if 'l' oprkind */
  Oid     oprleft BKI_LOOKUP(pg_type);
  /* right arg type, or 0 if 'r' oprkind */
  Oid     oprright BKI_LOOKUP(pg_type);
  /* result datatype */
  Oid     oprresult BKI_LOOKUP(pg_type);
  /* OID of commutator oper, or 0 if none */
  Oid     oprcom BKI_DEFAULT(0) BKI_LOOKUP(pg_operator);
  /* OID of negator oper, or 0 if none */
  Oid     oprnegate BKI_DEFAULT(0) BKI_LOOKUP(pg_operator);
  /* OID of underlying function */
  regproc   oprcode BKI_LOOKUP(pg_proc);
  /* OID of restriction estimator, or 0 */
  regproc   oprrest BKI_DEFAULT(-) BKI_LOOKUP(pg_proc);
  /* OID of join estimator, or 0 */
  regproc   oprjoin BKI_DEFAULT(-) BKI_LOOKUP(pg_proc);
} FormData_pg_operator;
/* ----------------
 *    Form_pg_operator corresponds to a pointer to a tuple with
 *    the format of pg_operator relation.
 * ----------------
 */
typedef FormData_pg_operator *Form_pg_operator;

二、源码解读

make_op
变换操作符表达式以确保类型兼容,会存在某些类型转换.


/*
 * make_op()
 *    Operator expression construction.
 *
 * Transform operator expression ensuring type compatibility.
 * This is where some type conversion happens.
 * 变换操作符表达式以确保类型兼容,会存在某些类型转换.
 *
 * last_srf should be a copy of pstate->p_last_srf from just before we
 * started transforming the operator's arguments; this is used for nested-SRF
 * detection.  If the caller will throw an error anyway for a set-returning
 * expression, it's okay to cheat and just pass pstate->p_last_srf.
 * last_srf应为pstate->p_last_srf的一个拷贝.
 */
Expr *
make_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
    Node *last_srf, int location)
{
  Oid     ltypeId,//左算子类型OID
        rtypeId;//右算子类型OID
  Operator  tup;//操作符tuple
  Form_pg_operator opform;//操作符form(参照pg_operator)
  Oid     actual_arg_types[2];//实际参数类型
  Oid     declared_arg_types[2];//操作符声明的参数类型
  int     nargs;//参数格式
  List     *args;//参数链表
  Oid     rettype;//返回结果类型
  OpExpr     *result;//结果
  /* Select the operator */
  //选择操作符
  if (rtree == NULL)
  {
    /* right operator */
    //右树为NULL,则为右操作符,如阶乘运算符"!" : 10!
    ltypeId = exprType(ltree);
    rtypeId = InvalidOid;
    tup = right_oper(pstate, opname, ltypeId, false, location);
  }
  else if (ltree == NULL)
  {
    /* left operator */
    //左树为空,则为左操作符,如按位NOT运算符 : "~21"
    rtypeId = exprType(rtree);
    ltypeId = InvalidOid;
    tup = left_oper(pstate, opname, rtypeId, false, location);
  }
  else
  {
    /* otherwise, binary operator */
    //二元操作符
    ltypeId = exprType(ltree);
    rtypeId = exprType(rtree);
    tup = oper(pstate, opname, ltypeId, rtypeId, false, location);
  }
  //获取操作符
  opform = (Form_pg_operator) GETSTRUCT(tup);
  /* Check it's not a shell */
  if (!RegProcedureIsValid(opform->oprcode))
    ereport(ERROR,
        (errcode(ERRCODE_UNDEFINED_FUNCTION),
         errmsg("operator is only a shell: %s",
            op_signature_string(opname,
                      opform->oprkind,
                      opform->oprleft,
                      opform->oprright)),
         parser_errposition(pstate, location)));
  /* Do typecasting and build the expression tree */
  //执行类型转换并构建表达式树
  if (rtree == NULL)
  {
    /* right operator */
    args = list_make1(ltree);//参数
    actual_arg_types[0] = ltypeId;//左实际参数类型
    declared_arg_types[0] = opform->oprleft;//声明类型
    nargs = 1;//参数个数
  }
  else if (ltree == NULL)
  {
    /* left operator */
    args = list_make1(rtree);
    actual_arg_types[0] = rtypeId;
    declared_arg_types[0] = opform->oprright;
    nargs = 1;
  }
  else
  {
    /* otherwise, binary operator */
    args = list_make2(ltree, rtree);
    actual_arg_types[0] = ltypeId;
    actual_arg_types[1] = rtypeId;
    declared_arg_types[0] = opform->oprleft;
    declared_arg_types[1] = opform->oprright;
    nargs = 2;
  }
  /*
   * enforce consistency with polymorphic argument and return types,
   * possibly adjusting return type or declared_arg_types (which will be
   * used as the cast destination by make_fn_arguments)
   * 强制多态参数与返回类型的一致性,可能会调整返回类型或者declared_arg_types
   * (可能会在函数make_fn_arguments中作为转换目标类型)
   */
  rettype = enforce_generic_type_consistency(actual_arg_types,
                         declared_arg_types,
                         nargs,
                         opform->oprresult,
                         false);
  /* perform the necessary typecasting of arguments */
  //参数类型转换
  make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);
  /* and build the expression node */
  //构建表达式节点
  result = makeNode(OpExpr);
  result->opno = oprid(tup);
  result->opfuncid = opform->oprcode;
  result->opresulttype = rettype;
  result->opretset = get_func_retset(opform->oprcode);
  /* opcollid and inputcollid will be set by parse_collate.c */
  result->args = args;
  result->location = location;
  /* if it returns a set, check that's OK */
  if (result->opretset)
  {
    check_srf_call_placement(pstate, last_srf, location);
    /* ... and remember it for error checks at higher levels */
    pstate->p_last_srf = (Node *) result;
  }
  ReleaseSysCache(tup);
  return (Expr *) result;
}

make_fn_arguments
给定实际的参数表达式和期望的输入类型,为表达式树执行有需要的类型转换.


/*
 * make_fn_arguments()
 *
 * Given the actual argument expressions for a function, and the desired
 * input types for the function, add any necessary typecasting to the
 * expression tree.  Caller should already have verified that casting is
 * allowed.
 * 给定实际的参数表达式和期望的输入类型,为表达式树执行有需要的类型转换.
 * 调用者已验证转换是可行的.
 *
 * Caution: given argument list is modified in-place.
 * 就地转换参数类型.
 *
 * As with coerce_type, pstate may be NULL if no special unknown-Param
 * processing is wanted.
 */
void
make_fn_arguments(ParseState *pstate,
          List *fargs,
          Oid *actual_arg_types,
          Oid *declared_arg_types)
{
  ListCell   *current_fargs;
  int     i = 0;
  foreach(current_fargs, fargs)
  {
    /* types don't match? then force coercion using a function call... */
    //类型不匹配?
    if (actual_arg_types[i] != declared_arg_types[i])
    {
      //获取节点
      Node     *node = (Node *) lfirst(current_fargs);
      /*
       * If arg is a NamedArgExpr, coerce its input expr instead --- we
       * want the NamedArgExpr to stay at the top level of the list.
       * 如果参数是NamedArgExpr,则强制转换为输入表达式
       */
      if (IsA(node, NamedArgExpr))
      {
        NamedArgExpr *na = (NamedArgExpr *) node;
        node = coerce_type(pstate,
                   (Node *) na->arg,
                   actual_arg_types[i],
                   declared_arg_types[i], -1,
                   COERCION_IMPLICIT,
                   COERCE_IMPLICIT_CAST,
                   -1);
        na->arg = (Expr *) node;
      }
      else
      {
        //执行转换
        node = coerce_type(pstate,
                   node,
                   actual_arg_types[i],
                   declared_arg_types[i], -1,
                   COERCION_IMPLICIT,
                   COERCE_IMPLICIT_CAST,
                   -1);
        lfirst(current_fargs) = node;
      }
    }
    i++;
  }
}

三、跟踪分析

SQL脚本


testdb=# select * from t_conv where id = '1';

跟踪分析


(gdb) b make_op
Breakpoint 2 at 0x619cdb: file parse_oper.c, line 762.
(gdb) c
Continuing.
Breakpoint 2, make_op (pstate=0x22d6e30, opname=0x22d6c48, ltree=0x22d7620, rtree=0x22d7678, last_srf=0x0, location=30)
    at parse_oper.c:762
762   if (rtree == NULL)
(gdb)

调用栈信息


(gdb) bt
#0  make_op (pstate=0x22d6e30, opname=0x22d6c48, ltree=0x22d7620, rtree=0x22d7678, last_srf=0x0, location=30)
    at parse_oper.c:859
#1  0x000000000060df9c in transformAExprOp (pstate=0x22d6e30, a=0x22d6ba0) at parse_expr.c:1010
#2  0x000000000060c55b in transformExprRecurse (pstate=0x22d6e30, expr=0x22d6ba0) at parse_expr.c:216
#3  0x000000000060c298 in transformExpr (pstate=0x22d6e30, expr=0x22d6ba0, exprKind=EXPR_KIND_WHERE) at parse_expr.c:155
#4  0x0000000000602164 in transformWhereClause (pstate=0x22d6e30, clause=0x22d6ba0, exprKind=EXPR_KIND_WHERE, 
    constructName=0xb458a7 "WHERE") at parse_clause.c:1691
#5  0x00000000005c7eb5 in transformSelectStmt (pstate=0x22d6e30, stmt=0x22d6c80) at analyze.c:1239
#6  0x00000000005c6392 in transformStmt (pstate=0x22d6e30, parseTree=0x22d6c80) at analyze.c:301
#7  0x00000000005c626d in transformOptionalSelectInto (pstate=0x22d6e30, parseTree=0x22d6c80) at analyze.c:246
#8  0x00000000005c612b in transformTopLevelStmt (pstate=0x22d6e30, parseTree=0x22d6d98) at analyze.c:196
#9  0x00000000005c5f83 in parse_analyze (parseTree=0x22d6d98, sourceText=0x22d5e08 "select * from t_conv where id = '1';", 
    paramTypes=0x0, numParams=0, queryEnv=0x0) at analyze.c:116
#10 0x00000000008e78d9 in pg_analyze_and_rewrite (parsetree=0x22d6d98, 
    query_string=0x22d5e08 "select * from t_conv where id = '1';", paramTypes=0x0, numParams=0, queryEnv=0x0)
    at postgres.c:695
#11 0x00000000008e7f23 in exec_simple_query (query_string=0x22d5e08 "select * from t_conv where id = '1';")
    at postgres.c:1140
#12 0x00000000008ec3a0 in PostgresMain (argc=1, argv=0x22ffdb8, dbname=0x22ffc00 "testdb", username=0x22d2a68 "pg12")
    at postgres.c:4249
#13 0x0000000000843650 in BackendRun (port=0x22f7be0) at postmaster.c:4431
#14 0x0000000000842e2e in BackendStartup (port=0x22f7be0) at postmaster.c:4122
#15 0x000000000083f06a in ServerLoop () at postmaster.c:1704
#16 0x000000000083e920 in PostmasterMain (argc=1, argv=0x22d0a20) at postmaster.c:1377
#17 0x000000000075f834 in main (argc=1, argv=0x22d0a20) at main.c:228

输入参数
操作符为”=”;ltree为字段列,类型为int4(vartype = 23);rtree为常量,类型为unknown(consttype = 705);last_srf为NULL;该操作符在字符串中的偏移为30.


(gdb) p *pstate
$7 = {parentParseState = 0x0, p_sourcetext = 0x22d5e08 "select * from t_conv where id = '1';", p_rtable = 0x22d72a0, 
  p_joinexprs = 0x0, p_joinlist = 0x22d73a8, p_namespace = 0x22d7328, p_lateral_active = false, p_ctenamespace = 0x0, 
  p_future_ctes = 0x0, p_parent_cte = 0x0, p_target_relation = 0x0, p_target_rangetblentry = 0x0, p_is_insert = false, 
  p_windowdefs = 0x0, p_expr_kind = EXPR_KIND_WHERE, p_next_resno = 2, p_multiassign_exprs = 0x0, p_locking_clause = 0x0, 
  p_locked_from_parent = false, p_resolve_unknowns = true, p_queryEnv = 0x0, p_hasAggs = false, p_hasWindowFuncs = false, 
  p_hasTargetSRFs = false, p_hasSubLinks = false, p_hasModifyingCTE = false, p_last_srf = 0x0, p_pre_columnref_hook = 0x0, 
  p_post_columnref_hook = 0x0, p_paramref_hook = 0x0, p_coerce_param_hook = 0x0, p_ref_hook_state = 0x0}
(gdb) p *opname
$8 = {type = T_List, length = 1, head = 0x22d6c20, tail = 0x22d6c20}
(gdb) p *(Node *)opname->head->data.ptr_value
$9 = {type = T_String}
(gdb) p *(Value *)opname->head->data.ptr_value
$14 = {type = T_String, val = {ival = 12298942, str = 0xbbaabe "="}}
(gdb) p *ltree
$15 = {type = T_Var}
(gdb) p *(Var *)ltree
$16 = {xpr = {type = T_Var}, varno = 1, varattno = 1, vartype = 23, vartypmod = -1, varcollid = 0, varlevelsup = 0, 
  varnoold = 1, varoattno = 1, location = 27}
(gdb) p *(Const *)rtree
$18 = {xpr = {type = T_Const}, consttype = 705, consttypmod = -1, constcollid = 0, constlen = -2, constvalue = 36531016, 
  constisnull = false, constbyval = false, location = 32}  
###
testdb=# select typname from pg_type where oid in (23,705);
 typname 
---------
 int4
 unknown
(2 rows)
###

确定操作符(二元操作符)


(gdb) n
769   else if (ltree == NULL)
(gdb) 
779     ltypeId = exprType(ltree);
(gdb) 
780     rtypeId = exprType(rtree);
(gdb) 
781     tup = oper(pstate, opname, ltypeId, rtypeId, false, location);
(gdb) p ltypeId
$19 = 23
(gdb) p rtypeId
$20 = 705
(gdb)

获取操作符(对应pg_operator)


(gdb) n
784   opform = (Form_pg_operator) GETSTRUCT(tup);
(gdb) 
787   if (!RegProcedureIsValid(opform->oprcode))
(gdb) p *opform
$21 = {oid = 96, oprname = {data = "=", '\000' }, oprnamespace = 11, oprowner = 10, oprkind = 98 'b', 
  oprcanmerge = true, oprcanhash = true, oprleft = 23, oprright = 23, oprresult = 16, oprcom = 96, oprnegate = 518, 
  oprcode = 65, oprrest = 101, oprjoin = 105}
(gdb)

执行类型转换并构建表达式树.
实际类型为23&705,操作符声明操作类型为23&23


(gdb) n
798   if (rtree == NULL)
(gdb) 
806   else if (ltree == NULL)
(gdb) 
817     args = list_make2(ltree, rtree);
(gdb) 
818     actual_arg_types[0] = ltypeId;
(gdb) 
819     actual_arg_types[1] = rtypeId;
(gdb) 
820     declared_arg_types[0] = opform->oprleft;
(gdb) 
821     declared_arg_types[1] = opform->oprright;
(gdb) 
822     nargs = 2;
(gdb) p opform->oprleft
$22 = 23
(gdb) p opform->oprright
$23 = 23
(gdb) n
830   rettype = enforce_generic_type_consistency(actual_arg_types,
(gdb)

进入函数make_fn_arguments


(gdb) 
837   make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);
(gdb) n
Breakpoint 1, make_fn_arguments (pstate=0x22d6e30, fargs=0x22d76f8, actual_arg_types=0x7ffda1b2af80, 
    declared_arg_types=0x7ffda1b2af70) at parse_func.c:1835
1835    int     i = 0;
(gdb)

调用coerce_type函数执行转换,unknown -> int4


(gdb) n
1837    foreach(current_fargs, fargs)
(gdb) 
1840      if (actual_arg_types[i] != declared_arg_types[i])
(gdb) 
1873      i++;
(gdb) 
1837    foreach(current_fargs, fargs)
(gdb) 
1840      if (actual_arg_types[i] != declared_arg_types[i])
(gdb) p actual_arg_types[i]
$24 = 705
(gdb) p declared_arg_types[i]
$25 = 23
(gdb) n
1842        Node     *node = (Node *) lfirst(current_fargs);
(gdb) n
1848        if (IsA(node, NamedArgExpr))
(gdb) 
1866                     declared_arg_types[i], -1,
(gdb) 
1863          node = coerce_type(pstate,
(gdb) 
1865                     actual_arg_types[i],
(gdb) 
1863          node = coerce_type(pstate,
(gdb) 
1870          lfirst(current_fargs) = node;
(gdb) p *node
$26 = {type = T_Const}
(gdb) p *(Const *)node
$27 = {xpr = {type = T_Const}, consttype = 23, consttypmod = -1, constcollid = 0, constlen = 4, constvalue = 1, 
  constisnull = false, constbyval = true, location = 32}
(gdb)

完成调用


(gdb) n
1873      i++;
(gdb) 
1837    foreach(current_fargs, fargs)
(gdb) 
1875  }
(gdb) 
make_op (pstate=0x22d6e30, opname=0x22d6c48, ltree=0x22d7620, rtree=0x22d7678, last_srf=0x0, location=30)
    at parse_oper.c:840
840   result = makeNode(OpExpr);

回到make_op,构造结果result结构体


(gdb) 
841   result->opno = oprid(tup);
(gdb) 
842   result->opfuncid = opform->oprcode;
(gdb) 
843   result->opresulttype = rettype;
(gdb) 
844   result->opretset = get_func_retset(opform->oprcode);
(gdb) 
846   result->args = args;
(gdb) 
847   result->location = location;
(gdb) 
850   if (result->opretset)
(gdb) 
857   ReleaseSysCache(tup);
(gdb) 
859   return (Expr *) result;
(gdb) p *(Expr *) result
$28 = {type = T_OpExpr}
(gdb) p *(OpExpr *) result
$29 = {xpr = {type = T_OpExpr}, opno = 96, opfuncid = 65, opresulttype = 16, opretset = false, opcollid = 0, 
  inputcollid = 0, args = 0x22d76f8, location = 30}
(gdb)

DONE!

实现函数coerce_type下节再行介绍

四、参考资料

PostgreSQL Type Conversion
PostgreSQL数据类型转换规则#1
PostgreSQL数据类型转换规则#2
PostgreSQL数据类型转换规则#3


名称栏目:PostgreSQL源码解读(202)-查询#115(类型转换)
分享路径:http://cdxtjz.cn/article/ggpiio.html