本篇内容介绍了“PostgreSQL中的User subroutines有什么作用”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
Flex输入文件由四部分组成:
%{ Declarations %} Definitions %% Rules %% User subroutines
在规则之后是自定义例程,在scan.l中定义的例程主要是对输入的SQL语句进行解析以及执行初始化和事后清理工作等.
/* LCOV_EXCL_STOP */ /* * Arrange access to yyextra for subroutines of the main yylex() function. * We expect each subroutine to have a yyscanner parameter. Rather than * use the yyget_xxx functions, which might or might not get inlined by the * compiler, we cheat just a bit and cast yyscanner to the right type. * 为主yylex()函数提供yyextra的访问. * 我们期望每一个子例程都有参数:yyscanner. * 相对于使用yyget_xxx函数(可能或不可能被编译器内联),我们强制yyscanner为正确的类型. */ #undef yyextra #define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r) /* Likewise for a couple of other things we need. */ //定义其他需要的东西:yylloc/yyleng #undef yylloc #define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r) #undef yyleng #define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r) /* * scanner_errposition * Report a lexer or grammar error cursor position, if possible. * scanner_errposition : 如可以,报告词法或语法错误位置 * * This is expected to be used within an ereport() call. The return value * is a dummy (always 0, in fact). * 该例程在ereport()调用中使用.返回值是伪列(实际上通常为0). * * Note that this can only be used for messages emitted during raw parsing * (essentially, scan.l and gram.y), since it requires the yyscanner struct * to still be available. * 注意,这只能用于在原始解析期间产生的消息(scan.l & gram.y), * 因为需要yyscanner结构体仍然可用才行. */ int scanner_errposition(int location, core_yyscan_t yyscanner) { int pos; if (location < 0) return 0; /* no-op if location is unknown */ /* Convert byte offset to character number */ pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1; /* And pass it to the ereport mechanism */ return errposition(pos); } /* * scanner_yyerror * Report a lexer or grammar error. * 报告词法或语法错误. * * The message's cursor position is whatever YYLLOC was last set to, * ie, the start of the current token if called within yylex(), or the * most recently lexed token if called from the grammar. * This is OK for syntax error messages from the Bison parser, because Bison * parsers report error as soon as the first unparsable token is reached. * Beware of using yyerror for other purposes, as the cursor position might * be misleading! * 该消息游标的位置在于YYLLOC最后设置的地方,比如如果在yylex()中则是当前的token开始位置, * 或者如果是grammer调用则为最近一次的词法token. * 在Bison解析器中抛出语法错误是没有问题的,因为Bison及诶吸气在遇到第一个无法解析的token时就会报错. * 注意:如果处于其他目的使用yyerror,这时候游标的位置可能会出现误导. */ void scanner_yyerror(const char *message, core_yyscan_t yyscanner) { const char *loc = yyextra->scanbuf + *yylloc; if (*loc == YY_END_OF_BUFFER_CHAR) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), /* translator: %s is typically the translation of "syntax error" */ errmsg("%s at end of input", _(message)), lexer_errposition())); } else { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), /* translator: first %s is typically the translation of "syntax error" */ errmsg("%s at or near \"%s\"", _(message), loc), lexer_errposition())); } } /* * Called before any actual parsing is done * 初始化扫描器,在实际解析完成前调用 */ core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords) { Size slen = strlen(str); yyscan_t scanner; if (yylex_init(&scanner) != 0) elog(ERROR, "yylex_init() failed: %m"); core_yyset_extra(yyext, scanner); yyext->keywords = keywords; yyext->num_keywords = num_keywords; yyext->backslash_quote = backslash_quote; yyext->escape_string_warning = escape_string_warning; yyext->standard_conforming_strings = standard_conforming_strings; /* * Make a scan buffer with special termination needed by flex. */ yyext->scanbuf = (char *) palloc(slen + 2); yyext->scanbuflen = slen; memcpy(yyext->scanbuf, str, slen); yyext->scanbuf[slen] = yyext->scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; yy_scan_buffer(yyext->scanbuf, slen + 2, scanner); /* initialize literal buffer to a reasonable but expansible size */ yyext->literalalloc = 1024; yyext->literalbuf = (char *) palloc(yyext->literalalloc); yyext->literallen = 0; return scanner; } /* * Called after parsing is done to clean up after scanner_init() * 在解析完成后调用,用于在scanner_init()之后进行清理. */ void scanner_finish(core_yyscan_t yyscanner) { /* * We don't bother to call yylex_destroy(), because all it would do is * pfree a small amount of control storage. It's cheaper to leak the * storage until the parsing context is destroyed. The amount of space * involved is usually negligible compared to the output parse tree * anyway. * 不需要调用yylex_destroy(),因为所有需要做的事情只是释放一小块控制内存而已. * 在解析上下文被销毁前,保留这部分内存成本会更低. * 无论如何,与输出解析树相比,所涉及到的空间大小通常可以忽略不计. * * We do bother to pfree the scanbuf and literal buffer, but only if they * represent a nontrivial amount of space. The 8K cutoff is arbitrary. * 需要使用pfree释放扫描缓存和字面值缓存,但前提是它们代表了一个不小的空间才需要. * 8K这个数值其实是很随意的. */ if (yyextra->scanbuflen >= 8192) pfree(yyextra->scanbuf); if (yyextra->literalalloc >= 8192) pfree(yyextra->literalbuf); } static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ //增大缓存 if ((yyextra->literallen + yleng) >= yyextra->literalalloc) { do { yyextra->literalalloc *= 2; } while ((yyextra->literallen + yleng) >= yyextra->literalalloc); yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, yyextra->literalalloc); } /* append new data */ //追加新数据 memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng); yyextra->literallen += yleng; } static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ if ((yyextra->literallen + 1) >= yyextra->literalalloc) { yyextra->literalalloc *= 2; yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, yyextra->literalalloc); } /* append new data */ yyextra->literalbuf[yyextra->literallen] = ychar; yyextra->literallen += 1; } /* * Create a palloc'd copy of literalbuf, adding a trailing null. * 创建字面值缓存的拷贝,在末尾增加null. */ static char * litbufdup(core_yyscan_t yyscanner) { int llen = yyextra->literallen; char *new; new = palloc(llen + 1); memcpy(new, yyextra->literalbuf, llen); new[llen] = '\0'; return new; } static int process_integer_literal(const char *token, YYSTYPE *lval) { //处理整型字面值 int val; char *endptr; errno = 0; val = strtoint(token, &endptr, 10); if (*endptr != '\0' || errno == ERANGE) { /* integer too large, treat it as a float */ lval->str = pstrdup(token); return FCONST; } lval->ival = val; return ICONST; } static unsigned int hexval(unsigned char c) { //十六进制 if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 0xA; if (c >= 'A' && c <= 'F') return c - 'A' + 0xA; elog(ERROR, "invalid hexadecimal digit"); return 0; /* not reached */ } static void check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner) { if (GetDatabaseEncoding() == PG_UTF8) return; if (c > 0x7F) { ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */ yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); } } static bool is_utf16_surrogate_first(pg_wchar c) { return (c >= 0xD800 && c <= 0xDBFF); } static bool is_utf16_surrogate_second(pg_wchar c) { return (c >= 0xDC00 && c <= 0xDFFF); } static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) { return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); } static void addunicode(pg_wchar c, core_yyscan_t yyscanner) { char buf[8]; if (c == 0 || c > 0x10FFFF) yyerror("invalid Unicode escape value"); if (c > 0x7F) { if (GetDatabaseEncoding() != PG_UTF8) yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); yyextra->saw_non_ascii = true; } unicode_to_utf8(c, (unsigned char *) buf); addlit(buf, pg_mblen(buf), yyscanner); } /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */ static bool check_uescapechar(unsigned char escape) { if (isxdigit(escape) || escape == '+' || escape == '\'' || escape == '"' || scanner_isspace(escape)) { return false; } else return true; } /* like litbufdup, but handle unicode escapes */ static char * litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) { char *new; char *litbuf, *in, *out; pg_wchar pair_first = 0; /* Make literalbuf null-terminated to simplify the scanning loop */ litbuf = yyextra->literalbuf; litbuf[yyextra->literallen] = '\0'; /* * This relies on the subtle assumption that a UTF-8 expansion cannot be * longer than its escaped representation. */ new = palloc(yyextra->literallen + 1); in = litbuf; out = new; while (*in) { if (in[0] == escape) { if (in[1] == escape) { if (pair_first) { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode surrogate pair"); } *out++ = escape; in += 2; } else if (isxdigit((unsigned char) in[1]) && isxdigit((unsigned char) in[2]) && isxdigit((unsigned char) in[3]) && isxdigit((unsigned char) in[4])) { pg_wchar unicode; unicode = (hexval(in[1]) << 12) + (hexval(in[2]) << 8) + (hexval(in[3]) << 4) + hexval(in[4]); check_unicode_value(unicode, in, yyscanner); if (pair_first) { if (is_utf16_surrogate_second(unicode)) { unicode = surrogate_pair_to_codepoint(pair_first, unicode); pair_first = 0; } else { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode surrogate pair"); } } else if (is_utf16_surrogate_second(unicode)) yyerror("invalid Unicode surrogate pair"); if (is_utf16_surrogate_first(unicode)) pair_first = unicode; else { unicode_to_utf8(unicode, (unsigned char *) out); out += pg_mblen(out); } in += 5; } else if (in[1] == '+' && isxdigit((unsigned char) in[2]) && isxdigit((unsigned char) in[3]) && isxdigit((unsigned char) in[4]) && isxdigit((unsigned char) in[5]) && isxdigit((unsigned char) in[6]) && isxdigit((unsigned char) in[7])) { pg_wchar unicode; unicode = (hexval(in[2]) << 20) + (hexval(in[3]) << 16) + (hexval(in[4]) << 12) + (hexval(in[5]) << 8) + (hexval(in[6]) << 4) + hexval(in[7]); check_unicode_value(unicode, in, yyscanner); if (pair_first) { if (is_utf16_surrogate_second(unicode)) { unicode = surrogate_pair_to_codepoint(pair_first, unicode); pair_first = 0; } else { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode surrogate pair"); } } else if (is_utf16_surrogate_second(unicode)) yyerror("invalid Unicode surrogate pair"); if (is_utf16_surrogate_first(unicode)) pair_first = unicode; else { unicode_to_utf8(unicode, (unsigned char *) out); out += pg_mblen(out); } in += 8; } else { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode escape value"); } } else { if (pair_first) { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode surrogate pair"); } *out++ = *in++; } } /* unfinished surrogate pair? */ if (pair_first) { ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ yyerror("invalid Unicode surrogate pair"); } *out = '\0'; /* * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII * codes; but it's probably not worth the trouble, since this isn't likely * to be a performance-critical path. */ pg_verifymbstr(new, out - new, false); return new; } static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner) { switch (c) { case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; default: /* check for backslash followed by non-7-bit-ASCII */ if (c == '\0' || IS_HIGHBIT_SET(c)) yyextra->saw_non_ascii = true; return c; } } static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner) { if (ychar == '\'') { if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of \\' in a string literal"), errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."), lexer_errposition())); yyextra->warn_on_first_escape = false; /* warn only once per string */ } else if (ychar == '\\') { if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of \\\\ in a string literal"), errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), lexer_errposition())); yyextra->warn_on_first_escape = false; /* warn only once per string */ } else check_escape_warning(yyscanner); } static void check_escape_warning(core_yyscan_t yyscanner) { if (yyextra->warn_on_first_escape && yyextra->escape_string_warning) ereport(WARNING, (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), errmsg("nonstandard use of escape in a string literal"), errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), lexer_errposition())); yyextra->warn_on_first_escape = false; /* warn only once per string */ } /* * Interface functions to make flex use palloc() instead of malloc(). * It'd be better to make these static, but flex insists otherwise. */ void * core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner) { return palloc(bytes); } void * core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner) { if (ptr) return repalloc(ptr, bytes); else return palloc(bytes); } void core_yyfree(void *ptr, core_yyscan_t yyscanner) { if (ptr) pfree(ptr); }
“PostgreSQL中的User subroutines有什么作用”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。