本篇内容介绍了“PostgreSQL中的Rules有什么作用”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
Flex输入文件由四部分组成:
%{
Declarations
%}
Definitions
%%
Rules
%%
User subroutines
在Flex的模式文件中,%%和%%之间的内容被称为规则(rules)。每条规则由匹配模式(pattern)和动作(action)组成:模式写在行首,用正则表达式表示;动作跟在模式之后,是一段C代码,用花括号括起来时可以跨越多行。每当一个模式被匹配到时,其后的C代码将被执行。
Flex会将规则翻译成名为yylex的函数,该函数扫描输入文件(默认标准输入),当扫描到一个完整的、最长的、可以和某条规则的正则表达式所匹配的输入时,函数会执行此规则后面的C代码。如果代码中没有return语句,则执行完毕后,yylex会继续运行,开始下一轮的扫描和匹配。注意:当有多条规则的模式被匹配到时, yylex会优先选择匹配长度最长的那条规则,如果有匹配长度相等的规则,则选择排在最前面的那条规则。
PG中的规则定义如下:
%%
{whitespace} {
/* Whitespace: the match is discarded and no token is returned. */
/* ignore */
}
{xcstart} {
/* Start of a C-style comment. */
/* Set location in case of syntax error in comment */
SET_YYLLOC();
/* Reset the comment nesting depth. */
yyextra->xcdepth = 0;
/* Enter the xc start condition. */
BEGIN(xc);
/* Put back any characters past slash-star; see above */
/* The slash-star opener is 2 characters long, so keep 2 and return the rest of the match to the input. */
yyless(2);
}
<xc>{xcstart} {
/* A comment opener seen while already inside a comment: nesting depth + 1. */
(yyextra->xcdepth)++;
/* Put back any characters past slash-star; see above */
/* As in the non-nested case, keep only the 2-character opener. */
yyless(2);
}
<xc>{xcstop} {
	/*
	 * Comment terminator.  While nested (depth above zero) just pop one
	 * level; otherwise the outermost comment has ended and scanning
	 * returns to the INITIAL start condition.
	 */
	if (yyextra->xcdepth > 0)
	{
		(yyextra->xcdepth)--;
	}
	else
	{
		BEGIN(INITIAL);
	}
}
<xc>{xcinside} {
/* Text inside a comment: discarded. */
/* ignore */
}
<xc>{op_chars} {
/* Operator characters inside a comment: discarded. */
/* ignore */
}
<xc>\*+ {
/* A run of asterisks inside a comment: discarded. */
/* ignore */
}
<xc><<EOF>> { /* End of input while still inside a comment is reported as an error. */ yyerror("unterminated /* comment"); }
{xbstart} {
/* Binary bit type.
* At some point we should simply pass the string
* forward to the parser and label it there.
* In the meantime, place a leading "b" on the string
* to mark it for the input routine as a binary string.
*/
/* Start of a binary bit-string literal. */
SET_YYLLOC();
/* Enter the xb start condition and begin a fresh literal. */
BEGIN(xb);
startlit();
/* Leading 'b' marker described in the comment above. */
addlitchar('b', yyscanner);
}
<xb>{quotestop} |
<xb>{quotefail} {
/* End of a bit-string literal: keep only the first matched character, push the rest back. */
yyless(1);
BEGIN(INITIAL);
/* Return the accumulated literal text as a BCONST token. */
yylval->str = litbufdup(yyscanner);
return BCONST;
}
<xh>{xhinside} |
<xb>{xbinside} {
/* Body text of a hex or bit string: append the whole match to the literal. */
addlit(yytext, yyleng, yyscanner);
}
<xh>{quotecontinue} |
<xb>{quotecontinue} {
/* Literal continuation between quoted segments: nothing is added. */
/* ignore */
}
<xb><<EOF>> { /* End of input inside a bit string is an error. */ yyerror("unterminated bit string literal"); }
{xhstart} {
/* Start of a hexadecimal string literal. */
/* Hexadecimal bit type.
* At some point we should simply pass the string
* forward to the parser and label it there.
* In the meantime, place a leading "x" on the string
* to mark it for the input routine as a hex string.
*/
SET_YYLLOC();
/* Enter the xh start condition and begin a fresh literal. */
BEGIN(xh);
startlit();
/* Leading 'x' marker described in the comment above. */
addlitchar('x', yyscanner);
}
<xh>{quotestop} |
<xh>{quotefail} {
/* End of a hex string literal: keep only the first matched character, push the rest back. */
yyless(1);
BEGIN(INITIAL);
/* Return the accumulated literal text as an XCONST token. */
yylval->str = litbufdup(yyscanner);
return XCONST;
}
<xh><<EOF>> { /* End of input inside a hex string is an error. */ yyerror("unterminated hexadecimal string literal"); }
{xnstart} {
	/*
	 * National character literal.  The string itself is scanned later as
	 * an ordinary character string; this rule only emits the token that
	 * precedes it, an internally generated "NCHAR".
	 */
	const ScanKeyword *nchar_kw;

	SET_YYLLOC();
	/* consume just the leading 'n'; the rest is rescanned */
	yyless(1);

	nchar_kw = ScanKeywordLookup("nchar",
								 yyextra->keywords,
								 yyextra->num_keywords);
	if (nchar_kw == NULL)
	{
		/* NCHAR is not in the keyword list: hand back a plain "n" */
		yylval->str = pstrdup("n");
		return IDENT;
	}

	yylval->keyword = nchar_kw->name;
	return nchar_kw->value;
}
{xqstart} {
	/*
	 * Opening quote of an ordinary string literal.  Reset the per-literal
	 * flags, record the token location, and pick the start condition from
	 * standard_conforming_strings: xq when it is on, xe when it is off.
	 */
	yyextra->warn_on_first_escape = true;
	yyextra->saw_non_ascii = false;
	SET_YYLLOC();
	if (!yyextra->standard_conforming_strings)
	{
		BEGIN(xe);
	}
	else
	{
		BEGIN(xq);
	}
	startlit();
}
{xestart} {
/*
* Start of a string literal matched by {xestart}: always scanned in the
* xe start condition, with warn_on_first_escape cleared (the {xqstart}
* rule sets it to true instead).
*/
yyextra->warn_on_first_escape = false;
yyextra->saw_non_ascii = false;
SET_YYLLOC();
BEGIN(xe);
startlit();
}
{xusstart} {
/* Start of a string constant with Unicode escapes. */
SET_YYLLOC();
/* Such constants are rejected when standard_conforming_strings is off. */
if (!yyextra->standard_conforming_strings)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unsafe use of string constant with Unicode escapes"),
errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."),
lexer_errposition()));
/* Enter the xus start condition and begin a fresh literal. */
BEGIN(xus);
startlit();
}
<xq,xe>{quotestop} |
<xq,xe>{quotefail} {
/* End of a quoted string: keep only the first matched character, push the rest back. */
yyless(1);
BEGIN(INITIAL);
/*
* check that the data remains valid if it might have been
* made invalid by unescaping any chars.
*/
if (yyextra->saw_non_ascii)
pg_verifymbstr(yyextra->literalbuf,
yyextra->literallen,
false);
/* Return the accumulated literal text as an SCONST token. */
yylval->str = litbufdup(yyscanner);
return SCONST;
}
<xus>{quotestop} |
<xus>{quotefail} {
/* throw back all but the quote */
yyless(1);
/* xusend state looks for possible UESCAPE */
BEGIN(xusend);
}
<xusend>{whitespace} {
/* stay in xusend state over whitespace */
}
<xusend><<EOF>> |
<xusend>{other} |
<xusend>{xustop1} {
/* no UESCAPE after the quote, throw back everything */
yyless(0);
BEGIN(INITIAL);
/* No UESCAPE clause, so backslash is passed as the escape character. */
yylval->str = litbuf_udeescape('\\', yyscanner);
return SCONST;
}
<xusend>{xustop2} {
/* found UESCAPE after the end quote */
BEGIN(INITIAL);
/* The escape character is the second-to-last character of the match. */
if (!check_uescapechar(yytext[yyleng - 2]))
{
/* Move the reported location forward to the offending character. */
SET_YYLLOC();
ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character");
}
/* Convert the literal using the user-chosen escape character. */
yylval->str = litbuf_udeescape(yytext[yyleng - 2],
yyscanner);
return SCONST;
}
<xq,xe,xus>{xqdouble} {
/* {xqdouble} inside a string contributes one single-quote character. */
addlitchar('\'', yyscanner);
}
<xq,xus>{xqinside} {
/* Ordinary string body text: append the whole match. */
addlit(yytext, yyleng, yyscanner);
}
<xe>{xeinside} {
/* Ordinary body text in the xe start condition: append the whole match. */
addlit(yytext, yyleng, yyscanner);
}
<xe>{xeunicode} {
/* Unicode escape: parse the hex digits that follow the 2-character prefix. */
pg_wchar c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner);
if (is_utf16_surrogate_first(c))
{
/* First half of a surrogate pair: remember it and expect the second half in xeu. */
yyextra->utf16_first_part = c;
BEGIN(xeu);
}
else if (is_utf16_surrogate_second(c))
/* A second-half surrogate with no preceding first half is an error. */
yyerror("invalid Unicode surrogate pair");
else
/* Any other code point is added to the literal directly. */
addunicode(c, yyscanner);
}
<xeu>{xeunicode} {
/* Expecting the second half of a surrogate pair. */
pg_wchar c = strtoul(yytext + 2, NULL, 16);
if (!is_utf16_surrogate_second(c))
yyerror("invalid Unicode surrogate pair");
/* Combine with the saved first half, add the result, and return to xe. */
c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);
addunicode(c, yyscanner);
BEGIN(xe);
}
<xeu>. { /* Any other character after a first-half surrogate is an error. */ yyerror("invalid Unicode surrogate pair"); }
<xeu>\n { /* So is a newline. */ yyerror("invalid Unicode surrogate pair"); }
<xeu><<EOF>> { /* So is end of input. */ yyerror("invalid Unicode surrogate pair"); }
<xe,xeu>{xeunicodefail} {
/* Input matched {xeunicodefail}: raise an error with a hint about the accepted forms. */
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
lexer_errposition()));
}
<xe>{xeescape} {
/* Single-character escape; yytext[1] is the escaped character. */
if (yytext[1] == '\'')
{
/*
* An escaped quote is rejected when backslash_quote is OFF, or when it
* is SAFE_ENCODING and the client encoding is client-only.
*/
if (yyextra->backslash_quote == BACKSLASH_QUOTE_OFF ||
(yyextra->backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
ereport(ERROR,
(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
errmsg("unsafe use of \\' in a string literal"),
errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
lexer_errposition()));
}
check_string_escape_warning(yytext[1], yyscanner);
/* Append the character produced by unescape_single_char. */
addlitchar(unescape_single_char(yytext[1], yyscanner),
yyscanner);
}
<xe>{xeoctesc} {
/* Octal escape: digits start after the 1-character prefix; the value is stored in an unsigned char. */
unsigned char c = strtoul(yytext + 1, NULL, 8);
check_escape_warning(yyscanner);
addlitchar(c, yyscanner);
/* A zero byte or a high-bit byte sets saw_non_ascii, which triggers verification when the string ends. */
if (c == '\0' || IS_HIGHBIT_SET(c))
yyextra->saw_non_ascii = true;
}
<xe>{xehexesc} {
/* Hex escape: digits start after the 2-character prefix; the value is stored in an unsigned char. */
unsigned char c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner);
addlitchar(c, yyscanner);
/* Same flagging as for octal escapes. */
if (c == '\0' || IS_HIGHBIT_SET(c))
yyextra->saw_non_ascii = true;
}
<xq,xe,xus>{quotecontinue} {
/* Literal continuation between quoted segments: nothing is added. */
/* ignore */
}
<xe>. {
/* This is only needed for \ just before EOF */
addlitchar(yytext[0], yyscanner);
}
<xq,xe,xus><<EOF>> { /* End of input inside a quoted string is an error. */ yyerror("unterminated quoted string"); }
{dolqdelim} {
/* Opening delimiter of a dollar-quoted string. */
SET_YYLLOC();
/* Save a copy of the delimiter so the closing one can be compared against it. */
yyextra->dolqstart = pstrdup(yytext);
BEGIN(xdolq);
startlit();
}
{dolqfailed} {
SET_YYLLOC();
/* throw back all but the initial "$" */
yyless(1);
/* and treat it as {other} */
return yytext[0];
}
<xdolq>{dolqdelim} {
	/* A $...$ delimiter seen inside a dollar-quoted string. */
	if (strcmp(yytext, yyextra->dolqstart) != 0)
	{
		/*
		 * Not our closing delimiter.  Copy everything except the last
		 * "$" into the literal and push that "$" back so it can start a
		 * new delimiter match, as in $delim$...$junk$delim$.
		 */
		addlit(yytext, yyleng - 1, yyscanner);
		yyless(yyleng - 1);
	}
	else
	{
		/* Matching delimiter: release the saved copy and emit the string. */
		pfree(yyextra->dolqstart);
		yyextra->dolqstart = NULL;
		BEGIN(INITIAL);
		yylval->str = litbufdup(yyscanner);
		return SCONST;
	}
}
<xdolq>{dolqinside} {
/* Body text of a dollar-quoted string: append the whole match. */
addlit(yytext, yyleng, yyscanner);
}
<xdolq>{dolqfailed} {
/* A {dolqfailed} match inside the string is ordinary text here: append it. */
addlit(yytext, yyleng, yyscanner);
}
<xdolq>. {
/* This is only needed for $ inside the quoted text */
addlitchar(yytext[0], yyscanner);
}
<xdolq><<EOF>> { /* End of input inside a dollar-quoted string is an error. */ yyerror("unterminated dollar-quoted string"); }
{xdstart} {
/* Start of a delimited identifier: enter xd and begin a fresh literal. */
SET_YYLLOC();
BEGIN(xd);
startlit();
}
{xuistart} {
/* Start of a delimited identifier with Unicode escapes: enter xui and begin a fresh literal. */
SET_YYLLOC();
BEGIN(xui);
startlit();
}
<xd>{xdstop} {
/* End of a delimited identifier: return its text as IDENT. */
char *ident;
BEGIN(INITIAL);
/* An empty delimited identifier is an error. */
if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier");
ident = litbufdup(yyscanner);
/* Names of NAMEDATALEN or more bytes are passed to truncate_identifier. */
if (yyextra->literallen >= NAMEDATALEN)
truncate_identifier(ident, yyextra->literallen, true);
yylval->str = ident;
return IDENT;
}
<xui>{dquote} {
/* Closing quote: keep only the first matched character, push the rest back. */
yyless(1);
/* xuiend state looks for possible UESCAPE */
BEGIN(xuiend);
}
<xuiend>{whitespace} {
/* stay in xuiend state over whitespace */
}
<xuiend><<EOF>> |
<xuiend>{other} |
<xuiend>{xustop1} {
/* no UESCAPE after the quote, throw back everything */
char *ident;
int identlen;
yyless(0);
BEGIN(INITIAL);
/* An empty delimited identifier is an error. */
if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier");
/* No UESCAPE clause, so backslash is passed as the escape character. */
ident = litbuf_udeescape('\\', yyscanner);
/* Length is measured on the converted string. */
identlen = strlen(ident);
if (identlen >= NAMEDATALEN)
truncate_identifier(ident, identlen, true);
yylval->str = ident;
return IDENT;
}
<xuiend>{xustop2} {
/* found UESCAPE after the end quote */
char *ident;
int identlen;
BEGIN(INITIAL);
/* An empty delimited identifier is an error. */
if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier");
/* The escape character is the second-to-last character of the match. */
if (!check_uescapechar(yytext[yyleng - 2]))
{
/* Move the reported location forward to the offending character. */
SET_YYLLOC();
ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character");
}
/* Convert using the user-chosen escape character; length is measured on the result. */
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
identlen = strlen(ident);
if (identlen >= NAMEDATALEN)
truncate_identifier(ident, identlen, true);
yylval->str = ident;
return IDENT;
}
<xd,xui>{xddouble} {
/* {xddouble} inside a delimited identifier contributes one double-quote character. */
addlitchar('"', yyscanner);
}
<xd,xui>{xdinside} {
/* Ordinary identifier body text: append the whole match. */
addlit(yytext, yyleng, yyscanner);
}
<xd,xui><<EOF>> { /* End of input inside a quoted identifier is an error. */ yyerror("unterminated quoted identifier"); }
{xufailed} {
/* Input matched {xufailed}: only its first letter is returned, as an identifier. */
char *ident;
SET_YYLLOC();
/* throw back all but the initial u/U */
yyless(1);
/* and treat it as {identifier} */
ident = downcase_truncate_identifier(yytext, yyleng, true);
yylval->str = ident;
return IDENT;
}
{typecast} {
/* Each rule in this group records the token location and returns a fixed token code. */
SET_YYLLOC();
return TYPECAST;
}
{dot_dot} {
SET_YYLLOC();
return DOT_DOT;
}
{colon_equals} {
SET_YYLLOC();
return COLON_EQUALS;
}
{equals_greater} {
SET_YYLLOC();
return EQUALS_GREATER;
}
{less_equals} {
SET_YYLLOC();
return LESS_EQUALS;
}
{greater_equals} {
SET_YYLLOC();
return GREATER_EQUALS;
}
{less_greater} {
/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
SET_YYLLOC();
return NOT_EQUALS;
}
{not_equals} {
/* We accept both "<>" and "!=" as meaning NOT_EQUALS */
SET_YYLLOC();
return NOT_EQUALS;
}
{self} {
/* Single-character token: the character itself is the token code. */
SET_YYLLOC();
return yytext[0];
}
{operator} {
/*
* General operator token.  The match is first shortened to nchars
* characters (stopping before an embedded comment start, then possibly
* dropping trailing + or -); the remainder is returned as a
* single-character token, one of the fixed two-character tokens, or a
* generic Op.
*/
/*
* Check for embedded slash-star or dash-dash; those
* are comment starts, so operator must stop there.
* Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one.
*/
int nchars = yyleng;
char *slashstar = strstr(yytext, "/*");
char *dashdash = strstr(yytext, "--");
if (slashstar && dashdash)
{
/* if both appear, take the first one */
if (slashstar > dashdash)
slashstar = dashdash;
}
else if (!slashstar)
slashstar = dashdash;
/* From here on slashstar points at the earliest comment start, or is NULL. */
if (slashstar)
nchars = slashstar - yytext;
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
* contains chars that are not in SQL operators.
* The idea is to lex '=-' as two operators, but not
* to forbid operator names like '?-' that could not be
* sequences of SQL operators.
*/
if (nchars > 1 &&
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'))
{
int ic;
/* Scan backwards for a character that permits a trailing + or -. */
for (ic = nchars - 2; ic >= 0; ic--)
{
char c = yytext[ic];
if (c == '~' || c == '!' || c == '@' ||
c == '#' || c == '^' || c == '&' ||
c == '|' || c == '`' || c == '?' ||
c == '%')
break;
}
if (ic < 0)
{
/*
* didn't find a qualifying character, so remove
* all trailing [+-]
*/
do {
nchars--;
} while (nchars > 1 &&
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'));
}
}
SET_YYLLOC();
if (nchars < yyleng)
{
/* Strip the unwanted chars from the token */
yyless(nchars);
/*
* If what we have left is only one char, and it's
* one of the characters matching "self", then
* return it as a character token the same way
* that the "self" rule would have.
*/
if (nchars == 1 &&
strchr(",()[].;:+-*/%^<>=", yytext[0]))
return yytext[0];
/*
* Likewise, if what we have left is two chars, and
* those match the tokens ">=", "<=", "=>", "<>" or
* "!=", then we must return the appropriate token
* rather than the generic Op.
*/
if (nchars == 2)
{
if (yytext[0] == '=' && yytext[1] == '>')
return EQUALS_GREATER;
if (yytext[0] == '>' && yytext[1] == '=')
return GREATER_EQUALS;
if (yytext[0] == '<' && yytext[1] == '=')
return LESS_EQUALS;
if (yytext[0] == '<' && yytext[1] == '>')
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
}
}
/*
* Complain if operator is too long. Unlike the case
* for identifiers, we make this an error not a notice-
* and-truncate, because the odds are we are looking at
* a syntactic mistake anyway.
*/
if (nchars >= NAMEDATALEN)
yyerror("operator too long");
/* Return a copy of the (possibly shortened) operator text as a generic Op. */
yylval->str = pstrdup(yytext);
return Op;
}
{param} {
/* Parameter reference: the number is parsed from the text after the first character. */
SET_YYLLOC();
/* NOTE(review): atol() gives no overflow indication, so a very long digit string is not rejected here; confirm whether a range check is wanted. */
yylval->ival = atol(yytext + 1);
return PARAM;
}
{integer} {
/* Integer literal: the token code is chosen by process_integer_literal. */
SET_YYLLOC();
return process_integer_literal(yytext, yylval);
}
{decimal} {
/* Decimal literal: returned as FCONST with a copy of its text. */
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
{decimalfail} {
/* throw back the .., and treat as integer */
yyless(yyleng - 2);
SET_YYLLOC();
return process_integer_literal(yytext, yylval);
}
{real} {
/* Real literal: returned as FCONST with a copy of its text. */
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
{realfail1} {
/*
* throw back the [Ee], and treat as {decimal}. Note
* that it is possible the input is actually {integer},
* but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish.
*/
yyless(yyleng - 1);
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
{realfail2} {
/* throw back the [Ee][+-], and proceed as above */
yyless(yyleng - 2);
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
{identifier} {
	/*
	 * Unquoted identifier.  Look the text up in the keyword list first;
	 * a hit returns the keyword's token code and name.  Anything else
	 * becomes an IDENT whose text has been lower-cased and, if needed,
	 * truncated.
	 */
	const ScanKeyword *kw;

	SET_YYLLOC();

	kw = ScanKeywordLookup(yytext,
						   yyextra->keywords,
						   yyextra->num_keywords);
	if (kw == NULL)
	{
		/* not a keyword: ordinary identifier */
		yylval->str = downcase_truncate_identifier(yytext, yyleng, true);
		return IDENT;
	}

	/* keyword: report its name and token code */
	yylval->keyword = kw->name;
	return kw->value;
}
{other} {
/* Any other single character: the character itself is the token code. */
SET_YYLLOC();
return yytext[0];
}
<<EOF>> {
/* End of input outside any special start condition: record the location and stop scanning. */
SET_YYLLOC();
yyterminate();
}
%%
“PostgreSQL中的Rules有什么作用”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
原文链接:http://blog.itpub.net/6906/viewspace-2641703/