// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

// Please edit `parser.rl` if you want to modify this file. To generate
// `parser_generated.go`, please execute
//
// ```sh
// make data_parsers
// ```

package mydump

import (
	"io"

	"github.com/pingcap/errors"
)

%%{
#`

# This is a ragel parser to quickly scan through a data source file consisting
# of INSERT statements only. You may find detailed syntax explanation on its
# website <https://www.colm.net/open-source/ragel/>.

machine chunk_parser;

# We treat all unimportant patterns as "comments". This includes:
#  - Real SQL comments `/* ... */` and `-- ...`
#  - Whitespace
#  - Separators `,` and `;`
#  - The parts of the function `CONVERT(` and `USING UTF8MB4)`
#    (to strip the unnecessary detail from mydumper JSON output;
#    the suffix `i` means case-insensitive).
# Note that the keyword `INTO` is not part of this rule: with the patterns
# below it can only be matched by `unquoted`.
block_comment = '/*' any* :>> '*/';
line_comment = /--[^\r\n]*/;
comment =
	block_comment |
	line_comment |
	space |
	[,;] |
	'convert('i |
	'using utf8mb4)'i;

# The patterns parse quoted strings.
# `bs` matches a backslash only while the parser's escape flavor is something
# other than backslashEscapeFlavorNone, so `bs any` is only available as an
# escape sequence in that case.
bs = '\\' when { parser.escFlavor != backslashEscapeFlavorNone };

single_quoted = "'" (^"'" | bs any | "''")** "'";
double_quoted = '"' (^'"' | bs any | '""')** '"';
back_quoted = '`' (^'`' | '``')* '`';
unquoted = ^([,;()'"`/*] | space)+;

integer = '-'? [0-9]+;
hex_string = '0x' [0-9a-fA-F]+ | "x'"i [0-9a-fA-F]* "'";
bin_string = '0b' [01]+ | "b'"i [01]* "'";

# The scanner itself. `comment` has no action, so matching it produces no
# token and scanning simply continues. Every other rule records its token
# kind in `consumedToken` and leaves the scanner with `fbreak`.
main := |*
	comment;

	'(' => {
		consumedToken = tokRowBegin
		fbreak;
	};

	')' => {
		consumedToken = tokRowEnd
		fbreak;
	};

	'values'i => {
		consumedToken = tokValues
		fbreak;
	};

	'null'i => {
		consumedToken = tokNull
		fbreak;
	};

	'true'i => {
		consumedToken = tokTrue
		fbreak;
	};

	'false'i => {
		consumedToken = tokFalse
		fbreak;
	};

	integer => {
		consumedToken = tokInteger
		fbreak;
	};

	hex_string => {
		consumedToken = tokHexString
		fbreak;
	};

	bin_string => {
		consumedToken = tokBinString
		fbreak;
	};

	single_quoted => {
		consumedToken = tokSingleQuoted
		fbreak;
	};

	double_quoted => {
		consumedToken = tokDoubleQuoted
		fbreak;
	};

	back_quoted => {
		consumedToken = tokBackQuoted
		fbreak;
	};

	unquoted => {
		consumedToken = tokUnquoted
		fbreak;
	};
*|;

#`
}%%

%% write data;

// lex runs the chunk_parser machine over parser.buf and returns the next
// token together with the bytes it spans.
//
// Input matched by the `comment` rule yields no token and is skipped. When a
// token is found, parser.buf is advanced past it and parser.pos grows by the
// number of bytes consumed. When the buffer runs out before a token is
// complete and parser.isLastChunk is false, the bytes before the start of
// the pending token are dropped, parser.readBlock is called, and scanning
// resumes.
//
// Errors:
//   - "syntax error" when the machine reaches its error state (the position
//     is reported through parser.logSyntaxError first);
//   - io.EOF when the last chunk has been scanned up to its end;
//   - "syntax error: unexpected EOF" when the last chunk ends while the
//     scanner has not matched up to the end of the data;
//   - any error returned by parser.readBlock, wrapped with errors.Trace.
func (parser *ChunkParser) lex() (token, []byte, error) {
	var cs, ts, te, act, p int
	%% write init;

	for {
		data := parser.buf
		consumedToken := tokNil
		pe := len(data)
		// eof stays at -1 until the final chunk is in the buffer; only then
		// is it set to the end of the data.
		eof := -1
		if parser.isLastChunk {
			eof = pe
		}

		%% write exec;

		if cs == %%{ write error; }%% {
			parser.logSyntaxError()
			return tokNil, nil, errors.New("syntax error")
		}

		if consumedToken != tokNil {
			result := data[ts:te]
			parser.buf = data[te:]
			parser.pos += int64(te)
			return consumedToken, result, nil
		}

		if parser.isLastChunk {
			if te == eof {
				return tokNil, nil, io.EOF
			}
			return tokNil, nil, errors.New("syntax error: unexpected EOF")
		}

		// No token yet and more input may follow: drop everything before
		// the token start and shift the scanner offsets by the same amount
		// so they stay valid for the shortened buffer.
		parser.buf = parser.buf[ts:]
		parser.pos += int64(ts)
		p -= ts
		te -= ts
		ts = 0
		// NOTE(review): readBlock is presumably what extends parser.buf and
		// sets parser.isLastChunk - confirm against its definition.
		if err := parser.readBlock(); err != nil {
			return tokNil, nil, errors.Trace(err)
		}
	}

	// The hand-written loop above only exits through return statements; this
	// trailing return is kept unchanged from the original.
	return tokNil, nil, nil
}