github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/mydump/parser.rl (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  // Please edit `parser.rl` if you want to modify this file. To generate
    15  // `parser_generated.go`, please execute
    16  //
    17  // ```sh
    18  // make data_parsers
    19  // ```
    20  
    21  package mydump
    22  
    23  import (
    24  	"io"
    25  
    26  	"github.com/pingcap/errors"
    27  )
    28  
    29  %%{
    30  #`
    31  
    32  # This is a ragel parser to quickly scan through a data source file consisting
    33  # of INSERT statements only. You may find detailed syntax explanation on its
    34  # website <https://www.colm.net/open-source/ragel/>.
    35  
    36  machine chunk_parser;
    37  
    38  # We treat all unimportant patterns as "comments". This includes:
    39  #  - Real SQL comments `/* ... */` and `-- ...`
    40  #  - Whitespace
    41  #  - Separators `,` and `;`
    42  #  - The keyword `INTO` (suffix `i` means case-insensitive).
    43  #  - The parts of the function `CONVERT(` and `USING UTF8MB4)`
    44  #    (to strip the unnecessary detail from mydumper JSON output)
        # NOTE(review): the comment rule below has no alternative for INTO, so that
        # bullet looks stale; as written, INTO would be matched by the unquoted rule.
        #
        # The :>> operator (finish-guarded concatenation in the Ragel manual) ends the
        # block comment as soon as the first closing marker has been read.
    45  block_comment = '/*' any* :>> '*/';
    46  line_comment = /--[^\r\n]*/;
    47  comment =
    48  	block_comment |
    49  	line_comment |
    50  	space |
    51  	[,;] |
    52  	'convert('i |
    53  	'using utf8mb4)'i;
    54  
    55  # The patterns parse quoted strings.
        # A backslash only acts as an escape introducer when the escFlavor of the
        # parser is not backslashEscapeFlavorNone; otherwise bs never matches and a
        # backslash is just another non-quote byte of the string body.
    56  bs = '\\' when { parser.escFlavor != backslashEscapeFlavorNone };
    57  
        # Inside single or double quotes the body is made of non-quote bytes, a
        # backslash followed by any byte (when bs is enabled), or a doubled quote
        # character. The ** operator is the longest-match Kleene star described in the
        # Ragel manual. Back-quoted names have no backslash form; only a doubled back
        # quote is accepted inside them.
    58  single_quoted = "'" (^"'" | bs any | "''")** "'";
    59  double_quoted = '"' (^'"' | bs any | '""')** '"';
    60  back_quoted = '`' (^'`' | '``')* '`';
        # Anything else: a non-empty run of bytes that are neither whitespace nor one
        # of the punctuation characters listed in the class below.
    61  unquoted = ^([,;()'"`/*] | space)+;
    62  
        # Numeric and binary literals. Hex and bit strings each have two spellings:
        # a 0x / 0b prefix followed by at least one digit, or a quoted form introduced
        # by a case-insensitive x / b whose digit list may be empty.
    63  integer = '-'? [0-9]+;
    64  hex_string = '0x' [0-9a-fA-F]+ | "x'"i [0-9a-fA-F]* "'";
    65  bin_string = '0b' [01]+ | "b'"i [01]* "'";
    66  
        # The scanner itself. The comment alternative has no action, so matching it
        # only advances the input. Every other alternative stores its token kind in
        # consumedToken and executes fbreak to leave the machine, which lets the host
        # function inspect consumedToken. Per the Ragel scanner rules the longest
        # match wins and equal-length matches go to the earlier alternative, so the
        # keywords and literals listed before unquoted take precedence over it.
    67  main := |*
    68  	comment;
    69  
    70  	'(' => {
    71  		consumedToken = tokRowBegin
    72  		fbreak;
    73  	};
    74  
    75  	')' => {
    76  		consumedToken = tokRowEnd
    77  		fbreak;
    78  	};
    79  
    80  	'values'i => {
    81  		consumedToken = tokValues
    82  		fbreak;
    83  	};
    84  
    85  	'null'i => {
    86  		consumedToken = tokNull
    87  		fbreak;
    88  	};
    89  
    90  	'true'i => {
    91  		consumedToken = tokTrue
    92  		fbreak;
    93  	};
    94  
    95  	'false'i => {
    96  		consumedToken = tokFalse
    97  		fbreak;
    98  	};
    99  
   100  	integer => {
   101  		consumedToken = tokInteger
   102  		fbreak;
   103  	};
   104  
   105  	hex_string => {
   106  		consumedToken = tokHexString
   107  		fbreak;
   108  	};
   109  
   110  	bin_string => {
   111  		consumedToken = tokBinString
   112  		fbreak;
   113  	};
   114  
   115  	single_quoted => {
   116  		consumedToken = tokSingleQuoted
   117  		fbreak;
   118  	};
   119  
   120  	double_quoted => {
   121  		consumedToken = tokDoubleQuoted
   122  		fbreak;
   123  	};
   124  
   125  	back_quoted => {
   126  		consumedToken = tokBackQuoted
   127  		fbreak;
   128  	};
   129  
   130  	unquoted => {
   131  		consumedToken = tokUnquoted
   132  		fbreak;
   133  	};
   134  *|;
   135  
   136  #`
   137  }%%
   138  
   139  %% write data;
   140  
        // lex runs the chunk_parser scanner over parser.buf and returns the next
        // token for which the scanner set consumedToken.
        //
        // Results:
        //   - the token kind stored in consumedToken by the matching scanner action;
        //   - the raw bytes of the match, data[ts:te], which is a sub-slice of the
        //     parser buffer rather than a copy;
        //   - an error, which is one of:
        //       * "syntax error" when the machine stops in its error state (the
        //         failure is also reported through parser.logSyntaxError);
        //       * io.EOF when no token is pending, the last chunk is loaded and te
        //         has reached the end of the buffer;
        //       * "syntax error: unexpected EOF" when the last chunk is loaded but te
        //         has not reached the end of the buffer;
        //       * whatever parser.readBlock returns, wrapped by errors.Trace.
        //
        // On a successful return the consumed bytes are removed from the front of
        // parser.buf and parser.pos is advanced by the same amount.
   141  func (parser *ChunkParser) lex() (token, []byte, error) {
        	// Variable names follow the Ragel scanner conventions: cs is the current
        	// state, ts and te are the start and end offsets of the current match within
        	// data, p is the read position and act is scanner bookkeeping. The init
        	// directive below is expanded by Ragel into their initialisation code.
   142  	var cs, ts, te, act, p int
   143  	%% write init;
   144  
   145  	for {
   146  		data := parser.buf
   147  		consumedToken := tokNil
   148  		pe := len(data)
        		// eof is only made equal to pe once the final chunk has been loaded; -1 can
        		// never equal pe, so before that the machine does not see end of input.
   149  		eof := -1
   150  		if parser.isLastChunk {
   151  			eof = pe
   152  		}
   153  
        		// Expanded by Ragel into the code that advances the machine over data.
   154  		%% write exec;
   155  
        		// The machine landed in its error state: the input cannot be tokenised.
   156  		if cs == %%{ write error; }%% {
   157  			parser.logSyntaxError()
   158  			return tokNil, nil, errors.New("syntax error")
   159  		}
   160  
        		// A scanner action fired: hand out the matched bytes and drop everything
        		// up to the end of the match from the buffer.
   161  		if consumedToken != tokNil {
   162  			result := data[ts:te]
   163  			parser.buf = data[te:]
   164  			parser.pos += int64(te)
   165  			return consumedToken, result, nil
   166  		}
   167  
        		// No token and no more input to fetch: either everything was scanned
        		// (te sits at the end of the buffer) or the input stops short.
   168  		if parser.isLastChunk {
   169  			if te == eof {
   170  				return tokNil, nil, io.EOF
   171  			} else {
   172  				return tokNil, nil, errors.New("syntax error: unexpected EOF")
   173  			}
   174  		}
   175  
        		// More input is needed. Discard the bytes before ts, rebase p and te onto
        		// the trimmed buffer, and request another block. readBlock is defined
        		// elsewhere; presumably it extends parser.buf and eventually sets
        		// isLastChunk (TODO confirm).
   176  		parser.buf = parser.buf[ts:]
   177  		parser.pos += int64(ts)
   178  		p -= ts
   179  		te -= ts
   180  		ts = 0
   181  		if err := parser.readBlock(); err != nil {
   182  			return tokNil, nil, errors.Trace(err)
   183  		}
   184  	}
   185  
        	// NOTE(review): this return looks unreachable because no break out of the
        	// loop is visible in this source; confirm against the generated code.
   186  	return tokNil, nil, nil
   187  }