github.com/hashicorp/hcl/v2@v2.20.0/hclsyntax/scan_string_lit.rl (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package hclsyntax
     5  
     6  // This file is generated from scan_string_lit.rl. DO NOT EDIT.
     7  %%{
     8    # (except you are actually in scan_string_lit.rl here, so edit away!)
     9  
    10    machine hclstrtok;
    11    write data;
    12  }%%
    13  
        // scanStringLit splits the content of a string literal into a sequence of
        // byte slices, each of which is a sub-slice of data. Every recognized token
        // (an escape sequence, a template-marker sequence, or a newline) becomes its
        // own element, and each run of bytes between two tokens becomes one literal
        // element, in input order.
        //
        // When quoted is true the "quoted" machine is used, which also recognizes
        // backslash escapes; otherwise the "unquoted" machine is used, which
        // recognizes only template-marker sequences and newlines. The scanner is
        // deliberately permissive and performs no validation: the caller is
        // expected to check each returned token and report errors.
    14  func scanStringLit(data []byte, quoted bool) [][]byte {
    15      var ret [][]byte
    16  
    17      %%{
    18          include UnicodeDerived "unicode_derived.rl";
    19  
                # Byte-level shape of a UTF-8 sequence: a lead byte followed by
                # the number of continuation bytes that lead byte calls for. Only
                # the lead/continuation structure is checked here.
    20          UTF8Cont = 0x80 .. 0xBF;
    21          AnyUTF8 = (
    22              0x00..0x7F |
    23              0xC0..0xDF . UTF8Cont |
    24              0xE0..0xEF . UTF8Cont . UTF8Cont |
    25              0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
    26          );
                # BadUTF8 is not referenced by any of the rules below.
    27          BadUTF8 = any - AnyUTF8;
    28  
    29          Hex = ('0'..'9' | 'a'..'f' | 'A'..'F');
    30  
    31          # Our goal with these patterns is to capture user intent as best as
    32          # possible, even if the input is invalid. The caller will then verify
    33          # whether each token is valid and generate suitable error messages
    34          # if not.
                #
                # Hex{0,4} and Hex{0,8} accept fewer digits than a complete escape
                # needs, so a truncated escape such as "\u12" is still returned as
                # a single token.
    35          UnicodeEscapeShort = "\\u" . Hex{0,4};
    36          UnicodeEscapeLong = "\\U" . Hex{0,8};
    37          UnicodeEscape = (UnicodeEscapeShort | UnicodeEscapeLong);
                # A backslash optionally followed by one UTF-8 character other than
                # 'u'/'U'; because of the "?", a lone backslash also matches.
    38          SimpleEscape = "\\" . (AnyUTF8 - ('U'|'u'))?;
                # Matches "$", "$$", "$${" and likewise "%", "%%", "%%{".
    39          TemplateEscape = ("$" . ("$" . ("{"?))?) | ("%" . ("%" . ("{"?))?);
    40          Newline = ("\r\n" | "\r" | "\n");
    41  
    42          action Begin {
    43              // If te is behind p then we've skipped over some literal
    44              // characters which we must now return.
    45              if te < p {
    46                  ret = append(ret, data[te:p])
    47              }
    48              ts = p;
    49          }
    50          action End {
                    // Record where this token stops and emit it; te is then the
                    // point from which any following literal bytes are counted.
    51              te = p;
    52              ret = append(ret, data[ts:te]);
    53          }
    54  
                # ">Begin" attaches Begin as an entering action (runs on the first
                # byte of a token) and "%End" attaches End as a leaving action
                # (runs when the token has been left, including at end of input).
    55          QuotedToken = (UnicodeEscape | SimpleEscape | TemplateEscape | Newline) >Begin %End;
    56          UnquotedToken = (TemplateEscape | Newline) >Begin %End;
                # Literal bytes are exactly those that cannot start a token in the
                # corresponding mode. They carry no actions; they are collected
                # later as data[te:p].
    57          QuotedLiteral = (any - ("\\" | "$" | "%" | "\r" | "\n"));
    58          UnquotedLiteral = (any - ("$" | "%" | "\r" | "\n"));
    59  
                # ":=" instantiates a machine and defines its entry point
                # (hclstrtok_en_quoted / hclstrtok_en_unquoted, used below);
                # "**" is Ragel's longest-match kleene star.
    60          quoted := (QuotedToken | QuotedLiteral)**;
    61          unquoted := (UnquotedToken | UnquotedLiteral)**;
    62  
    63      }%%
    64  
    65      // Ragel state
    66  	p := 0  // "Pointer" into data
    67  	pe := len(data) // End-of-data "pointer"
    68      ts := 0 // Start offset of the token currently being matched (set by Begin)
    69      te := 0 // End offset of the last emitted token; data[te:p] is pending literal text
    70      eof := pe // The whole input is in this one buffer, so end-of-buffer is end-of-input
    71  
            // Select the entry state of the machine that matches the requested mode.
    72      var cs int // current state
    73      switch {
    74      case quoted:
    75          cs = hclstrtok_en_quoted
    76      default:
    77          cs = hclstrtok_en_unquoted
    78      }
    79  
    80      // Make Go compiler happy
    81      _ = ts
    82      _ = eof
    83  
    84      /*token := func () {
    85          ret = append(ret, data[ts:te])
    86      }*/
    87  
    88      %%{
                # "nocs" keeps init from overwriting the cs value chosen above.
    89          write init nocs;
    90          write exec;
    91      }%%
    92  
    93      if te < p {
    94          // Collect any leftover literal characters at the end of the input
    95          ret = append(ret, data[te:p])
    96      }
    97  
    98      // If we fall out here without being in a final state then we've
    99      // encountered something that the scanner can't match, which should
   100      // be impossible (the scanner matches all bytes _somehow_) but we'll
   101      // tolerate it and let the caller deal with it.
            //
            // NOTE(review): if scanning stops in a non-final state exactly at the
            // end of the input (p == len(data)), data[p:] is empty and an empty
            // slice is appended here. This looks reachable when the input ends
            // partway through a multi-byte character that follows a backslash;
            // confirm that callers tolerate an empty trailing element.
   102      if cs < hclstrtok_first_final {
   103          ret = append(ret, data[p:len(data)])
   104      }
   105  
   106      return ret
   107  }