github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/syntax/scan_test.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syntax
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"go/build"
    11  	"io/ioutil"
    12  	"path/filepath"
    13  	"testing"
    14  )
    15  
    16  func scan(src interface{}) (tokens string, err error) {
    17  	sc, err := newScanner("foo.star", src, false)
    18  	if err != nil {
    19  		return "", err
    20  	}
    21  
    22  	defer sc.recover(&err)
    23  
    24  	var buf bytes.Buffer
    25  	var val tokenValue
    26  	for {
    27  		tok := sc.nextToken(&val)
    28  
    29  		if buf.Len() > 0 {
    30  			buf.WriteByte(' ')
    31  		}
    32  		switch tok {
    33  		case EOF:
    34  			buf.WriteString("EOF")
    35  		case IDENT:
    36  			buf.WriteString(val.raw)
    37  		case INT:
    38  			if val.bigInt != nil {
    39  				fmt.Fprintf(&buf, "%d", val.bigInt)
    40  			} else {
    41  				fmt.Fprintf(&buf, "%d", val.int)
    42  			}
    43  		case FLOAT:
    44  			fmt.Fprintf(&buf, "%e", val.float)
    45  		case STRING:
    46  			fmt.Fprintf(&buf, "%q", val.string)
    47  		case SHELL:
    48  			fmt.Fprintf(&buf, "shell(`%s`)", val.string)
    49  		case DSHELL_START:
    50  			fmt.Fprintf(&buf, "shell(`%s`", val.string)
    51  		case DSHELL_PART:
    52  			fmt.Fprintf(&buf, "`%s`", val.string)
    53  		case DSHELL_END:
    54  			fmt.Fprintf(&buf, "`%s`)", val.string)
    55  		default:
    56  			buf.WriteString(tok.String())
    57  		}
    58  		if tok == EOF {
    59  			break
    60  		}
    61  	}
    62  	return buf.String(), nil
    63  }
    64  
    65  func TestScanner(t *testing.T) {
        	// Table-driven test of the scanner. Each case pairs a source snippet
        	// (input) with the string scan is expected to produce for it (want):
        	// either the space-separated token rendering ending in "EOF", or,
        	// when scan returns an error, that error's message.
    66  	for _, test := range []struct {
    67  		input, want string
    68  	}{
    69  		{``, "EOF"},
    70  		{`123`, "123 ; EOF"},
    71  		{`x.y`, "x . y ; EOF"},
    72  		{`chocolate.éclair`, `chocolate . éclair ; EOF`},
    73  		{`123 "foo" hello x.y`, `123 "foo" hello x . y ; EOF`},
    74  		{`print(x)`, "print ( x ) ; EOF"},
    75  		{`print(x); print(y)`, "print ( x ) ; print ( y ) ; EOF"},
    76  		{"\nprint(\n1\n)\n", "print ( 1 ; ) ; EOF"}, // final \n is at toplevel on non-blank line => token
    77  		{`/ // /= //= ///=`, "/ // /= //= // /= EOF"},
    78  		{`# hello
    79  print(x)`, "print ( x ) ; EOF"},
    80  		{`# hello
    81  print(1)
    82  cc_binary(name="foo")
    83  def f(x) {
    84  		return x+1
    85  }
    86  print(1)
    87  `,
    88  			`print ( 1 ) ; ` +
    89  				`cc_binary ( name = "foo" ) ; ` +
    90  				`def f ( x ) { ` +
    91  				`return x + 1 ; } ; ` +
    92  				`print ( 1 ) ; ` +
    93  				`EOF`},
    94  		// EOF should act like an implicit newline.
    95  		{`def f() { } `,
    96  			"def f ( ) { } ; EOF"},
    97  		{`def f() {
    98  	}`,
    99  			"def f ( ) { } ; EOF"},
   100  		{`def f() {
   101  	}
   102  # oops`,
   103  			"def f ( ) { } ; EOF"},
   104  		{`def f() {
   105  	} \
   106  `,
   107  			"def f ( ) { } ; EOF"},
   108  		{`def f() {
   109  	}
   110  `,
   111  			"def f ( ) { } ; EOF"},
   112  		{`pass
   113  
   114  
   115  pass`, "pass ; pass ; EOF"}, // consecutive newlines are consolidated
   116  		{`def f() {
   117      }
   118      `, "def f ( ) { } ; EOF"},
   119  		{`def f() {
   120      } 
   121      ` + "\n", "def f ( ) { } ; EOF"},
   122  		{"pass", "pass ; EOF"},
   123  		{"pass\n", "pass ; EOF"},
   124  		{"pass\n ", "pass ; EOF"},
   125  		{"pass\n \n", "pass ; EOF"},
   126  		{"if x {\n  }\n ", "if x { } ; EOF"},
   127  		{`x = 1 + \
   128  2`, `x = 1 + 2 ; EOF`},
   129  		{`x = 'a\nb'`, `x = "a\nb" ; EOF`},
   130  		{`x = r'a\nb'`, `x = "a\\nb" ; EOF`},
   131  		{"x = 'a\\\nb'", `x = "ab" ; EOF`},
   132  		{`x = '\''`, `x = "'" ; EOF`},
   133  		{`x = "\""`, `x = "\"" ; EOF`},
   134  		{`x = r'\''`, `x = "\\'" ; EOF`},
   135  		{`x = '''\''''`, `x = "'" ; EOF`},
   136  		{`x = r'''\''''`, `x = "\\'" ; EOF`},
   137  		{`x = ''''a'b'c'''`, `x = "'a'b'c" ; EOF`},
   138  		{"x = '''a\nb'''", `x = "a\nb" ; EOF`},
   139  		{"x = '''a\rb'''", `x = "a\nb" ; EOF`},
   140  		{"x = '''a\r\nb'''", `x = "a\nb" ; EOF`},
   141  		{"x = '''a\n\rb'''", `x = "a\n\nb" ; EOF`},
   142  		{"x = r'a\\\nb'", `x = "a\\\nb" ; EOF`},
   143  		{"x = r'a\\\rb'", `x = "a\\\nb" ; EOF`},
   144  		{"x = r'a\\\r\nb'", `x = "a\\\nb" ; EOF`},
   145  		{"a\rb", `a ; b ; EOF`},
   146  		{"a\nb", `a ; b ; EOF`},
   147  		{"a\r\nb", `a ; b ; EOF`},
   148  		{"a\n\nb", `a ; b ; EOF`},
   149  		// numbers
   150  		{"0", `0 ; EOF`},
   151  		{"1\n", `1 ; EOF`},
   152  		{"00", `0 ; EOF`},
   153  		{"0.", `0.000000e+00 ; EOF`},
   154  		{"0.e1", `0.000000e+00 ; EOF`},
   155  		{".0", `0.000000e+00 ; EOF`},
   156  		{"0.0", `0.000000e+00 ; EOF`},
   157  		{".e1", `. e1 ; EOF`},
   158  		{"1", `1 ; EOF`},
   159  		{"1.", `1.000000e+00 ; EOF`},
   160  		{".1", `1.000000e-01 ; EOF`},
   161  		{".1e1", `1.000000e+00 ; EOF`},
   162  		{".1e+1", `1.000000e+00 ; EOF`},
   163  		{".1e-1", `1.000000e-02 ; EOF`},
   164  		{"1e1", `1.000000e+01 ; EOF`},
   165  		{"1e+1", `1.000000e+01 ; EOF`},
   166  		{"1e-1", `1.000000e-01 ; EOF`},
   167  		{"123", `123 ; EOF`},
   168  		{"123e45", `1.230000e+47 ; EOF`},
   169  		{"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 ; EOF`},
   170  		{"12345678901234567890", `12345678901234567890 ; EOF`},
   171  		// hex
   172  		{"0xA", `10 ; EOF`},
   173  		{"0xAAG", `170 G ; EOF`},
   174  		{"0xG", `foo.star:1:1: invalid hex literal`},
   175  		{"0XA", `10 ; EOF`},
   176  		{"0XG", `foo.star:1:1: invalid hex literal`},
   177  		{"0xA.", `10 . ; EOF`},
   178  		{"0xA.e1", `10 . e1 ; EOF`},
   179  		{"0x12345678deadbeef12345678", `5634002672576678570168178296 ; EOF`},
   180  		// binary
   181  		{"0b1010", `10 ; EOF`},
   182  		{"0B111101", `61 ; EOF`},
   183  		{"0b3", `foo.star:1:3: invalid binary literal`},
   184  		{"0b1010201", `10 201 ; EOF`},
   185  		{"0b1010.01", `10 1.000000e-02 ; EOF`},
   186  		{"0b0000", `0 ; EOF`},
   187  		// octal
   188  		{"0o123", `83 ; EOF`},
   189  		{"0o12834", `10 834 ; EOF`},
   190  		{"0o12934", `10 934 ; EOF`},
   191  		{"0o12934.", `10 9.340000e+02 ; EOF`},
   192  		{"0o12934.1", `10 9.341000e+02 ; EOF`},
   193  		{"0o12934e1", `10 9.340000e+03 ; EOF`},
   194  		{"0o123.", `83 . ; EOF`},
   195  		{"0o123.1", `83 1.000000e-01 ; EOF`},
   196  		{"0123", `foo.star:1:5: obsolete form of octal literal; use 0o123`},
   197  		{"012834", `foo.star:1:1: invalid int literal`},
   198  		{"012934", `foo.star:1:1: invalid int literal`},
   199  		{"i = 012934", `foo.star:1:5: invalid int literal`},
   200  		// octal escapes in string literals
   201  		{`"\037"`, `"\x1f" ; EOF`},
   202  		{`"\377"`, `"\xff" ; EOF`},
   203  		{`"\378"`, `"\x1f8" ; EOF`},                              // = '\37' + '8'
   204  		{`"\400"`, `foo.star:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3
   205  
   206  		// backslash escapes
   207  		// As in Go, a backslash must escape something.
   208  		// (Python started issuing a deprecation warning in 3.6.)
   209  		{`"foo\(bar"`, `foo.star:1:1: invalid escape sequence \(`},
   210  		{`"\+"`, `foo.star:1:1: invalid escape sequence \+`},
   211  		{`"\w"`, `foo.star:1:1: invalid escape sequence \w`},
   212  		{`"\""`, `"\"" ; EOF`},
   213  		{`"\'"`, `foo.star:1:1: invalid escape sequence \'`},
   214  		{`'\w'`, `foo.star:1:1: invalid escape sequence \w`},
   215  		{`'\''`, `"'" ; EOF`},
   216  		{`'\"'`, `foo.star:1:1: invalid escape sequence \"`},
   217  		{`"""\w"""`, `foo.star:1:1: invalid escape sequence \w`},
   218  		{`"""\""""`, `"\"" ; EOF`},
   219  		{`"""\'"""`, `foo.star:1:1: invalid escape sequence \'`},
   220  		{`'''\w'''`, `foo.star:1:1: invalid escape sequence \w`},
   221  		{`'''\''''`, `"'" ; EOF`},
   222  		{`'''\"'''`, `foo.star:1:1: invalid escape sequence \"`}, // error
   223  		{`r"\w"`, `"\\w" ; EOF`},
   224  		{`r"\""`, `"\\\"" ; EOF`},
   225  		{`r"\'"`, `"\\'" ; EOF`},
   226  		{`r'\w'`, `"\\w" ; EOF`},
   227  		{`r'\''`, `"\\'" ; EOF`},
   228  		{`r'\"'`, `"\\\"" ; EOF`},
   229  		{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
   230  		{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
   231  		// floats starting with octal digits
   232  		{"012934.", `1.293400e+04 ; EOF`},
   233  		{"012934.1", `1.293410e+04 ; EOF`},
   234  		{"012934e1", `1.293400e+05 ; EOF`},
   235  		{"0123.", `1.230000e+02 ; EOF`},
   236  		{"0123.1", `1.231000e+02 ; EOF`},
   237  		// github.com/google/skylark/issues/16
   238  		{"x ! 0", "foo.star:1:3: unexpected input character '!'"},
   239  		// github.com/google/exprcore-go/issues/80
   240  		{"([{<>}])", "( [ { < > } ] ) ; EOF"},
   241  		{"f();", "f ( ) ; EOF"},
   242  		// github.com/google/exprcore-go/issues/104
   243  		{"def f() {\n  if x {\n    }\n  } ", `def f ( ) { if x { } ; } ; EOF`},
   244  		{`while cond { pass }`, "while cond { pass } ; EOF"},
   245  		// github.com/google/exprcore-go/issues/107
   246  		{"~= ~= 5", "~ = ~ = 5 ; EOF"},
   247  		{"0in", "0 in EOF"},
   248  		{"0or", "foo.star:1:3: invalid octal literal"},
   249  		{"6in", "6 in EOF"},
   250  		{"6or", "6 or EOF"},
   251  		{`pkg(
   252    install = () => {
   253      1
   254      2
   255    }
   256  )`, "pkg ( install = ( ) => { 1 ; 2 ; } ; ) ; EOF"},
   257  		{`if True { pass
   258  } else {
   259  	pass }`,
   260  			`if True { pass ; } else { pass } ; EOF`},
   261  		{"if cond {\n\tpass\n}",
   262  			`if cond { pass ; } ; EOF`},
   263  		{"{\nx: y for a in b if c\n}",
   264  			`{ x : y for a in b if c ; } ; EOF`},
   265  		{"1 + 2\n1", `1 + 2 ; 1 ; EOF`},
   266  		{"1 + \\\n2\n1", `1 + 2 ; 1 ; EOF`},
   267  		{"%{ age = 12, bar = 13 }", "%{ age = 12 , bar = 13 } ; EOF"},
   268  		{"$ foo bar", "shell(`foo bar`) ; EOF"},
   269  		{"$ env\n$ foo\n", "shell(`env`) ; shell(`foo`) ; EOF"},
   270  		{"$ env ${x.y}\n$ foo\n", "shell(`env ` x . y ``) ; shell(`foo`) ; EOF"},
   271  		{"$ foo bar ${x}", "shell(`foo bar ` x ``) ; EOF"},
   272  		{"$ foo bar ${x} baz ${1+2}", "shell(`foo bar ` x ` baz ` 1 + 2 ``) ; EOF"},
   273  		{"`foo bar ${x}`", "shell(`foo bar ` x ``) ; EOF"},
   274  		{"`foo bar ${x} baz ${1+2}`", "shell(`foo bar ` x ` baz ` 1 + 2 ``) ; EOF"},
   275  		{"import foo, bar", "import foo , bar ; EOF"},
   276  		{`import "github.com/foo", bar`, `import "github.com/foo" , bar ; EOF`},
   277  		{"import foo as f, bar", "import foo as f , bar ; EOF"},
   278  		{`import "github.com/foo" as f, bar`, `import "github.com/foo" as f , bar ; EOF`},
   279  	} {
   280  		got, err := scan(test.input)
        		// A failed scan is compared by its error message instead of a token
        		// string. The type assertion panics if err is not an Error value.
   281  		if err != nil {
   282  			got = err.(Error).Error()
   283  		}
        		// Report the mismatch and keep going so every failing case is listed.
   284  		if test.want != got {
   285  			t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
   286  		}
   287  	}
   288  }
   289  
   290  // dataFile is the same as exprcoretest.DataFile.
   291  // We make a copy to avoid a dependency cycle.
   292  var dataFile = func(pkgdir, filename string) string {
   293  	return filepath.Join(build.Default.GOPATH, "src/github.com/lab47/exprcore", pkgdir, filename)
   294  }
   295  
   296  func BenchmarkScan(b *testing.B) {
   297  	filename := dataFile("syntax", "testdata/scan.star")
   298  	b.StopTimer()
   299  	data, err := ioutil.ReadFile(filename)
   300  	if err != nil {
   301  		b.Fatal(err)
   302  	}
   303  	b.StartTimer()
   304  
   305  	for i := 0; i < b.N; i++ {
   306  		sc, err := newScanner(filename, data, false)
   307  		if err != nil {
   308  			b.Fatal(err)
   309  		}
   310  		var val tokenValue
   311  		for sc.nextToken(&val) != EOF {
   312  		}
   313  	}
   314  }