github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/pkg/pogosh/lexer_test.go (about)

     1  // Copyright 2020 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pogosh
     6  
     7  import (
     8  	"reflect"
     9  	"testing"
    10  )
    11  
    12  // The positive tests are expected to pass lexing.
    13  var lexerPositiveTests = []struct {
    14  	name string
    15  	in   string
    16  	out  []token
    17  }{
    18  	// Words
    19  	{"Single Word",
    20  		`abc`,
    21  		[]token{{"abc", ttWord}},
    22  	},
    23  	{"Single Character Word",
    24  		`a`,
    25  		[]token{{"a", ttWord}},
    26  	},
    27  	{"Multiple Words",
    28  		`a bc`,
    29  		[]token{{"a", ttWord}, {"bc", ttWord}},
    30  	},
    31  
    32  	// Newlines and blanks
    33  	{"Empty input",
    34  		"",
    35  		[]token{},
    36  	},
    37  	{"Blank input",
    38  		"\t  \t  ",
    39  		[]token{},
    40  	},
    41  	{"Various whitespace",
    42  		"  \n\t\n \n",
    43  		[]token{{"\n", ttNewLine}, {"\n", ttNewLine}, {"\n", ttNewLine}},
    44  	},
    45  	{"Whitespace Word Combo",
    46  		" a \nb\t\nc \nd",
    47  		[]token{
    48  			{"a", ttWord}, {"\n", ttNewLine}, {"b", ttWord}, {"\n", ttNewLine},
    49  			{"c", ttWord}, {"\n", ttNewLine}, {"d", ttWord}},
    50  	},
    51  
    52  	// Single quotes
    53  	{"Single quotes",
    54  		"'a'",
    55  		[]token{{"'a'", ttWord}},
    56  	},
    57  	{"Single quotes with spaces",
    58  		"'a bc'",
    59  		[]token{{"'a bc'", ttWord}},
    60  	},
    61  	{"Single escape",
    62  		"'\t\\\n'",
    63  		[]token{{"'\t\\\n'", ttWord}},
    64  	},
    65  
    66  	// Double quotes
    67  	{"Double Quote",
    68  		`"a"`,
    69  		[]token{{`"a"`, ttWord}},
    70  	},
    71  	{"Double Quotes with spaces",
    72  		`"a bc"`,
    73  		[]token{{`"a bc"`, ttWord}},
    74  	},
    75  	{"Double Quote With Escapes",
    76  		`"\"a\\\$(" ")"`,
    77  		[]token{{`"\"a\\\$("`, ttWord}, {`")"`, ttWord}},
    78  	},
    79  	{"Double Quote With Subexpression",
    80  		`"a$(b)"`,
    81  		[]token{{`"a$(b)"`, ttWord}},
    82  	},
    83  	// TODO: The following two rules aren't a regular language.
    84  	//{"Double Quote With Subexpression Nested",
    85  	//	`"a$(b "c")"`,
    86  	//	[]token{{`"a$(b "c")"`, ttWord}},
    87  	//},
    88  	//{"Double Quote With Subexpression Double Nested",
    89  	//	`"a$(b "c$(d "e")")"`,
    90  	//	[]token{{`"a$(b "c$(d "e")")"`, ttWord}},
    91  	//},
    92  
    93  	// Command substitution
    94  	// TODO
    95  
    96  	// Line comments
    97  	{"Line Comment",
    98  		`# "comment"`,
    99  		[]token{},
   100  	},
   101  	{"Multiple Line Comments",
   102  		`abc d # comment
   103  efg # comment2`,
   104  		[]token{{"abc", ttWord}, {"d", ttWord}, {"\n", ttNewLine}, {"efg", ttWord}},
   105  	},
   106  	{"Line Comment Continuation",
   107  		"# comment \\\nabc",
   108  		[]token{},
   109  	},
   110  
   111  	// Line continuation
   112  	{"Line Continuation",
   113  		"\\\n",
   114  		[]token{},
   115  	},
   116  	{"Line Continuation in Token",
   117  		"ech\\\no abc",
   118  		[]token{{"ech\\\no", ttWord}, {"abc", ttWord}},
   119  	},
   120  	{"Line Continuation in Double Quote",
   121  		"echo \"ab\\\nc\"",
   122  		[]token{{"echo", ttWord}, {"\"ab\\\nc\"", ttWord}},
   123  	},
   124  	{"Line Continuation in Single Quote",
   125  		"echo 'ab\\\nc'",
   126  		[]token{{"echo", ttWord}, {"'ab\\\nc'", ttWord}},
   127  	},
   128  
   129  	// Operators
   130  	{"Operator Single",
   131  		">>",
   132  		[]token{{">>", ttDGreat}},
   133  	},
   134  	{"Operators Compact",
   135  		"a&&b||c;;d<<e>>f<&g>&h<>i<<-j>|k",
   136  		[]token{{"a", ttWord}, {"&&", ttAndIf}, {"b", ttWord},
   137  			{"||", ttOrIf}, {"c", ttWord}, {";;", ttDSemi}, {"d", ttWord},
   138  			{"<<", ttDLess}, {"e", ttWord}, {">>", ttDGreat}, {"f", ttWord},
   139  			{"<&", ttLessAnd}, {"g", ttWord}, {">&", ttGreatAnd}, {"h", ttWord},
   140  			{"<>", ttLessGreat}, {"i", ttWord}, {"<<-", ttDLessDash},
   141  			{"j", ttWord}, {">|", ttClobber}, {"k", ttWord}},
   142  	},
   143  	{"Operators Whitespace",
   144  		" a && b || c ;; d << e >> f <& g >& h <> i <<- j >| k ",
   145  		[]token{{"a", ttWord}, {"&&", ttAndIf}, {"b", ttWord},
   146  			{"||", ttOrIf}, {"c", ttWord}, {";;", ttDSemi}, {"d", ttWord},
   147  			{"<<", ttDLess}, {"e", ttWord}, {">>", ttDGreat}, {"f", ttWord},
   148  			{"<&", ttLessAnd}, {"g", ttWord}, {">&", ttGreatAnd}, {"h", ttWord},
   149  			{"<>", ttLessGreat}, {"i", ttWord}, {"<<-", ttDLessDash},
   150  			{"j", ttWord}, {">|", ttClobber}, {"k", ttWord}},
   151  	},
   152  	{"Operators Escaped",
   153  		` a \&\& b \|\| c \;\; d \<\< e \>\> f \<\& g \>\& h \<\> i \<\<\- j \>\| k `,
   154  		[]token{{"a", ttWord}, {`\&\&`, ttWord}, {"b", ttWord},
   155  			{`\|\|`, ttWord}, {"c", ttWord}, {`\;\;`, ttWord}, {"d", ttWord},
   156  			{`\<\<`, ttWord}, {"e", ttWord}, {`\>\>`, ttWord}, {"f", ttWord},
   157  			{`\<\&`, ttWord}, {"g", ttWord}, {`\>\&`, ttWord}, {"h", ttWord},
   158  			{`\<\>`, ttWord}, {"i", ttWord}, {`\<\<\-`, ttWord},
   159  			{"j", ttWord}, {`\>\|`, ttWord}, {"k", ttWord}},
   160  	},
   161  	{"Operator Dash",
   162  		`\<\<- -`,
   163  		[]token{{`\<\<-`, ttWord}, {`-`, ttWord}},
   164  	},
   165  	{"Operator Half Escape",
   166  		`echo \&&`,
   167  		[]token{{"echo", ttWord}, {`\&`, ttWord}, {"&", ttWord}},
   168  	},
   169  
   170  	// Examples from POSIX.1-2017
   171  	// TODO: these tests require some work
   172  	/*{"POSIX Example 1",
   173  			`a=1
   174  	set 2
   175  	echo ${a}b-$ab-${1}0-${10}-$10
   176  	`,
   177  			[]string{`a=1`, `set`, `2`, `echo`, `${a}b-$ab-${1}0-${10}-$10`},
   178  		},
   179  		{"POSIX Example 2",
   180  			`foo=asdf
   181  	echo ${foo-bar}xyz}
   182  	foo=
   183  	echo ${foo-bar}xyz}
   184  	unset foo
   185  	echo ${foo-bar}xyz}`,
   186  			[]string{},
   187  		},
   188  		{"POSIX Example 3",
   189  			`# repeat a command 100 times
   190  	x=100
   191  	while [ $x -gt 0 ]
   192  	do
   193  		command x=$(($x-1))
   194  	done`,
   195  			[]string{`x=100`, `while`, `[`, `$x`, `-gt`, `0`, `]`, `do`, `command`, `x=$(($x-1))`, `done`},
   196  		},*/
   197  }
   198  
   199  func TestLexerPositive(t *testing.T) {
   200  	for _, tt := range lexerPositiveTests {
   201  		t.Run(tt.name, func(t *testing.T) {
   202  			tokens, err := tokenize(tt.in)
   203  
   204  			if err != nil {
   205  				t.Error(err)
   206  			} else {
   207  				if !reflect.DeepEqual(tokens, tt.out) {
   208  					t.Errorf("got %v, want %v", tokens, tt.out)
   209  				}
   210  			}
   211  		})
   212  	}
   213  }
   214  
   215  // Every input is tested with and without a trailing newline
   216  func TestLexerPositiveTrailingNewline(t *testing.T) {
   217  	for _, tt := range lexerPositiveTests {
   218  		t.Run(tt.name, func(t *testing.T) {
   219  			tokens, err := tokenize(tt.in + "\n")
   220  
   221  			if err != nil {
   222  				t.Error(err)
   223  			} else if len(tokens) == 0 || tokens[len(tokens)-1].ttype != ttNewLine {
   224  				t.Errorf("expected %v to have a trailing newline", tokens)
   225  			} else {
   226  				tokens = tokens[:len(tokens)-1]
   227  				if !reflect.DeepEqual(tokens, tt.out) {
   228  					t.Errorf("got %v, want %v", tokens, tt.out)
   229  				}
   230  			}
   231  		})
   232  	}
   233  }