github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/pkg/pogosh/lexer_test.go

// Copyright 2020 the u-root Authors. All rights reserved
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pogosh

import (
	"reflect"
	"testing"
)

// The positive tests are expected to pass lexing.
var lexerPositiveTests = []struct {
	name string
	in   string
	out  []token
}{
	// Words
	{"Single Word",
		`abc`,
		[]token{{"abc", ttWord}},
	},
	{"Single Character Word",
		`a`,
		[]token{{"a", ttWord}},
	},
	{"Multiple Words",
		`a bc`,
		[]token{{"a", ttWord}, {"bc", ttWord}},
	},

	// Newlines and blanks
	{"Empty input",
		"",
		[]token{},
	},
	{"Blank input",
		"\t \t ",
		[]token{},
	},
	{"Various whitespace",
		" \n\t\n \n",
		[]token{{"\n", ttNewLine}, {"\n", ttNewLine}, {"\n", ttNewLine}},
	},
	{"Whitespace Word Combo",
		" a \nb\t\nc \nd",
		[]token{
			{"a", ttWord}, {"\n", ttNewLine}, {"b", ttWord}, {"\n", ttNewLine},
			{"c", ttWord}, {"\n", ttNewLine}, {"d", ttWord}},
	},

	// Single quotes
	{"Single quotes",
		"'a'",
		[]token{{"'a'", ttWord}},
	},
	{"Single quotes with spaces",
		"'a bc'",
		[]token{{"'a bc'", ttWord}},
	},
	{"Single escape",
		"'\t\\\n'",
		[]token{{"'\t\\\n'", ttWord}},
	},

	// Double quotes
	{"Double Quote",
		`"a"`,
		[]token{{`"a"`, ttWord}},
	},
	{"Double Quotes with spaces",
		`"a bc"`,
		[]token{{`"a bc"`, ttWord}},
	},
	{"Double Quote With Escapes",
		`"\"a\\\$(" ")"`,
		[]token{{`"\"a\\\$("`, ttWord}, {`")"`, ttWord}},
	},
	{"Double Quote With Subexpression",
		`"a$(b)"`,
		[]token{{`"a$(b)"`, ttWord}},
	},
	// TODO: The following two rules aren't a regular language.
	//{"Double Quote With Subexpression Nested",
	//	`"a$(b "c")"`,
	//	[]token{{`"a$(b "c")"`, ttWord}},
	//},
	//{"Double Quote With Subexpression Double Nested",
	//	`"a$(b "c$(d "e")")"`,
	//	[]token{{`"a$(b "c$(d "e")")"`, ttWord}},
	//},

	// Command substitution
	// TODO
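	// A hedged sketch of what a command substitution case might look like,
	// assuming the lexer keeps `$(...)` together as a single word token,
	// mirroring the "Double Quote With Subexpression" case above. It is left
	// commented out because that behavior is an assumption, not verified here.
	//{"Command Substitution",
	//	`a$(b c)`,
	//	[]token{{`a$(b c)`, ttWord}},
	//},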
	// Line comments
	{"Line Comment",
		`# "comment"`,
		[]token{},
	},
	{"Multiple Line Comments",
		`abc d # comment
efg # comment2`,
		[]token{{"abc", ttWord}, {"d", ttWord}, {"\n", ttNewLine}, {"efg", ttWord}},
	},
	{"Line Comment Continuation",
		"# comment \\\nabc",
		[]token{},
	},

	// Line continuation
	{"Line Continuation",
		"\\\n",
		[]token{},
	},
	{"Line Continuation in Token",
		"ech\\\no abc",
		[]token{{"ech\\\no", ttWord}, {"abc", ttWord}},
	},
	{"Line Continuation in Double Quote",
		"echo \"ab\\\nc\"",
		[]token{{"echo", ttWord}, {"\"ab\\\nc\"", ttWord}},
	},
	{"Line Continuation in Single Quote",
		"echo 'ab\\\nc'",
		[]token{{"echo", ttWord}, {"'ab\\\nc'", ttWord}},
	},

	// Operators
	{"Operator Single",
		">>",
		[]token{{">>", ttDGreat}},
	},
	{"Operators Compact",
		"a&&b||c;;d<<e>>f<&g>&h<>i<<-j>|k",
		[]token{{"a", ttWord}, {"&&", ttAndIf}, {"b", ttWord},
			{"||", ttOrIf}, {"c", ttWord}, {";;", ttDSemi}, {"d", ttWord},
			{"<<", ttDLess}, {"e", ttWord}, {">>", ttDGreat}, {"f", ttWord},
			{"<&", ttLessAnd}, {"g", ttWord}, {">&", ttGreatAnd}, {"h", ttWord},
			{"<>", ttLessGreat}, {"i", ttWord}, {"<<-", ttDLessDash},
			{"j", ttWord}, {">|", ttClobber}, {"k", ttWord}},
	},
	{"Operators Whitespace",
		" a && b || c ;; d << e >> f <& g >& h <> i <<- j >| k ",
		[]token{{"a", ttWord}, {"&&", ttAndIf}, {"b", ttWord},
			{"||", ttOrIf}, {"c", ttWord}, {";;", ttDSemi}, {"d", ttWord},
			{"<<", ttDLess}, {"e", ttWord}, {">>", ttDGreat}, {"f", ttWord},
			{"<&", ttLessAnd}, {"g", ttWord}, {">&", ttGreatAnd}, {"h", ttWord},
			{"<>", ttLessGreat}, {"i", ttWord}, {"<<-", ttDLessDash},
			{"j", ttWord}, {">|", ttClobber}, {"k", ttWord}},
	},
	{"Operators Escaped",
		` a \&\& b \|\| c \;\; d \<\< e \>\> f \<\& g \>\& h \<\> i \<\<\- j \>\| k `,
		[]token{{"a", ttWord}, {`\&\&`, ttWord}, {"b", ttWord},
			{`\|\|`, ttWord}, {"c", ttWord}, {`\;\;`, ttWord}, {"d", ttWord},
			{`\<\<`, ttWord}, {"e", ttWord}, {`\>\>`, ttWord}, {"f", ttWord},
			{`\<\&`, ttWord}, {"g", ttWord}, {`\>\&`, ttWord}, {"h", ttWord},
			{`\<\>`, ttWord}, {"i", ttWord}, {`\<\<\-`, ttWord},
			{"j", ttWord}, {`\>\|`, ttWord}, {"k", ttWord}},
	},
	{"Operator Dash",
		`\<\<- -`,
		[]token{{`\<\<-`, ttWord}, {`-`, ttWord}},
	},
	{"Operator Half Escape",
		`echo \&&`,
		[]token{{"echo", ttWord}, {`\&`, ttWord}, {"&", ttWord}},
	},

	// Examples from POSIX.1-2017
	// TODO: these tests require some work; the expected values below are
	// still plain strings rather than []token.
	/*{"POSIX Example 1",
		`a=1
set 2
echo ${a}b-$ab-${1}0-${10}-$10
`,
		[]string{`a=1`, `set`, `2`, `echo`, `${a}b-$ab-${1}0-${10}-$10`},
	},
	{"POSIX Example 2",
		`foo=asdf
echo ${foo-bar}xyz}
foo=
echo ${foo-bar}xyz}
unset foo
echo ${foo-bar}xyz}`,
		[]string{},
	},
	{"POSIX Example 3",
		`# repeat a command 100 times
x=100
while [ $x -gt 0 ]
do
	command x=$(($x-1))
done`,
		[]string{`x=100`, `while`, `[`, `$x`, `-gt`, `0`, `]`, `do`, `command`, `x=$(($x-1))`, `done`},
	},*/
}

func TestLexerPositive(t *testing.T) {
	for _, tt := range lexerPositiveTests {
		t.Run(tt.name, func(t *testing.T) {
			tokens, err := tokenize(tt.in)

			if err != nil {
				t.Error(err)
			} else {
				if !reflect.DeepEqual(tokens, tt.out) {
					t.Errorf("got %v, want %v", tokens, tt.out)
				}
			}
		})
	}
}

// Every input from lexerPositiveTests is also run with a trailing newline
// appended; the lexer is expected to emit a final ttNewLine token, which is
// stripped before comparing against the expected output.
func TestLexerPositiveTrailingNewline(t *testing.T) {
	for _, tt := range lexerPositiveTests {
		t.Run(tt.name, func(t *testing.T) {
			tokens, err := tokenize(tt.in + "\n")

			if err != nil {
				t.Error(err)
			} else if len(tokens) == 0 || tokens[len(tokens)-1].ttype != ttNewLine {
				t.Errorf("expected %v to have a trailing newline", tokens)
			} else {
				tokens = tokens[:len(tokens)-1]
				if !reflect.DeepEqual(tokens, tt.out) {
					t.Errorf("got %v, want %v", tokens, tt.out)
				}
			}
		})
	}
}
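// A minimal sketch of how negative lexer tests could be structured, assuming
// tokenize reports a non-nil error for malformed input such as an
// unterminated quote. Whether it actually does so for these inputs is an
// assumption, not something verified here.
var lexerNegativeTests = []struct {
	name string
	in   string
}{
	{"Unterminated Single Quote", "'abc"},
	{"Unterminated Double Quote", `"abc`},
}

func TestLexerNegative(t *testing.T) {
	for _, tt := range lexerNegativeTests {
		t.Run(tt.name, func(t *testing.T) {
			if _, err := tokenize(tt.in); err == nil {
				t.Errorf("tokenize(%q) succeeded, expected an error", tt.in)
			}
		})
	}
}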