github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/pkg/pogosh/parser.go (about) 1 // Copyright 2020 the u-root Authors. All rights reserved 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pogosh 6 7 type tokenizer struct { 8 ts []token 9 } 10 11 // The remainder of this file parses and evaluates an LL(1) grammar using a 12 // predictive parse. This grammar was found in the POSIX.1-2017 spec and 13 // converted to LL(1). 14 // TODO: explain the Backus-Naur 15 // When a parse function is called, can assume FIRST conditions are met. 16 17 // Program ::= LineBreak Program2 18 // Program2 ::= CompleteCommands | 19 // CompleteCommands ::= CompleteCommand CompleteCommands2 20 // CompleteCommands2 ::= NewLineList CompleteCommands3 | 21 // CompleteCommands3 ::= CompleteCommands | 22 func parseProgram(s *State, t *tokenizer) command { 23 cmd := compoundList{} 24 25 parseLineBreak(s, t) 26 for t.ts[0].ttype != ttEOF { 27 cmd.cmds = append(cmd.cmds, parseCompleteCommand(s, t)) 28 switch t.ts[0].ttype { 29 case ttNewLine: 30 parseLineBreak(s, t) 31 case ttEOF: 32 default: 33 panic("Parse error") 34 } 35 parseLineBreak(s, t) 36 } 37 return &cmd 38 } 39 40 // CompleteCommand ::= List CompleteCommand2 41 // CompleteCommand2 ::= SeparatorOp | 42 // List ::= AndOr List2 43 // List2 ::= SeparatorOp List | 44 func parseCompleteCommand(s *State, t *tokenizer) command { 45 cmd := compoundList{} 46 47 for { 48 cmd.cmds = append(cmd.cmds, parseAndOr(s, t)) 49 if t.ts[0].ttype == ttNewLine || t.ts[0].ttype == ttEOF { 50 break 51 } 52 switch t.ts[0].value { 53 case "&": 54 parseSeparatorOp(s, t) 55 cmd.cmds[len(cmd.cmds)-1] = &async{cmd.cmds[len(cmd.cmds)-1]} 56 case ";": 57 parseSeparatorOp(s, t) 58 } 59 if t.ts[0].ttype == ttNewLine || t.ts[0].ttype == ttEOF { 60 break 61 } 62 } 63 return &cmd 64 } 65 66 // AndOr ::= Pipeline AndOr2 67 // AndOr2 ::= '&&' LineBreak AndOr | '||' LineBreak AndOr | 68 func parseAndOr(s *State, t *tokenizer) command { 69 cmd := parsePipeline(s, t) 70 71 for { 72 switch t.ts[0].ttype { 73 case ttAndIf: // FIRST['&&' LineBreak AndOr] 74 t.ts = t.ts[1:] 75 parseLineBreak(s, t) 76 cmd = &and{cmd, parsePipeline(s, t)} 77 case ttOrIf: // FIRST['||' LineBreak AndOr] 78 t.ts = t.ts[1:] 79 parseLineBreak(s, t) 80 cmd = &or{cmd, parsePipeline(s, t)} 81 default: // TODO: FOLLOW[AndOr2] 82 return cmd 83 } 84 } 85 } 86 87 // Pipeline ::= '!' PipeSequence | PipeSequence 88 func parsePipeline(s *State, t *tokenizer) command { 89 switch t.ts[0] { 90 case token{"!", ttWord}: 91 t.ts = t.ts[1:] 92 return ¬{parsePipeSequence(s, t)} 93 default: 94 return parsePipeSequence(s, t) 95 } 96 } 97 98 // PipeSequence ::= Command PipeSequence2 99 // PipeSequence2 ::= '|' LineBreak PipeSequence | 100 func parsePipeSequence(s *State, t *tokenizer) command { 101 cmd := pipeline{} 102 cmd.cmds = append(cmd.cmds, parseCommand(s, t)) 103 104 switch t.ts[0] { 105 case token{"|", ttWord}: 106 t.ts = t.ts[1:] 107 parseLineBreak(s, t) 108 // TODO: possibly wrong associativity 109 cmd.cmds = append(cmd.cmds, parsePipeSequence(s, t)) 110 default: // TODO: FOLLOW[PipeSequence] 111 } 112 return &cmd 113 } 114 115 // TODO: make LL(0) 116 // Command ::= SimpleCommand | CompoundCommand | CompoundCommand RedirectList | FunctionDefinition 117 func parseCommand(s *State, t *tokenizer) command { 118 // TODO: support more than simple command 119 return parseSimpleCommand(s, t) 120 } 121 122 // compound_command : brace_group 123 // | subshell 124 // | for_clause 125 // | case_clause 126 // | if_clause 127 // | while_clause 128 // | until_clause 129 // ; 130 func parseCompoundCommand(s *State, t *tokenizer) { 131 132 } 133 134 // subshell : '(' compound_list ') 135 // ; 136 func parseSubshell(s *State, t *tokenizer) { 137 138 } 139 140 // compound_list : LineBreak term 141 // | LineBreak term separator 142 // ; 143 func parseCompoundList(s *State, t *tokenizer) { 144 145 } 146 147 // term : term separator '&&' 148 // | '&&' 149 // ; 150 func parseTerm(s *State, t *tokenizer) { 151 152 } 153 154 // for_clause : 'for' name do_group 155 // | 'for' name sequential_sep do_group 156 // | 'for' name LineBreak in sequential_sep do_group 157 // | 'for' name LineBreak in wordlist sequential_sep do_group 158 // ; 159 func parseForClause(s *State, t *tokenizer) { 160 161 } 162 163 // in : 'in' 164 // ; 165 func parseIn(s *State, t *tokenizer) { 166 167 } 168 169 // wordlist : wordlist WORD 170 // | WORD 171 // ; 172 func parseWordList(s *State, t *tokenizer) { 173 174 } 175 176 // case_clause : 'case' WORD LineBreak 'in' LineBreak case_list 'esac' 177 // | 'case' WORD LineBreak 'in' LineBreak case_list_ns 'esac' 178 // | 'case' WORD LineBreak 'in' LineBreak 'esac' 179 // ; 180 func parseCaseClause(s *State, t *tokenizer) { 181 182 } 183 184 // case_list_ns : case_list case_item_ns 185 // | case_item_ns 186 // ; 187 func parseCaseListNS(s *State, t *tokenizer) { 188 189 } 190 191 // case_list : case_list case_item 192 // | case_item 193 // ; 194 func parseCaseList(s *State, t *tokenizer) { 195 196 } 197 198 // case_item_ns : pattern ')' LineBreak 199 // | pattern ')' compound_list 200 // | '(' pattern ')' LineBreak 201 // | '(' pattern ')' compound_list 202 // ; 203 func parseCaseItemNS(s *State, t *tokenizer) { 204 205 } 206 207 // case_item : pattern ')' ';;' LineBreak 208 // | pattern ')' ';;' compound_list 209 // | '(' pattern ')' ';;' LineBreak 210 // | '(' pattern ')' ';;' compound_list 211 // ; 212 func parseCaseItem(s *State, t *tokenizer) { 213 214 } 215 216 // pattern : WORD 217 // | pattern '|' WORD 218 // ; 219 func parsePattern(s *State, t *tokenizer) { 220 221 } 222 223 // if_clause : 'if' compound_list 'then' compound_list else_part 'fi' 224 // | 'if' compound_list 'then' compound_list 'fi' 225 func parseIfClause(s *State, t *tokenizer) { 226 227 } 228 229 // else_part : 'elif' compound_list 'then' compound_list 230 // | 'elif' compound_list 'then' compound_list else_part 231 // | 'else' compound_list 232 // ; 233 func parseElsePart(s *State, t *tokenizer) { 234 235 } 236 237 // while_clause : 'while' compound_list do_group 238 // ; 239 func parseWhileClause(s *State, t *tokenizer) { 240 241 } 242 243 // until_clause : 'until' compound_list do_group 244 // ; 245 func parseUntilClause(s *State, t *tokenizer) { 246 247 } 248 249 // function_definition : fname '(' ')' LineBreak function_body 250 // ; 251 func parseFunctionDefinition(s *State, t *tokenizer) { 252 253 } 254 255 // function_body : compound_command 256 // | compound_command redirect_list 257 // ; 258 func parseFunctionBody(s *State, t *tokenizer) { 259 260 } 261 262 // fname : NAME 263 // ; 264 func parseFName(s *State, t *tokenizer) { 265 266 } 267 268 // brace_group : '{' compound_list '}' 269 // ; 270 func parseBraceGroup(s *State, t *tokenizer) { 271 272 } 273 274 // do_group : 'do' compound_list 'done' 275 // ; 276 func parseDoGroup(s *State, t *tokenizer) { 277 278 } 279 280 // SimpleCommand ::= CmdPrefix SimpleCommand2 | CmdName CmdSuffix 281 // SimpleCommand2 ::= CmdWord CmdSuffix | 282 func parseSimpleCommand(s *State, t *tokenizer) command { 283 cmd := simpleCommand{} 284 parseCmdPrefix(s, t, &cmd) 285 parseCmdName(s, t, &cmd) 286 parseCmdSuffix(s, t, &cmd) 287 return &cmd 288 } 289 290 // CmdName ::= WORD 291 func parseCmdName(s *State, t *tokenizer, cmd *simpleCommand) { 292 if t.ts[0].ttype == ttWord { 293 cmd.name = []byte(t.ts[0].value) 294 cmd.args = [][]byte{cmd.name} 295 t.ts = t.ts[1:] 296 } else { 297 panic("Bad parse") // TODO: better error handling 298 } 299 } 300 301 // TODO: generalize to parseWord ??? 302 // CmdWord ::= WORD 303 func parseCmdWord(s *State, t *tokenizer) []byte { 304 if t.ts[0].ttype == ttWord { 305 cmdWord := t.ts[0].value 306 t.ts = t.ts[1:] 307 return []byte(cmdWord) 308 } 309 panic("Bad parse") // TODO: better error handling 310 } 311 312 // CmdPrefix ::= IORedirect CmdPrefix | Assignment_WORD CmdPrefix | 313 func parseCmdPrefix(s *State, t *tokenizer, cmd *simpleCommand) { 314 // TODO 315 } 316 317 // CmdSuffix ::= IORedirect CmdSuffix | WORD CmdSuffix | 318 func parseCmdSuffix(s *State, t *tokenizer, cmd *simpleCommand) { 319 for { 320 switch t.ts[0].value { 321 case "<", "<&", ">", ">&", ">>", "<>", ">|": // TODO: IO_NUMBER 322 parseIORedirect(s, t, cmd) 323 case "&&", "||", ";", "&", "|", "\n", "": // TODO: follow set 324 return 325 default: 326 cmd.args = append(cmd.args, []byte(t.ts[0].value)) 327 t.ts = t.ts[1:] 328 } 329 } 330 } 331 332 // redirect_list : io_redirect 333 // | redirect_list io_redirect 334 // ; 335 func parseRedirectList(s *State, t *tokenizer) { 336 337 } 338 339 // IORedirect ::= IORedirect2 | IO_NUMBER IORedirect2 340 // IORedirect2 ::= IOFile | io_here 341 func parseIORedirect(s *State, t *tokenizer, cmd *simpleCommand) { 342 // TODO: IO_NUMBER io_here 343 parseIOFile(s, t, cmd) 344 } 345 346 // IOFile ::= IOOp Filename 347 // IOOp ::= '<' | '<&' | '>' | '>&' | '>>' | '<>' | '>|' 348 func parseIOFile(s *State, t *tokenizer, cmd *simpleCommand) { 349 cmd.redirects = append(cmd.redirects, redirect{ 350 ioOp: parseCmdWord(s, t), 351 filename: parseFilename(s, t), 352 }) 353 } 354 355 // TODO: might be able to replace by parseWord 356 // Filename ::= WORD 357 func parseFilename(s *State, t *tokenizer) []byte { 358 if t.ts[0].ttype == ttWord { 359 filename := t.ts[0].value 360 t.ts = t.ts[1:] 361 return []byte(filename) 362 } 363 panic("Bad parse") // TODO: better error handling 364 } 365 366 // io_here : DLESS here_end 367 // | DLESSDASH here_end 368 // ; 369 func parseIOHere(s *State, t *tokenizer) { 370 371 } 372 373 // here_end : WORD 374 // ; 375 func parseHereEnd(s *State, t *tokenizer) { 376 377 } 378 379 // NewLineList ::= NEWLINE NewLineList | NEWLINE 380 func parseNewLineList(s *State, t *tokenizer) { 381 if t.ts[0].ttype != ttNewLine { 382 panic("Parse error") // TODO: better error message 383 } 384 for t.ts[0].ttype == ttNewLine { 385 t.ts = t.ts[1:] 386 } 387 // TODO: follow set? 388 } 389 390 // LineBreak ::= NEWLINE LineBreak | 391 func parseLineBreak(s *State, t *tokenizer) { 392 for t.ts[0].ttype == ttNewLine { 393 t.ts = t.ts[1:] 394 } 395 // TODO: follow set? 396 } 397 398 // SeparatorOp ::= '&' | ';' 399 func parseSeparatorOp(s *State, t *tokenizer) { 400 switch t.ts[0].value { 401 case "&", ";": 402 t.ts = t.ts[1:] 403 default: 404 panic("Parse error") 405 } 406 } 407 408 // separator : separator_op LineBreak 409 // | NewLineList 410 // ; 411 func parseSeparator(s *State, t *tokenizer) { 412 413 } 414 415 // sequential_sep : ';' LineBreak 416 // | NewLineList 417 // ; 418 func parseSequentialSep(s *State, t *tokenizer) { 419 420 }