github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/pkg/pogosh/parser.go

// Copyright 2020 the u-root Authors. All rights reserved
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pogosh

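// tokenizer holds the stream of tokens remaining to be parsed. Parse
// functions peek at t.ts[0] (a single token of lookahead) and advance by
// reslicing, so t.ts always starts with the next unconsumed token.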
type tokenizer struct {
	ts []token
}

// The remainder of this file parses an LL(1) grammar using a predictive
// (recursive-descent) parse. The grammar was taken from the POSIX.1-2017
// spec and converted to LL(1).
// When a parse function is called, it may assume the FIRST conditions of its
// production have already been checked by the caller.

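// Notation for the grammar comments below: CamelCase names are nonterminals,
// each parsed by the function of the same name; quoted strings and ALL_CAPS
// names (WORD, NEWLINE, IO_NUMBER, ...) are terminal tokens; '|' separates
// alternatives; and a trailing '|' with nothing after it denotes the empty
// production.
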
// Program ::= LineBreak Program2
// Program2 ::= CompleteCommands |
// CompleteCommands ::= CompleteCommand CompleteCommands2
// CompleteCommands2 ::= NewLineList CompleteCommands3 |
// CompleteCommands3 ::= CompleteCommands |
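// For example, "echo a\necho b\n" parses into a compoundList holding one
// parsed CompleteCommand per line.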
func parseProgram(s *State, t *tokenizer) command {
	cmd := compoundList{}

	parseLineBreak(s, t)
	for t.ts[0].ttype != ttEOF {
		cmd.cmds = append(cmd.cmds, parseCompleteCommand(s, t))
		switch t.ts[0].ttype {
		case ttNewLine:
			parseLineBreak(s, t)
		case ttEOF:
		default:
			panic("Parse error")
		}
		parseLineBreak(s, t)
	}
	return &cmd
}

// CompleteCommand ::= List CompleteCommand2
// CompleteCommand2 ::= SeparatorOp |
// List ::= AndOr List2
// List2 ::= SeparatorOp List |
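// A '&' separator marks the command immediately before it as asynchronous,
// e.g. in "a & b; c" only "a" is wrapped in async.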
func parseCompleteCommand(s *State, t *tokenizer) command {
	cmd := compoundList{}

	for {
		cmd.cmds = append(cmd.cmds, parseAndOr(s, t))
		if t.ts[0].ttype == ttNewLine || t.ts[0].ttype == ttEOF {
			break
		}
		switch t.ts[0].value {
		case "&":
			parseSeparatorOp(s, t)
			cmd.cmds[len(cmd.cmds)-1] = &async{cmd.cmds[len(cmd.cmds)-1]}
		case ";":
			parseSeparatorOp(s, t)
		}
		if t.ts[0].ttype == ttNewLine || t.ts[0].ttype == ttEOF {
			break
		}
	}
	return &cmd
}

// AndOr ::= Pipeline AndOr2
// AndOr2 ::= '&&' LineBreak AndOr | '||' LineBreak AndOr |
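// '&&' and '||' have equal precedence and associate to the left, so
// "a || b && c" is built as and{or{a, b}, c}.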
func parseAndOr(s *State, t *tokenizer) command {
	cmd := parsePipeline(s, t)

	for {
		switch t.ts[0].ttype {
		case ttAndIf: // FIRST['&&' LineBreak AndOr]
			t.ts = t.ts[1:]
			parseLineBreak(s, t)
			cmd = &and{cmd, parsePipeline(s, t)}
		case ttOrIf: // FIRST['||' LineBreak AndOr]
			t.ts = t.ts[1:]
			parseLineBreak(s, t)
			cmd = &or{cmd, parsePipeline(s, t)}
		default: // TODO: FOLLOW[AndOr2]
			return cmd
		}
	}
}

// Pipeline ::= '!' PipeSequence | PipeSequence
func parsePipeline(s *State, t *tokenizer) command {
	switch t.ts[0] {
	case token{"!", ttWord}:
		t.ts = t.ts[1:]
		return &not{parsePipeSequence(s, t)}
	default:
		return parsePipeSequence(s, t)
	}
}

// PipeSequence ::= Command PipeSequence2
// PipeSequence2 ::= '|' LineBreak PipeSequence |
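// Note that "a | b | c" currently nests as pipeline{a, pipeline{b, ...}}
// rather than one flat pipeline (see the associativity TODO below).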
func parsePipeSequence(s *State, t *tokenizer) command {
	cmd := pipeline{}
	cmd.cmds = append(cmd.cmds, parseCommand(s, t))

	switch t.ts[0] {
	case token{"|", ttWord}:
		t.ts = t.ts[1:]
		parseLineBreak(s, t)
		// TODO: possibly wrong associativity
		cmd.cmds = append(cmd.cmds, parsePipeSequence(s, t))
	default: // TODO: FOLLOW[PipeSequence]
	}
	return &cmd
}

// TODO: make LL(0)
// Command ::= SimpleCommand | CompoundCommand | CompoundCommand RedirectList | FunctionDefinition
func parseCommand(s *State, t *tokenizer) command {
	// TODO: support more than simple command
	return parseSimpleCommand(s, t)
}

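// The rules below are carried over from the POSIX.1-2017 shell grammar but
// are not implemented yet; their parse functions are still stubs.
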
// compound_command  : brace_group
//                   | subshell
//                   | for_clause
//                   | case_clause
//                   | if_clause
//                   | while_clause
//                   | until_clause
//                   ;
func parseCompoundCommand(s *State, t *tokenizer) {

}

// subshell          : '(' compound_list ')'
//                   ;
func parseSubshell(s *State, t *tokenizer) {

}

// compound_list     : LineBreak term
//                   | LineBreak term separator
//                   ;
func parseCompoundList(s *State, t *tokenizer) {

}

// term              : term separator AndOr
//                   | AndOr
//                   ;
func parseTerm(s *State, t *tokenizer) {

}

// for_clause        : 'for' name do_group
//                   | 'for' name sequential_sep do_group
//                   | 'for' name LineBreak in sequential_sep do_group
//                   | 'for' name LineBreak in wordlist sequential_sep do_group
//                   ;
func parseForClause(s *State, t *tokenizer) {

}

// in                : 'in'
//                   ;
func parseIn(s *State, t *tokenizer) {

}

// wordlist          : wordlist WORD
//                   | WORD
//                   ;
func parseWordList(s *State, t *tokenizer) {

}

// case_clause       : 'case' WORD LineBreak 'in' LineBreak case_list 'esac'
//                   | 'case' WORD LineBreak 'in' LineBreak case_list_ns 'esac'
//                   | 'case' WORD LineBreak 'in' LineBreak 'esac'
//                   ;
func parseCaseClause(s *State, t *tokenizer) {

}

// case_list_ns      : case_list case_item_ns
//                   | case_item_ns
//                   ;
func parseCaseListNS(s *State, t *tokenizer) {

}

// case_list         : case_list case_item
//                   | case_item
//                   ;
func parseCaseList(s *State, t *tokenizer) {

}

// case_item_ns      : pattern ')' LineBreak
//                   | pattern ')' compound_list
//                   | '(' pattern ')' LineBreak
//                   | '(' pattern ')' compound_list
//                   ;
func parseCaseItemNS(s *State, t *tokenizer) {

}

// case_item         : pattern ')' LineBreak ';;' LineBreak
//                   | pattern ')' compound_list ';;' LineBreak
//                   | '(' pattern ')' LineBreak ';;' LineBreak
//                   | '(' pattern ')' compound_list ';;' LineBreak
//                   ;
func parseCaseItem(s *State, t *tokenizer) {

}

// pattern           : WORD
//                   | pattern '|' WORD
//                   ;
func parsePattern(s *State, t *tokenizer) {

}

// if_clause         : 'if' compound_list 'then' compound_list else_part 'fi'
//                   | 'if' compound_list 'then' compound_list 'fi'
//                   ;
func parseIfClause(s *State, t *tokenizer) {

}

// else_part         : 'elif' compound_list 'then' compound_list
//                   | 'elif' compound_list 'then' compound_list else_part
//                   | 'else' compound_list
//                   ;
func parseElsePart(s *State, t *tokenizer) {

}

// while_clause      : 'while' compound_list do_group
//                   ;
func parseWhileClause(s *State, t *tokenizer) {

}

// until_clause      : 'until' compound_list do_group
//                   ;
func parseUntilClause(s *State, t *tokenizer) {

}

// function_definition : fname '(' ')' LineBreak function_body
//                   ;
func parseFunctionDefinition(s *State, t *tokenizer) {

}

// function_body     : compound_command
//                   | compound_command redirect_list
//                   ;
func parseFunctionBody(s *State, t *tokenizer) {

}

// fname             : NAME
//                   ;
func parseFName(s *State, t *tokenizer) {

}

// brace_group       : '{' compound_list '}'
//                   ;
func parseBraceGroup(s *State, t *tokenizer) {

}

// do_group          : 'do' compound_list 'done'
//                   ;
func parseDoGroup(s *State, t *tokenizer) {

}

// SimpleCommand ::= CmdPrefix SimpleCommand2 | CmdName CmdSuffix
// SimpleCommand2 ::= CmdWord CmdSuffix |
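// For example, "cat foo.txt > out.txt" yields name "cat", args ["cat",
// "foo.txt"], and one redirect. Assignment words and IO_NUMBER prefixes are
// not handled yet (see the TODOs below).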
func parseSimpleCommand(s *State, t *tokenizer) command {
	cmd := simpleCommand{}
	parseCmdPrefix(s, t, &cmd)
	parseCmdName(s, t, &cmd)
	parseCmdSuffix(s, t, &cmd)
	return &cmd
}

// CmdName ::= WORD
func parseCmdName(s *State, t *tokenizer, cmd *simpleCommand) {
	if t.ts[0].ttype == ttWord {
		cmd.name = []byte(t.ts[0].value)
		cmd.args = [][]byte{cmd.name}
		t.ts = t.ts[1:]
	} else {
		panic("Bad parse") // TODO: better error handling
	}
}

// TODO: generalize to parseWord ???
// CmdWord ::= WORD
func parseCmdWord(s *State, t *tokenizer) []byte {
	if t.ts[0].ttype == ttWord {
		cmdWord := t.ts[0].value
		t.ts = t.ts[1:]
		return []byte(cmdWord)
	}
	panic("Bad parse") // TODO: better error handling
}

// CmdPrefix ::= IORedirect CmdPrefix | Assignment_WORD CmdPrefix |
func parseCmdPrefix(s *State, t *tokenizer, cmd *simpleCommand) {
	// TODO
}

// CmdSuffix ::= IORedirect CmdSuffix | WORD CmdSuffix |
func parseCmdSuffix(s *State, t *tokenizer, cmd *simpleCommand) {
	for {
		switch t.ts[0].value {
		case "<", "<&", ">", ">&", ">>", "<>", ">|": // TODO: IO_NUMBER
			parseIORedirect(s, t, cmd)
		case "&&", "||", ";", "&", "|", "\n", "": // TODO: follow set
			return
		default:
			cmd.args = append(cmd.args, []byte(t.ts[0].value))
			t.ts = t.ts[1:]
		}
	}
}

// redirect_list     : io_redirect
//                   | redirect_list io_redirect
//                   ;
func parseRedirectList(s *State, t *tokenizer) {

}

// IORedirect ::= IORedirect2 | IO_NUMBER IORedirect2
// IORedirect2 ::= IOFile | io_here
func parseIORedirect(s *State, t *tokenizer, cmd *simpleCommand) {
	// TODO: IO_NUMBER io_here
	parseIOFile(s, t, cmd)
}

// IOFile ::= IOOp Filename
// IOOp ::= '<' | '<&' | '>' | '>&' | '>>' | '<>' | '>|'
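// e.g. the tokens ">" and "out.txt" become redirect{ioOp: ">", filename: "out.txt"}.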
func parseIOFile(s *State, t *tokenizer, cmd *simpleCommand) {
	cmd.redirects = append(cmd.redirects, redirect{
		ioOp:     parseCmdWord(s, t),
		filename: parseFilename(s, t),
	})
}

// TODO: might be able to replace by parseWord
// Filename ::= WORD
func parseFilename(s *State, t *tokenizer) []byte {
	if t.ts[0].ttype == ttWord {
		filename := t.ts[0].value
		t.ts = t.ts[1:]
		return []byte(filename)
	}
	panic("Bad parse") // TODO: better error handling
}

// io_here           : DLESS here_end
//                   | DLESSDASH here_end
//                   ;
func parseIOHere(s *State, t *tokenizer) {

}

// here_end          : WORD
//                   ;
func parseHereEnd(s *State, t *tokenizer) {

}

// NewLineList ::= NEWLINE NewLineList | NEWLINE
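// NewLineList requires at least one newline; LineBreak (below) accepts zero
// or more.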
func parseNewLineList(s *State, t *tokenizer) {
	if t.ts[0].ttype != ttNewLine {
		panic("Parse error") // TODO: better error message
	}
	for t.ts[0].ttype == ttNewLine {
		t.ts = t.ts[1:]
	}
	// TODO: follow set?
}

// LineBreak ::= NEWLINE LineBreak |
func parseLineBreak(s *State, t *tokenizer) {
	for t.ts[0].ttype == ttNewLine {
		t.ts = t.ts[1:]
	}
	// TODO: follow set?
}

// SeparatorOp ::= '&' | ';'
func parseSeparatorOp(s *State, t *tokenizer) {
	switch t.ts[0].value {
	case "&", ";":
		t.ts = t.ts[1:]
	default:
		panic("Parse error")
	}
}

// separator         : separator_op LineBreak
//                   | NewLineList
//                   ;
func parseSeparator(s *State, t *tokenizer) {

}

// sequential_sep    : ';' LineBreak
//                   | NewLineList
//                   ;
func parseSequentialSep(s *State, t *tokenizer) {

}