github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/syntax/grammar.txt (about)

     1  
     2  Grammar of Starlark
     3  ==================
     4  
     5  File = {Statement | newline} eof .
     6  
     7  Statement = DefStmt | IfStmt | ForStmt | WhileStmt | SimpleStmt .
     8  
     9  DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite .
    10  
    11  Parameters = Parameter {',' Parameter}.
    12  
    13  Parameter = identifier | identifier '=' Test | '*' | '*' identifier | '**' identifier .
    14  
    15  IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] .
    16  
    17  ForStmt = 'for' LoopVariables 'in' Expression ':' Suite .
    18  
    19  WhileStmt = 'while' Test ':' Suite .
    20  
    21  Suite = [newline indent {Statement} outdent] | SimpleStmt .
    22  
    23  SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' .
    24  # NOTE: the trailing '\n' of a SimpleStmt is optional at EOF.
    25  
    26  SmallStmt = ReturnStmt
    27            | BreakStmt | ContinueStmt | PassStmt
    28            | AssignStmt
    29            | ExprStmt
    30            | LoadStmt
    31            .
    32  
    33  ReturnStmt   = 'return' [Expression] .
    34  BreakStmt    = 'break' .
    35  ContinueStmt = 'continue' .
    36  PassStmt     = 'pass' .
    37  AssignStmt   = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') Expression .
    38  ExprStmt     = Expression .
    39  
    40  LoadStmt = 'load' '(' string {',' [identifier '='] string} [','] ')' .
    41  
    42  Test = LambdaExpr
    43       | IfExpr
    44       | PrimaryExpr
    45       | UnaryExpr
    46       | BinaryExpr
    47       .
    48  
    49  LambdaExpr = 'lambda' [Parameters] ':' Test .
    50  
    51  IfExpr = Test 'if' Test 'else' Test .
    52  
    53  PrimaryExpr = Operand
    54              | PrimaryExpr DotSuffix
    55              | PrimaryExpr CallSuffix
    56              | PrimaryExpr SliceSuffix
    57              .
    58  
    59  Operand = identifier
    60          | int | float | string
    61          | ListExpr | ListComp
    62          | DictExpr | DictComp
    63          | '(' [Expression [',']] ')'
    64          | ('-' | '+') PrimaryExpr
    65          .
    66  
    67  DotSuffix   = '.' identifier .
    68  CallSuffix  = '(' [Arguments [',']] ')' .
    69  SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' .
    70  
    71  Arguments = Argument {',' Argument} .
    72  Argument  = Test | identifier '=' Test | '*' Test | '**' Test .
    73  
    74  ListExpr = '[' [Expression [',']] ']' .
    75  ListComp = '[' Test {CompClause} ']'.
    76  
    77  DictExpr = '{' [Entries [',']] '}' .
    78  DictComp = '{' Entry {CompClause} '}' .
    79  Entries  = Entry {',' Entry} .
    80  Entry    = Test ':' Test .
    81  
    82  CompClause = 'for' LoopVariables 'in' Test | 'if' Test .
    83  
    84  UnaryExpr = 'not' Test .
    85  
    86  BinaryExpr = Test {Binop Test} .
    87  
    88  Binop = 'or'
    89        | 'and'
    90        | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in'
    91        | '|'
    92        | '^'
    93        | '&'
    94        | '-' | '+'
    95        | '*' | '%' | '/' | '//'
    96        .
    97  
    98  Expression = Test {',' Test} .
    99  # NOTE: trailing comma permitted only when within [...] or (...).
   100  
   101  LoopVariables = PrimaryExpr {',' PrimaryExpr} .
   102  
   103  
   104  # Notation (similar to Go spec):
   105  - lowercase and 'quoted' items are lexical tokens.
   106  - Capitalized names denote grammar productions.
   107  - (...) implies grouping
   108  - x | y means either x or y.
   109  - [x] means x is optional
   110  - {x} means x is repeated zero or more times
   111  - The end of each declaration is marked with a period.
   112  
   113  # Tokens
   114  - spaces: newline, eof, indent, outdent.
   115  - identifier.
   116  - literals: string, int, float.
   117  - plus all quoted tokens such as '+=', 'return'.
   118  
   119  # Notes:
   120  - Ambiguity is resolved using operator precedence.
   121  - The grammar does not enforce the legal order of params and args,
   122    nor that the first CompClause must be a 'for'.
   123  
   124  TODO:
   125  - explain how the lexer generates indent, outdent, and newline tokens.
   126  - why is unary NOT separated from unary - and +?
   127  - the grammar is (mostly) in LL(1) style so, for example,
   128    dot expressions are formed as suffixes, not complete expressions,
   129    which makes the spec harder to read.  Reorganize into non-LL(1) form?