go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/starlark/docgen/ast/parser.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ast defines AST relevant for the documentation generation. 16 // 17 // It recognizes top-level function declarations, top-level assignments (e.g. 18 // for constants and aliases), load(...) statements (to follow imported 19 // symbols), and struct(...) declarations. 20 package ast 21 22 import ( 23 "fmt" 24 "strings" 25 26 "go.starlark.net/syntax" 27 ) 28 29 // Ellipsis represents a complex expression that we don't care about. 30 // 31 // A value of Ellipsis type is usually literally just "...". 32 type Ellipsis string 33 34 // Node is a documentation-relevant declaration of something in a file. 35 // 36 // Nodes form a tree. This tree is a reduction of a full AST of the starlark 37 // file to a form we care about when generating the documentation. 38 // 39 // The top of the tree is represented by a Module node. 40 type Node interface { 41 // Name is the name of the entity this node defines. 42 // 43 // E.g. it's the name of a function, variable, constant, etc. 44 // 45 // It may be a "private" name. Many definitions are defined using their 46 // private names first, and then exposed publicly via separate definition 47 // (such definitions are represented by Reference or ExternalReference nodes). 48 Name() string 49 50 // Span is where this node was defined in the original starlark code. 51 Span() (start syntax.Position, end syntax.Position) 52 53 // Comments is a comment block immediately preceding the definition. 54 Comments() string 55 56 // Doc is a documentation string for this symbol extracted either from a 57 // docstring or from comments. 58 Doc() string 59 60 // populateFromAST sets the fields based on the given starlark AST node. 61 populateFromAST(name string, n syntax.Node) 62 } 63 64 // EnumerableNode is a node that has a variable number of subnodes. 65 // 66 // Used to represents structs, modules and invocations. 67 type EnumerableNode interface { 68 Node 69 70 // EnumNodes returns a list of subnodes. It should not be mutated. 71 EnumNodes() []Node 72 } 73 74 // base is embedded by all node types and implements some Node methods for them. 75 // 76 // It carries name of the node, where it is defined, and surrounding comments. 77 type base struct { 78 name string 79 ast syntax.Node // where it was defined in Starlark AST 80 } 81 82 func (b *base) Name() string { return b.name } 83 func (b *base) Span() (syntax.Position, syntax.Position) { return b.ast.Span() } 84 85 func (b *base) Comments() string { 86 // Get all comments before `ast`. In particular if there are multiple comment 87 // blocks separated by new lines, `before` contains all of them. 88 var before []syntax.Comment 89 if all := b.ast.Comments(); all != nil { 90 before = all.Before 91 } 92 if len(before) == 0 { 93 return "" 94 } 95 96 // Grab a line number where 'ast' itself is defined. 97 start, _ := b.ast.Span() 98 99 // Pick only comments immediately preceding this line. 100 var comments []string 101 for idx := len(before) - 1; idx >= 0; idx-- { 102 if before[idx].Start.Line != start.Line-int32(len(comments))-1 { 103 break // detected a skipped line, which indicates it's a different block 104 } 105 // Strip '#\s?' (but only one space, spaces may be significant for the doc 106 // syntax in the comment). 107 line := strings.TrimPrefix(strings.TrimPrefix(before[idx].Text, "#"), " ") 108 comments = append(comments, line) 109 } 110 111 // Reverse 'comments', since we recorded them in reverse order. 112 for l, r := 0, len(comments)-1; l < r; l, r = l+1, r-1 { 113 comments[l], comments[r] = comments[r], comments[l] 114 } 115 return strings.Join(comments, "\n") 116 } 117 118 // Doc extracts the documentation for the symbol from its comments. 119 func (b *base) Doc() string { 120 return b.Comments() 121 } 122 123 func (b *base) populateFromAST(name string, ast syntax.Node) { 124 b.name = name 125 b.ast = ast 126 } 127 128 // Var is a node that represents '<var> = int|string|<expr>' definition. 129 // 130 // This is a "terminal" definition, not a reference to something defined 131 // elsewhere. Usually a constant or some computation we replace with '...' in 132 // the docs. 133 type Var struct { 134 base 135 136 Value any // string | int64 | *big.Int | Ellipsis 137 } 138 139 // Function is a node that represents a function definition. 140 type Function struct { 141 base 142 143 docstring string // a doc string, if any 144 } 145 146 // Doc extracts the documentation from the docstring. 147 func (n *Function) Doc() string { return n.docstring } 148 149 // Reference is a node that represents <var> = a.b.c. 150 // 151 // It is either a top-level assignment, or a keyword argument in a function call 152 // (e.g. when defining struct(...)). 153 type Reference struct { 154 base 155 156 Path []string // the ref path on the right hand side, e.g. ['a', 'b', 'c']. 157 } 158 159 // ExternalReference is a node that represents a symbol imported though 160 // load(...) statement. 161 // 162 // For load statement load("file.star", x="y") we get an ExternalReference with 163 // name "x", ExternalName "y" and Module "file.star". 164 type ExternalReference struct { 165 base 166 167 ExternalName string // name of the symbol in the loaded module 168 Module string // normalized path of the loaded module 169 } 170 171 // Invocation represents `<name> = ns1.ns2.func(arg1=..., arg2=...)` call. Only 172 // keyword arguments are recognized. 173 type Invocation struct { 174 base 175 176 Func []string // e.g. ["ns1, "ns2", "func"] 177 Args []Node // keyword arguments in order of their definition 178 } 179 180 // EnumNodes returns list of nodes that represent arguments. 181 func (inv *Invocation) EnumNodes() []Node { return inv.Args } 182 183 // Namespace is a node that contains a bunch of definitions grouped together. 184 // 185 // Examples of namespaces are top-level module dicts and structs. 186 type Namespace struct { 187 base 188 189 Nodes []Node // nodes defined in the namespace, in order they were defined 190 } 191 192 // EnumNodes returns list of nodes that represent definitions in the namespace. 193 func (ns *Namespace) EnumNodes() []Node { return ns.Nodes } 194 195 // Module is a parsed Starlark file. 196 type Module struct { 197 Namespace // all top-level symbols 198 199 docstring string // a doc string, if any 200 } 201 202 // Doc extracts the documentation from the docstring. 203 func (n *Module) Doc() string { return n.docstring } 204 205 // ParseModule parses a single Starlark module. 206 // 207 // Filename is only used when recording position information. 208 func ParseModule(filename, body string, normalize func(string) (string, error)) (*Module, error) { 209 ast, err := syntax.Parse(filename, body, syntax.RetainComments) 210 if err != nil { 211 return nil, err 212 } 213 214 m := &Module{docstring: extractDocstring(ast.Stmts)} 215 m.populateFromAST(filename, ast) 216 217 // emit adds a node to the module. 218 emit := func(name string, ast syntax.Node, n Node) { 219 n.populateFromAST(name, ast) 220 m.Nodes = append(m.Nodes, n) 221 } 222 223 // Walk over top-level statements and match them against patterns we recognize 224 // as relevant. 225 for _, stmt := range ast.Stmts { 226 switch st := stmt.(type) { 227 case *syntax.LoadStmt: 228 // A load(...) statement. Each imported symbol ends up in the module's 229 // namespace, so add corresponding ExternalReference nodes. 230 s := st.Module.Value.(string) 231 if s, err = normalize(s); err != nil { 232 return nil, fmt.Errorf("load() statement invalid: %w", err) 233 } 234 for i, nm := range st.To { 235 emit(nm.Name, st, &ExternalReference{ 236 ExternalName: st.From[i].Name, 237 Module: s, 238 }) 239 } 240 241 case *syntax.DefStmt: 242 // A function declaration: "def name(...)". 243 emit(st.Name.Name, st, &Function{ 244 docstring: extractDocstring(st.Body), 245 }) 246 247 case *syntax.AssignStmt: 248 // A top level assignment. We care only about <var> = ... (i.e. when LHS 249 // is a simple variable, not a tuple or anything like that). 250 if st.Op != syntax.EQ { 251 continue 252 } 253 lhs := matchSingleIdent(st.LHS) 254 if lhs == "" { 255 continue 256 } 257 if n := parseAssignmentRHS(st.RHS); n != nil { 258 emit(lhs, st, n) 259 } 260 } 261 } 262 263 return m, nil 264 } 265 266 // parseAssignmentRHS parses RHS of statements like "<var> = <expr>". 267 // 268 // Name of the returned node and Star/End/Comments should be populated by the 269 // caller. 270 // 271 // Only the following forms are recognized: 272 // 273 // Var: <var> = <literal>|<complex expr> 274 // Reference: <var> = <var>[.<field>]* 275 // Namespace: <var> = struct(...) 276 func parseAssignmentRHS(rhs syntax.Expr) Node { 277 // <var> = <literal>. 278 if literal := matchSingleLiteral(rhs); literal != nil { 279 return &Var{Value: literal} 280 } 281 282 // <var> = <var>[.<field>]*. 283 if path := matchRefPath(rhs); path != nil { 284 return &Reference{Path: path} 285 } 286 287 // <var> = <fn>(...). 288 if fn, args := matchSimpleCall(rhs); len(fn) != 0 { 289 // Pick all 'k=v' pairs from args and parse them as assignments. 290 var nodes []Node 291 for _, arg := range args { 292 if lhs, rhs := matchEqExpr(arg); lhs != "" { 293 if n := parseAssignmentRHS(rhs); n != nil { 294 n.populateFromAST(lhs, arg) 295 nodes = append(nodes, n) 296 } 297 } 298 } 299 300 // <var> = struct(...). 301 if len(fn) == 1 && fn[0] == "struct" { 302 return &Namespace{Nodes: nodes} 303 } 304 305 // <var> = ns.ns.func(arg1=..., arg2=...). 306 return &Invocation{Func: fn, Args: nodes} 307 } 308 309 // <var> = <expr>. 310 return &Var{Value: Ellipsis("...")} 311 } 312 313 // extractDocstring returns a doc string for the given body. 314 // 315 // A docstring is a string literal that comes first in the body, if any. 316 func extractDocstring(body []syntax.Stmt) string { 317 if len(body) == 0 { 318 return "" 319 } 320 expr, ok := body[0].(*syntax.ExprStmt) 321 if !ok { 322 return "" 323 } 324 literal, ok := expr.X.(*syntax.Literal) 325 if !ok || literal.Token != syntax.STRING { 326 return "" 327 } 328 return literal.Value.(string) 329 } 330 331 // matchSingleIdent matches an <Expr> to <Ident>, returning ident's name. 332 func matchSingleIdent(expr syntax.Expr) string { 333 if ident, ok := expr.(*syntax.Ident); ok { 334 return ident.Name 335 } 336 return "" 337 } 338 339 // matchSingleLiteral matches an <Expr> to <Literal>, returning literal's value. 340 // 341 // The returned value is string | int64 | *big.Int. 342 func matchSingleLiteral(expr syntax.Expr) any { 343 if literal, ok := expr.(*syntax.Literal); ok { 344 return literal.Value 345 } 346 return nil 347 } 348 349 // matchRefPath matches an <Expr> to <Ident>(.<Ident>)* returning identifier' 350 // names as a list of strings. 351 func matchRefPath(expr syntax.Expr) (path []string) { 352 loop: 353 for { 354 switch next := expr.(type) { 355 case *syntax.DotExpr: // next in chain 356 path = append(path, next.Name.Name) 357 expr = next.X 358 case *syntax.Ident: // last in chain 359 path = append(path, next.Name) 360 break loop 361 default: 362 return nil // not a simple ref path, has additional structure, give up 363 } 364 } 365 // Expr "a.b.c" results in ['c', 'b', 'a'], reverse. 366 for l, r := 0, len(path)-1; l < r; l, r = l+1, r-1 { 367 path[l], path[r] = path[r], path[l] 368 } 369 return 370 } 371 372 // matchSimpleCall matches an <Expr> to <Ident>(.<Ident>)*(<Expr>*), returning 373 // them. 374 func matchSimpleCall(expr syntax.Expr) (fn []string, args []syntax.Expr) { 375 call, ok := expr.(*syntax.CallExpr) 376 if !ok { 377 return nil, nil 378 } 379 if fn = matchRefPath(call.Fn); len(fn) == 0 { 380 return nil, nil 381 } 382 return fn, call.Args 383 } 384 385 // matchEqExpr matches an <Expr> to <Ident>=<Expr>, returning them. 386 func matchEqExpr(expr syntax.Expr) (lhs string, rhs syntax.Expr) { 387 bin, ok := expr.(*syntax.BinaryExpr) 388 if !ok || bin.Op != syntax.EQ { 389 return "", nil 390 } 391 if lhs = matchSingleIdent(bin.X); lhs == "" { 392 return "", nil 393 } 394 return lhs, bin.Y 395 }