go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/starlark/docgen/symbols/loader.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package symbols
    16  
    17  import (
    18  	"go.chromium.org/luci/common/data/stringset"
    19  	"go.chromium.org/luci/common/errors"
    20  	"go.chromium.org/luci/starlark/docgen/ast"
    21  )
    22  
    23  // Loader knows how to load symbols from a starlark file, following references
    24  // to other file it may load (recursively).
    25  //
    26  // As a result it builds a symbol tree. Intermediate nodes in this tree are
    27  // struct-like definitions (which define namespaces), and leafs hold pointers
    28  // to ast.Nodes with concrete definitions of these symbols (after following
    29  // all possible aliases).
    30  //
    31  // Consider this module.star Starlark code, for example:
    32  //
    33  //	def _func():
    34  //	  """Doc string."""
    35  //	exported = struct(func = _func, const = 123)
    36  //
    37  // It will produce the following symbol tree:
    38  //
    39  //	Struct('module.star', *ast.Module, [
    40  //	  Term('_func', *ast.Function _func),
    41  //	  Struct('exported', *ast.Namespace exported, [
    42  //	    Term('func', *ast.Function _func),
    43  //	    Term('const', *ast.Var const),
    44  //	  ]),
    45  //	])
    46  //
    47  // Notice that both '_func' and 'exported.func' point to exact same AST node
    48  // where the function was actually defined.
    49  //
    50  // This allows to collect the documentation for all exported symbols even if
    51  // they are gathered from many internal modules via load(...) statements,
    52  // assignments and structs.
    53  type Loader struct {
    54  	// Normalize normalizes a load() statement relative to the parent.
    55  	Normalize func(parent, module string) (string, error)
    56  	// Source loads module's source code.
    57  	Source func(module string) (src string, err error)
    58  
    59  	loading stringset.Set      // set of modules being recursively loaded now
    60  	sources map[string]string  // all loaded source code, keyed by module name
    61  	symbols map[string]*Struct // symbols defined in the corresponding module
    62  }
    63  
    64  // init lazily initializes loader's guts.
    65  func (l *Loader) init() {
    66  	if l.loading == nil {
    67  		l.loading = stringset.New(1)
    68  		l.sources = make(map[string]string, 1)
    69  		l.symbols = make(map[string]*Struct, 1)
    70  	}
    71  }
    72  
    73  // Load loads the module and all modules it references, populating the
    74  // loader's state with information about exported symbols.
    75  //
    76  // Returns a struct with a list of symbols defined in the module.
    77  //
    78  // Can be called multiple times with different modules.
    79  //
    80  // The module string must be normalized.
    81  func (l *Loader) Load(module string) (syms *Struct, err error) {
    82  	defer func() {
    83  		err = errors.Annotate(err, "in %s", module).Err()
    84  	}()
    85  
    86  	l.init()
    87  	if !l.loading.Add(module) {
    88  		return nil, errors.New("recursive dependency")
    89  	}
    90  	defer l.loading.Del(module)
    91  
    92  	// Already processed it?
    93  	if syms, ok := l.symbols[module]; ok {
    94  		return syms, nil
    95  	}
    96  
    97  	// Load and parse the source code into a distilled AST.
    98  	src, err := l.Source(module)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  	l.sources[module] = src
   103  	mod, err := ast.ParseModule(module, src, func(s string) (string, error) {
   104  		return l.Normalize(module, s)
   105  	})
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	// Recursively resolve all references in 'mod' to their concrete definitions
   111  	// (perhaps in other modules). This returns a struct with a list of all
   112  	// symbols defined in the module.
   113  	var top *Struct
   114  	if top, err = l.resolveRefs(&mod.Namespace, nil); err != nil {
   115  		return nil, err
   116  	}
   117  	l.symbols[module] = top
   118  	return top, nil
   119  }
   120  
   121  // resolveRefs visits nodes in the namespace and follows References and
   122  // ExternalReferences to get the terminal definition of all symbols defined in
   123  // the namespace.
   124  //
   125  // resolveRefs puts them in a struct and returns it.
   126  //
   127  // 'top' struct represents the top module scope and it is used to lookup symbols
   128  // when following references. Pass nil when resolveRefs is used to resolve the
   129  // module scope itself.
   130  //
   131  // When resolving symbols in a struct(k=struct(k=...), ...), 'top' always
   132  // contains symbols from the top-level module scope. There's NO chaining of
   133  // scopes, because the following is NOT a valid definition:
   134  //
   135  //	struct(
   136  //	    k1 = v,
   137  //	    nested = struct(k2 = k1),  # k1 is undefined!
   138  //	)
   139  //
   140  // Only symbols defined at the module scope (e.g. variables) can be referenced
   141  // from inside struct definitions.
   142  func (l *Loader) resolveRefs(ns ast.EnumerableNode, top *Struct) (*Struct, error) {
   143  	cur := newStruct(ns.Name(), ns)
   144  	defer cur.freeze()
   145  
   146  	// When parsing the module scope, 'cur' IS the top-level scope. All symbols
   147  	// defined in 'cur' become immediately visible to all later definitions.
   148  	if top == nil {
   149  		top = cur
   150  	}
   151  
   152  	for _, n := range ns.EnumNodes() {
   153  		switch val := n.(type) {
   154  		case *ast.Reference:
   155  			// A reference to a symbol defined elsewhere. Follow it.
   156  			cur.addSymbol(NewAlias(val.Name(), Lookup(top, val.Path...)))
   157  
   158  		case *ast.ExternalReference:
   159  			// A reference to a symbol in another module. Load the module and follow
   160  			// the reference.
   161  			external, err := l.Load(val.Module)
   162  			if err != nil {
   163  				return nil, err
   164  			}
   165  			cur.addSymbol(NewAlias(val.Name(), Lookup(external, val.ExternalName)))
   166  
   167  		case *ast.Namespace:
   168  			// A struct(...) definition. Recursively resolve what's inside it. Allow
   169  			// it to reference the symbols in the top scope only. When one struct
   170  			// nests another, the inner struct doesn't have access to symbols defined
   171  			// in an outer struct. Only what's in the top-level scope.
   172  			inner, err := l.resolveRefs(val, top)
   173  			if err != nil {
   174  				return nil, err
   175  			}
   176  			cur.addSymbol(inner)
   177  
   178  		case *ast.Invocation:
   179  			// A statement like `var = ns1.func(arg1=...)`. Resolve the function
   180  			// symbol first, then recursively resolve the struct with the arguments.
   181  			fn := Lookup(top, val.Func...)
   182  			args, err := l.resolveRefs(val, top)
   183  			if err != nil {
   184  				return nil, err
   185  			}
   186  			cur.addSymbol(newInvocation(val.Name(), val, fn, args.Symbols()))
   187  
   188  		default:
   189  			// Something defined right in this namespace.
   190  			cur.addSymbol(newTerm(n.Name(), n))
   191  		}
   192  	}
   193  
   194  	return cur, nil
   195  }