github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/modindex/scan.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package modindex
     6  
     7  import (
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"go/build"
    12  	"go/doc"
    13  	"go/scanner"
    14  	"go/token"
    15  	"io/fs"
    16  	"path/filepath"
    17  	"strings"
    18  
    19  	"github.com/go-asm/go/cmd/go/base"
    20  	"github.com/go-asm/go/cmd/go/fsys"
    21  	"github.com/go-asm/go/cmd/go/str"
    22  )
    23  
    24  // moduleWalkErr returns filepath.SkipDir if the directory isn't relevant
    25  // when indexing a module or generating a filehash, ErrNotIndexed,
    26  // if the module shouldn't be indexed, and nil otherwise.
    27  func moduleWalkErr(root string, path string, info fs.FileInfo, err error) error {
    28  	if err != nil {
    29  		return ErrNotIndexed
    30  	}
    31  	// stop at module boundaries
    32  	if info.IsDir() && path != root {
    33  		if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() {
    34  			return filepath.SkipDir
    35  		}
    36  	}
    37  	if info.Mode()&fs.ModeSymlink != 0 {
    38  		if target, err := fsys.Stat(path); err == nil && target.IsDir() {
    39  			// return an error to make the module hash invalid.
    40  			// Symlink directories in modules are tricky, so we won't index
    41  			// modules that contain them.
    42  			// TODO(matloob): perhaps don't return this error if the symlink leads to
    43  			// a directory with a go.mod file.
    44  			return ErrNotIndexed
    45  		}
    46  	}
    47  	return nil
    48  }
    49  
    50  // indexModule indexes the module at the given directory and returns its
    51  // encoded representation. It returns ErrNotIndexed if the module can't
    52  // be indexed because it contains symlinks.
    53  func indexModule(modroot string) ([]byte, error) {
    54  	fsys.Trace("indexModule", modroot)
    55  	var packages []*rawPackage
    56  
    57  	// If the root itself is a symlink to a directory,
    58  	// we want to follow it (see https://go.dev/issue/50807).
    59  	// Add a trailing separator to force that to happen.
    60  	root := str.WithFilePathSeparator(modroot)
    61  	err := fsys.Walk(root, func(path string, info fs.FileInfo, err error) error {
    62  		if err := moduleWalkErr(root, path, info, err); err != nil {
    63  			return err
    64  		}
    65  
    66  		if !info.IsDir() {
    67  			return nil
    68  		}
    69  		if !strings.HasPrefix(path, root) {
    70  			panic(fmt.Errorf("path %v in walk doesn't have modroot %v as prefix", path, modroot))
    71  		}
    72  		rel := path[len(root):]
    73  		packages = append(packages, importRaw(modroot, rel))
    74  		return nil
    75  	})
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	return encodeModuleBytes(packages), nil
    80  }
    81  
    82  // indexPackage indexes the package at the given directory and returns its
    83  // encoded representation. It returns ErrNotIndexed if the package can't
    84  // be indexed.
    85  func indexPackage(modroot, pkgdir string) []byte {
    86  	fsys.Trace("indexPackage", pkgdir)
    87  	p := importRaw(modroot, relPath(pkgdir, modroot))
    88  	return encodePackageBytes(p)
    89  }
    90  
// rawPackage holds the information from each package that's needed to
// fill a build.Package once the context is available.
//
// importRaw always returns a non-nil rawPackage. When the package directory
// is missing or cannot be read, only dir and error are set.
type rawPackage struct {
	error string // text of the error hit while locating or reading the directory; empty if none
	dir   string // directory containing package sources, relative to the module root

	// Source files
	sourceFiles []*rawFile
}
   100  
   101  type parseError struct {
   102  	ErrorList   *scanner.ErrorList
   103  	ErrorString string
   104  }
   105  
   106  // parseErrorToString converts the error from parsing the file into a string
   107  // representation. A nil error is converted to an empty string, and all other
   108  // errors are converted to a JSON-marshalled parseError struct, with ErrorList
   109  // set for errors of type scanner.ErrorList, and ErrorString set to the error's
   110  // string representation for all other errors.
   111  func parseErrorToString(err error) string {
   112  	if err == nil {
   113  		return ""
   114  	}
   115  	var p parseError
   116  	if e, ok := err.(scanner.ErrorList); ok {
   117  		p.ErrorList = &e
   118  	} else {
   119  		p.ErrorString = e.Error()
   120  	}
   121  	s, err := json.Marshal(p)
   122  	if err != nil {
   123  		panic(err) // This should be impossible because scanner.Error contains only strings and ints.
   124  	}
   125  	return string(s)
   126  }
   127  
   128  // parseErrorFromString converts a string produced by parseErrorToString back
   129  // to an error.  An empty string is converted to a nil error, and all
   130  // other strings are expected to be JSON-marshalled parseError structs.
   131  // The two functions are meant to preserve the structure of an
   132  // error of type scanner.ErrorList in a round trip, but may not preserve the
   133  // structure of other errors.
   134  func parseErrorFromString(s string) error {
   135  	if s == "" {
   136  		return nil
   137  	}
   138  	var p parseError
   139  	if err := json.Unmarshal([]byte(s), &p); err != nil {
   140  		base.Fatalf(`go: invalid parse error value in index: %q. This indicates a corrupted index. Run "go clean -cache" to reset the module cache.`, s)
   141  	}
   142  	if p.ErrorList != nil {
   143  		return *p.ErrorList
   144  	}
   145  	return errors.New(p.ErrorString)
   146  }
   147  
// rawFile is the struct representation of the file holding all
// information in its fields.
//
// importRaw appends one rawFile per directory entry that getFileInfo does
// not reject as a non-source file. The goBuildConstraint,
// plusBuildConstraints, binaryOnly and directives fields are copied from the
// result of getFileInfo. For files whose extension is ".go", imports and
// embeds are also filled in from that result, one element per reported
// import or embed, with import positions resolved through the scan's
// token.FileSet.
type rawFile struct {
	error      string // text of the error returned by getFileInfo; when set, only name is also populated
	parseError string // parse error of a .go file, encoded by parseErrorToString; empty if none

	name                 string // name of the directory entry this record describes
	synopsis             string // doc.Synopsis of package comment... Compute synopsis on all of these?
	pkgName              string // package name from the parsed file (info.parsed.Name.Name), when available
	ignoreFile           bool   // starts with _ or . or should otherwise always be ignored
	binaryOnly           bool   // cannot be rebuilt from source (has //go:binary-only-package comment)
	cgoDirectives        string // the #cgo directive lines in the comment on import "C"
	goBuildConstraint    string // copied from getFileInfo; presumably the //go:build expression (TODO confirm)
	plusBuildConstraints []string
	imports              []rawImport
	embeds               []embed
	directives           []build.Directive
}
   166  
// rawImport records a single import found in a source file: the imported
// path and the position that importRaw resolved for it through the
// token.FileSet used while scanning the package.
type rawImport struct {
	path     string
	position token.Position
}
   171  
// embed records a single embed entry reported by getFileInfo for a source
// file: its pattern and the position at which it was found.
// NOTE(review): presumably these come from //go:embed directives; confirm
// against getFileInfo.
type embed struct {
	pattern  string
	position token.Position
}
   176  
   177  // importRaw fills the rawPackage from the package files in srcDir.
   178  // dir is the package's path relative to the modroot.
   179  func importRaw(modroot, reldir string) *rawPackage {
   180  	p := &rawPackage{
   181  		dir: reldir,
   182  	}
   183  
   184  	absdir := filepath.Join(modroot, reldir)
   185  
   186  	// We still haven't checked
   187  	// that p.dir directory exists. This is the right time to do that check.
   188  	// We can't do it earlier, because we want to gather partial information for the
   189  	// non-nil *build.Package returned when an error occurs.
   190  	// We need to do this before we return early on FindOnly flag.
   191  	if !isDir(absdir) {
   192  		// package was not found
   193  		p.error = fmt.Errorf("cannot find package in:\n\t%s", absdir).Error()
   194  		return p
   195  	}
   196  
   197  	entries, err := fsys.ReadDir(absdir)
   198  	if err != nil {
   199  		p.error = err.Error()
   200  		return p
   201  	}
   202  
   203  	fset := token.NewFileSet()
   204  	for _, d := range entries {
   205  		if d.IsDir() {
   206  			continue
   207  		}
   208  		if d.Mode()&fs.ModeSymlink != 0 {
   209  			if isDir(filepath.Join(absdir, d.Name())) {
   210  				// Symlinks to directories are not source files.
   211  				continue
   212  			}
   213  		}
   214  
   215  		name := d.Name()
   216  		ext := nameExt(name)
   217  
   218  		if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") {
   219  			continue
   220  		}
   221  		info, err := getFileInfo(absdir, name, fset)
   222  		if err == errNonSource {
   223  			// not a source or object file. completely ignore in the index
   224  			continue
   225  		} else if err != nil {
   226  			p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, error: err.Error()})
   227  			continue
   228  		} else if info == nil {
   229  			p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, ignoreFile: true})
   230  			continue
   231  		}
   232  		rf := &rawFile{
   233  			name:                 name,
   234  			goBuildConstraint:    info.goBuildConstraint,
   235  			plusBuildConstraints: info.plusBuildConstraints,
   236  			binaryOnly:           info.binaryOnly,
   237  			directives:           info.directives,
   238  		}
   239  		if info.parsed != nil {
   240  			rf.pkgName = info.parsed.Name.Name
   241  		}
   242  
   243  		// Going to save the file. For non-Go files, can stop here.
   244  		p.sourceFiles = append(p.sourceFiles, rf)
   245  		if ext != ".go" {
   246  			continue
   247  		}
   248  
   249  		if info.parseErr != nil {
   250  			rf.parseError = parseErrorToString(info.parseErr)
   251  			// Fall through: we might still have a partial AST in info.Parsed,
   252  			// and we want to list files with parse errors anyway.
   253  		}
   254  
   255  		if info.parsed != nil && info.parsed.Doc != nil {
   256  			rf.synopsis = doc.Synopsis(info.parsed.Doc.Text())
   257  		}
   258  
   259  		var cgoDirectives []string
   260  		for _, imp := range info.imports {
   261  			if imp.path == "C" {
   262  				cgoDirectives = append(cgoDirectives, extractCgoDirectives(imp.doc.Text())...)
   263  			}
   264  			rf.imports = append(rf.imports, rawImport{path: imp.path, position: fset.Position(imp.pos)})
   265  		}
   266  		rf.cgoDirectives = strings.Join(cgoDirectives, "\n")
   267  		for _, emb := range info.embeds {
   268  			rf.embeds = append(rf.embeds, embed{emb.pattern, emb.pos})
   269  		}
   270  
   271  	}
   272  	return p
   273  }
   274  
   275  // extractCgoDirectives filters only the lines containing #cgo directives from the input,
   276  // which is the comment on import "C".
   277  func extractCgoDirectives(doc string) []string {
   278  	var out []string
   279  	for _, line := range strings.Split(doc, "\n") {
   280  		// Line is
   281  		//	#cgo [GOOS/GOARCH...] LDFLAGS: stuff
   282  		//
   283  		line = strings.TrimSpace(line)
   284  		if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') {
   285  			continue
   286  		}
   287  
   288  		out = append(out, line)
   289  	}
   290  	return out
   291  }