github.com/bazelbuild/bazel-gazelle@v0.36.1-0.20240520142334-61b277ba6fed/language/go/embed.go (about)

     1  /* Copyright 2021 The Bazel Authors. All rights reserved.
     2  
     3  Licensed under the Apache License, Version 2.0 (the "License");
     4  you may not use this file except in compliance with the License.
     5  You may obtain a copy of the License at
     6  
     7     http://www.apache.org/licenses/LICENSE-2.0
     8  
     9  Unless required by applicable law or agreed to in writing, software
    10  distributed under the License is distributed on an "AS IS" BASIS,
    11  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  See the License for the specific language governing permissions and
    13  limitations under the License.
    14  */
    15  
    16  package golang
    17  
    18  import (
    19  	"fmt"
    20  	"log"
    21  	"os"
    22  	"path"
    23  	"path/filepath"
    24  	"strings"
    25  	"unicode/utf8"
    26  
    27  	"golang.org/x/mod/module"
    28  )
    29  
// embedResolver maps go:embed patterns in source files to lists of files that
// should appear in embedsrcs attributes. It is built by newEmbedResolver and
// queried one pattern at a time with resolve.
type embedResolver struct {
	// files is a list of embeddable files and directory trees, rooted in the
	// package directory. Only top-level nodes appear here; nested files are
	// reachable through each directory node's entries.
	files []*embeddableNode
}
    37  
    38  type embeddableNode struct {
    39  	path    string
    40  	entries []*embeddableNode // non-nil for directories
    41  }
    42  
    43  func (f *embeddableNode) isDir() bool {
    44  	return f.entries != nil
    45  }
    46  
    47  func (f *embeddableNode) isHidden() bool {
    48  	base := path.Base(f.path)
    49  	return strings.HasPrefix(base, ".") || strings.HasPrefix(base, "_")
    50  }
    51  
    52  // newEmbedResolver builds a set of files that may be embedded. This is
    53  // approximately all files in a Bazel package including explicitly declared
    54  // generated files and files in subdirectories without build files.
    55  // Files in other Bazel packages are not listed, since it might not be possible
    56  // to reference those files if they aren't listed in an export_files
    57  // declaration.
    58  //
    59  // This function walks subdirectory trees and may be expensive. Don't call it
    60  // unless a go:embed directive is actually present.
    61  //
    62  // dir is the absolute path to the directory containing the embed directive.
    63  //
    64  // rel is the relative path from the workspace root to the same directory
    65  // (or "" if the directory is the workspace root itself).
    66  //
    67  // validBuildFileNames is the configured list of recognized build file names.
    68  // These are used to identify Bazel packages in subdirectories that Gazelle
    69  // did not visit.
    70  //
    71  // pkgRels is a set of relative paths from the workspace root to directories
    72  // that contain (or will contain) build files. It doesn't need to contain
    73  // entries for the entire workspace, but it should contain entries for
    74  // subdirectories processed earlier (this avoids redundant O(n^2) I/O).
    75  //
    76  // subdirs, regFiles, and genFiles are lists of subdirectories, regular files,
    77  // and declared generated files in dir, respectively.
    78  func newEmbedResolver(dir, rel string, validBuildFileNames []string, pkgRels map[string]bool, subdirs, regFiles, genFiles []string) *embedResolver {
    79  	root := &embeddableNode{entries: []*embeddableNode{}}
    80  	index := make(map[string]*embeddableNode)
    81  
    82  	var add func(string, bool) *embeddableNode
    83  	add = func(rel string, isDir bool) *embeddableNode {
    84  		if n := index[rel]; n != nil {
    85  			return n
    86  		}
    87  		dir := path.Dir(rel)
    88  		parent := root
    89  		if dir != "." {
    90  			parent = add(dir, true)
    91  		}
    92  		f := &embeddableNode{path: rel}
    93  		if isDir {
    94  			f.entries = []*embeddableNode{}
    95  		}
    96  		parent.entries = append(parent.entries, f)
    97  		index[rel] = f
    98  		return f
    99  	}
   100  
   101  	for _, fs := range [...][]string{regFiles, genFiles} {
   102  		for _, f := range fs {
   103  			if !isBadEmbedName(f) {
   104  				add(f, false)
   105  			}
   106  		}
   107  	}
   108  
   109  	for _, subdir := range subdirs {
   110  		err := filepath.Walk(filepath.Join(dir, subdir), func(p string, info os.FileInfo, err error) error {
   111  			if err != nil {
   112  				return err
   113  			}
   114  			fileRel, _ := filepath.Rel(dir, p)
   115  			fileRel = filepath.ToSlash(fileRel)
   116  			base := filepath.Base(p)
   117  			if !info.IsDir() {
   118  				if !isBadEmbedName(base) {
   119  					add(fileRel, false)
   120  					return nil
   121  				}
   122  				return nil
   123  			}
   124  			if isBadEmbedName(base) {
   125  				return filepath.SkipDir
   126  			}
   127  			if pkgRels[path.Join(rel, fileRel)] {
   128  				// Directory contains a Go package and will contain a build file,
   129  				// if it doesn't already.
   130  				return filepath.SkipDir
   131  			}
   132  			for _, name := range validBuildFileNames {
   133  				if bFileInfo, err := os.Stat(filepath.Join(p, name)); err == nil && !bFileInfo.IsDir() {
   134  					// Directory already contains a build file.
   135  					return filepath.SkipDir
   136  				}
   137  			}
   138  			add(fileRel, true)
   139  			return nil
   140  		})
   141  		if err != nil {
   142  			log.Printf("listing embeddable files in %s: %v", dir, err)
   143  		}
   144  	}
   145  
   146  	return &embedResolver{files: root.entries}
   147  }
   148  
   149  // resolve expands a single go:embed pattern into a list of files that should
   150  // be included in embedsrcs. Directory paths are not included in the returned
   151  // list. This means there's no way to embed an empty directory.
   152  func (er *embedResolver) resolve(embed fileEmbed) (list []string, err error) {
   153  	defer func() {
   154  		if err != nil {
   155  			err = fmt.Errorf("%v: pattern %s: %w", embed.pos, embed.path, err)
   156  		}
   157  	}()
   158  
   159  	glob := embed.path
   160  	all := strings.HasPrefix(embed.path, "all:")
   161  	if all {
   162  		glob = strings.TrimPrefix(embed.path, "all:")
   163  	}
   164  
   165  	// Check whether the pattern is valid at all.
   166  	if _, err := path.Match(glob, ""); err != nil || !validEmbedPattern(glob) {
   167  		return nil, fmt.Errorf("invalid pattern syntax")
   168  	}
   169  
   170  	// Match the pattern against each path in the tree. If the pattern matches a
   171  	// directory, we need to include each file in that directory, even if the file
   172  	// doesn't match the pattern separate. By default, hidden files (starting
   173  	// with . or _) are excluded but all: prefix forces them to be included.
   174  	//
   175  	// For example, the pattern "*" matches "a", ".b", and "_c". If "a" is a
   176  	// directory, we would include "a/d", even though it doesn't match "*". We
   177  	// would not include "a/.e".
   178  	var visit func(*embeddableNode, bool)
   179  	visit = func(f *embeddableNode, add bool) {
   180  		convertedPath := filepath.ToSlash(f.path)
   181  		match, _ := path.Match(glob, convertedPath)
   182  		add = match || (add && (!f.isHidden() || all))
   183  		if !f.isDir() {
   184  			if add {
   185  				list = append(list, convertedPath)
   186  			}
   187  			return
   188  		}
   189  		for _, e := range f.entries {
   190  			visit(e, add)
   191  		}
   192  	}
   193  	for _, f := range er.files {
   194  		visit(f, false)
   195  	}
   196  	if len(list) == 0 {
   197  		return nil, fmt.Errorf("matched no files")
   198  	}
   199  	return list, nil
   200  }
   201  
   202  // Copied from cmd/go/internal/load.validEmbedPattern.
   203  func validEmbedPattern(pattern string) bool {
   204  	return pattern != "." && fsValidPath(pattern)
   205  }
   206  
   207  // fsValidPath reports whether the given path name
   208  // is valid for use in a call to Open.
   209  //
   210  // Path names passed to open are UTF-8-encoded,
   211  // unrooted, slash-separated sequences of path elements, like “x/y/z”.
   212  // Path names must not contain an element that is “.” or “..” or the empty string,
   213  // except for the special case that the root directory is named “.”.
   214  // Paths must not start or end with a slash: “/x” and “x/” are invalid.
   215  //
   216  // Note that paths are slash-separated on all systems, even Windows.
   217  // Paths containing other characters such as backslash and colon
   218  // are accepted as valid, but those characters must never be
   219  // interpreted by an FS implementation as path element separators.
   220  //
   221  // Copied from io/fs.ValidPath to avoid making go1.16 a build-time dependency
   222  // for Gazelle.
   223  func fsValidPath(name string) bool {
   224  	if !utf8.ValidString(name) {
   225  		return false
   226  	}
   227  
   228  	if name == "." {
   229  		// special case
   230  		return true
   231  	}
   232  
   233  	// Iterate over elements in name, checking each.
   234  	for {
   235  		i := 0
   236  		for i < len(name) && name[i] != '/' {
   237  			i++
   238  		}
   239  		elem := name[:i]
   240  		if elem == "" || elem == "." || elem == ".." {
   241  			return false
   242  		}
   243  		if i == len(name) {
   244  			return true // reached clean ending
   245  		}
   246  		name = name[i+1:]
   247  	}
   248  }
   249  
   250  // isBadEmbedName reports whether name is the base name of a file that
   251  // can't or won't be included in modules and therefore shouldn't be treated
   252  // as existing for embedding.
   253  //
   254  // Copied from cmd/go/internal/load.isBadEmbedName.
   255  func isBadEmbedName(name string) bool {
   256  	if err := module.CheckFilePath(name); err != nil {
   257  		return true
   258  	}
   259  	switch name {
   260  	// Empty string should be impossible but make it bad.
   261  	case "":
   262  		return true
   263  	// Version control directories won't be present in module.
   264  	case ".bzr", ".hg", ".git", ".svn":
   265  		return true
   266  	}
   267  	return false
   268  }