github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/modindex/build_read.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file is a lightly modified copy go/build/read.go with unused parts
     6  // removed.
     7  
     8  package modindex
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"errors"
    14  	"fmt"
    15  	"go/ast"
    16  	"go/build"
    17  	"go/parser"
    18  	"go/token"
    19  	"io"
    20  	"strconv"
    21  	"strings"
    22  	"unicode"
    23  	"unicode/utf8"
    24  )
    25  
    26  type importReader struct {
    27  	b    *bufio.Reader
    28  	buf  []byte
    29  	peek byte
    30  	err  error
    31  	eof  bool
    32  	nerr int
    33  	pos  token.Position
    34  }
    35  
    36  var bom = []byte{0xef, 0xbb, 0xbf}
    37  
    38  func newImportReader(name string, r io.Reader) *importReader {
    39  	b := bufio.NewReader(r)
    40  	// Remove leading UTF-8 BOM.
    41  	// Per https://golang.org/ref/spec#Source_code_representation:
    42  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    43  	// if it is the first Unicode code point in the source text.
    44  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    45  		b.Discard(3)
    46  	}
    47  	return &importReader{
    48  		b: b,
    49  		pos: token.Position{
    50  			Filename: name,
    51  			Line:     1,
    52  			Column:   1,
    53  		},
    54  	}
    55  }
    56  
    57  func isIdent(c byte) bool {
    58  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    59  }
    60  
    61  var (
    62  	errSyntax = errors.New("syntax error")
    63  	errNUL    = errors.New("unexpected NUL in input")
    64  )
    65  
    66  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    67  func (r *importReader) syntaxError() {
    68  	if r.err == nil {
    69  		r.err = errSyntax
    70  	}
    71  }
    72  
    73  // readByte reads the next byte from the input, saves it in buf, and returns it.
    74  // If an error occurs, readByte records the error in r.err and returns 0.
    75  func (r *importReader) readByte() byte {
    76  	c, err := r.b.ReadByte()
    77  	if err == nil {
    78  		r.buf = append(r.buf, c)
    79  		if c == 0 {
    80  			err = errNUL
    81  		}
    82  	}
    83  	if err != nil {
    84  		if err == io.EOF {
    85  			r.eof = true
    86  		} else if r.err == nil {
    87  			r.err = err
    88  		}
    89  		c = 0
    90  	}
    91  	return c
    92  }
    93  
    94  // readByteNoBuf is like readByte but doesn't buffer the byte.
    95  // It exhausts r.buf before reading from r.b.
    96  func (r *importReader) readByteNoBuf() byte {
    97  	var c byte
    98  	var err error
    99  	if len(r.buf) > 0 {
   100  		c = r.buf[0]
   101  		r.buf = r.buf[1:]
   102  	} else {
   103  		c, err = r.b.ReadByte()
   104  		if err == nil && c == 0 {
   105  			err = errNUL
   106  		}
   107  	}
   108  
   109  	if err != nil {
   110  		if err == io.EOF {
   111  			r.eof = true
   112  		} else if r.err == nil {
   113  			r.err = err
   114  		}
   115  		return 0
   116  	}
   117  	r.pos.Offset++
   118  	if c == '\n' {
   119  		r.pos.Line++
   120  		r.pos.Column = 1
   121  	} else {
   122  		r.pos.Column++
   123  	}
   124  	return c
   125  }
   126  
   127  // peekByte returns the next byte from the input reader but does not advance beyond it.
   128  // If skipSpace is set, peekByte skips leading spaces and comments.
   129  func (r *importReader) peekByte(skipSpace bool) byte {
   130  	if r.err != nil {
   131  		if r.nerr++; r.nerr > 10000 {
   132  			panic("go/build: import reader looping")
   133  		}
   134  		return 0
   135  	}
   136  
   137  	// Use r.peek as first input byte.
   138  	// Don't just return r.peek here: it might have been left by peekByte(false)
   139  	// and this might be peekByte(true).
   140  	c := r.peek
   141  	if c == 0 {
   142  		c = r.readByte()
   143  	}
   144  	for r.err == nil && !r.eof {
   145  		if skipSpace {
   146  			// For the purposes of this reader, semicolons are never necessary to
   147  			// understand the input and are treated as spaces.
   148  			switch c {
   149  			case ' ', '\f', '\t', '\r', '\n', ';':
   150  				c = r.readByte()
   151  				continue
   152  
   153  			case '/':
   154  				c = r.readByte()
   155  				if c == '/' {
   156  					for c != '\n' && r.err == nil && !r.eof {
   157  						c = r.readByte()
   158  					}
   159  				} else if c == '*' {
   160  					var c1 byte
   161  					for (c != '*' || c1 != '/') && r.err == nil {
   162  						if r.eof {
   163  							r.syntaxError()
   164  						}
   165  						c, c1 = c1, r.readByte()
   166  					}
   167  				} else {
   168  					r.syntaxError()
   169  				}
   170  				c = r.readByte()
   171  				continue
   172  			}
   173  		}
   174  		break
   175  	}
   176  	r.peek = c
   177  	return r.peek
   178  }
   179  
   180  // nextByte is like peekByte but advances beyond the returned byte.
   181  func (r *importReader) nextByte(skipSpace bool) byte {
   182  	c := r.peekByte(skipSpace)
   183  	r.peek = 0
   184  	return c
   185  }
   186  
   187  var goEmbed = []byte("go:embed")
   188  
   189  // findEmbed advances the input reader to the next //go:embed comment.
   190  // It reports whether it found a comment.
   191  // (Otherwise it found an error or EOF.)
   192  func (r *importReader) findEmbed(first bool) bool {
   193  	// The import block scan stopped after a non-space character,
   194  	// so the reader is not at the start of a line on the first call.
   195  	// After that, each //go:embed extraction leaves the reader
   196  	// at the end of a line.
   197  	startLine := !first
   198  	var c byte
   199  	for r.err == nil && !r.eof {
   200  		c = r.readByteNoBuf()
   201  	Reswitch:
   202  		switch c {
   203  		default:
   204  			startLine = false
   205  
   206  		case '\n':
   207  			startLine = true
   208  
   209  		case ' ', '\t':
   210  			// leave startLine alone
   211  
   212  		case '"':
   213  			startLine = false
   214  			for r.err == nil {
   215  				if r.eof {
   216  					r.syntaxError()
   217  				}
   218  				c = r.readByteNoBuf()
   219  				if c == '\\' {
   220  					r.readByteNoBuf()
   221  					if r.err != nil {
   222  						r.syntaxError()
   223  						return false
   224  					}
   225  					continue
   226  				}
   227  				if c == '"' {
   228  					c = r.readByteNoBuf()
   229  					goto Reswitch
   230  				}
   231  			}
   232  			goto Reswitch
   233  
   234  		case '`':
   235  			startLine = false
   236  			for r.err == nil {
   237  				if r.eof {
   238  					r.syntaxError()
   239  				}
   240  				c = r.readByteNoBuf()
   241  				if c == '`' {
   242  					c = r.readByteNoBuf()
   243  					goto Reswitch
   244  				}
   245  			}
   246  
   247  		case '\'':
   248  			startLine = false
   249  			for r.err == nil {
   250  				if r.eof {
   251  					r.syntaxError()
   252  				}
   253  				c = r.readByteNoBuf()
   254  				if c == '\\' {
   255  					r.readByteNoBuf()
   256  					if r.err != nil {
   257  						r.syntaxError()
   258  						return false
   259  					}
   260  					continue
   261  				}
   262  				if c == '\'' {
   263  					c = r.readByteNoBuf()
   264  					goto Reswitch
   265  				}
   266  			}
   267  
   268  		case '/':
   269  			c = r.readByteNoBuf()
   270  			switch c {
   271  			default:
   272  				startLine = false
   273  				goto Reswitch
   274  
   275  			case '*':
   276  				var c1 byte
   277  				for (c != '*' || c1 != '/') && r.err == nil {
   278  					if r.eof {
   279  						r.syntaxError()
   280  					}
   281  					c, c1 = c1, r.readByteNoBuf()
   282  				}
   283  				startLine = false
   284  
   285  			case '/':
   286  				if startLine {
   287  					// Try to read this as a //go:embed comment.
   288  					for i := range goEmbed {
   289  						c = r.readByteNoBuf()
   290  						if c != goEmbed[i] {
   291  							goto SkipSlashSlash
   292  						}
   293  					}
   294  					c = r.readByteNoBuf()
   295  					if c == ' ' || c == '\t' {
   296  						// Found one!
   297  						return true
   298  					}
   299  				}
   300  			SkipSlashSlash:
   301  				for c != '\n' && r.err == nil && !r.eof {
   302  					c = r.readByteNoBuf()
   303  				}
   304  				startLine = true
   305  			}
   306  		}
   307  	}
   308  	return false
   309  }
   310  
   311  // readKeyword reads the given keyword from the input.
   312  // If the keyword is not present, readKeyword records a syntax error.
   313  func (r *importReader) readKeyword(kw string) {
   314  	r.peekByte(true)
   315  	for i := 0; i < len(kw); i++ {
   316  		if r.nextByte(false) != kw[i] {
   317  			r.syntaxError()
   318  			return
   319  		}
   320  	}
   321  	if isIdent(r.peekByte(false)) {
   322  		r.syntaxError()
   323  	}
   324  }
   325  
   326  // readIdent reads an identifier from the input.
   327  // If an identifier is not present, readIdent records a syntax error.
   328  func (r *importReader) readIdent() {
   329  	c := r.peekByte(true)
   330  	if !isIdent(c) {
   331  		r.syntaxError()
   332  		return
   333  	}
   334  	for isIdent(r.peekByte(false)) {
   335  		r.peek = 0
   336  	}
   337  }
   338  
   339  // readString reads a quoted string literal from the input.
   340  // If an identifier is not present, readString records a syntax error.
   341  func (r *importReader) readString() {
   342  	switch r.nextByte(true) {
   343  	case '`':
   344  		for r.err == nil {
   345  			if r.nextByte(false) == '`' {
   346  				break
   347  			}
   348  			if r.eof {
   349  				r.syntaxError()
   350  			}
   351  		}
   352  	case '"':
   353  		for r.err == nil {
   354  			c := r.nextByte(false)
   355  			if c == '"' {
   356  				break
   357  			}
   358  			if r.eof || c == '\n' {
   359  				r.syntaxError()
   360  			}
   361  			if c == '\\' {
   362  				r.nextByte(false)
   363  			}
   364  		}
   365  	default:
   366  		r.syntaxError()
   367  	}
   368  }
   369  
   370  // readImport reads an import clause - optional identifier followed by quoted string -
   371  // from the input.
   372  func (r *importReader) readImport() {
   373  	c := r.peekByte(true)
   374  	if c == '.' {
   375  		r.peek = 0
   376  	} else if isIdent(c) {
   377  		r.readIdent()
   378  	}
   379  	r.readString()
   380  }
   381  
   382  // readComments is like io.ReadAll, except that it only reads the leading
   383  // block of comments in the file.
   384  func readComments(f io.Reader) ([]byte, error) {
   385  	r := newImportReader("", f)
   386  	r.peekByte(true)
   387  	if r.err == nil && !r.eof {
   388  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   389  		r.buf = r.buf[:len(r.buf)-1]
   390  	}
   391  	return r.buf, r.err
   392  }
   393  
   394  // readGoInfo expects a Go file as input and reads the file up to and including the import section.
   395  // It records what it learned in *info.
   396  // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
   397  // info.imports and info.embeds.
   398  //
   399  // It only returns an error if there are problems reading the file,
   400  // not for syntax errors in the file itself.
   401  func readGoInfo(f io.Reader, info *fileInfo) error {
   402  	r := newImportReader(info.name, f)
   403  
   404  	r.readKeyword("package")
   405  	r.readIdent()
   406  	for r.peekByte(true) == 'i' {
   407  		r.readKeyword("import")
   408  		if r.peekByte(true) == '(' {
   409  			r.nextByte(false)
   410  			for r.peekByte(true) != ')' && r.err == nil {
   411  				r.readImport()
   412  			}
   413  			r.nextByte(false)
   414  		} else {
   415  			r.readImport()
   416  		}
   417  	}
   418  
   419  	info.header = r.buf
   420  
   421  	// If we stopped successfully before EOF, we read a byte that told us we were done.
   422  	// Return all but that last byte, which would cause a syntax error if we let it through.
   423  	if r.err == nil && !r.eof {
   424  		info.header = r.buf[:len(r.buf)-1]
   425  	}
   426  
   427  	// If we stopped for a syntax error, consume the whole file so that
   428  	// we are sure we don't change the errors that go/parser returns.
   429  	if r.err == errSyntax {
   430  		r.err = nil
   431  		for r.err == nil && !r.eof {
   432  			r.readByte()
   433  		}
   434  		info.header = r.buf
   435  	}
   436  	if r.err != nil {
   437  		return r.err
   438  	}
   439  
   440  	if info.fset == nil {
   441  		return nil
   442  	}
   443  
   444  	// Parse file header & record imports.
   445  	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
   446  	if info.parseErr != nil {
   447  		return nil
   448  	}
   449  
   450  	hasEmbed := false
   451  	for _, decl := range info.parsed.Decls {
   452  		d, ok := decl.(*ast.GenDecl)
   453  		if !ok {
   454  			continue
   455  		}
   456  		for _, dspec := range d.Specs {
   457  			spec, ok := dspec.(*ast.ImportSpec)
   458  			if !ok {
   459  				continue
   460  			}
   461  			quoted := spec.Path.Value
   462  			path, err := strconv.Unquote(quoted)
   463  			if err != nil {
   464  				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
   465  			}
   466  			if path == "embed" {
   467  				hasEmbed = true
   468  			}
   469  
   470  			doc := spec.Doc
   471  			if doc == nil && len(d.Specs) == 1 {
   472  				doc = d.Doc
   473  			}
   474  			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
   475  		}
   476  	}
   477  
   478  	// Extract directives.
   479  	for _, group := range info.parsed.Comments {
   480  		if group.Pos() >= info.parsed.Package {
   481  			break
   482  		}
   483  		for _, c := range group.List {
   484  			if strings.HasPrefix(c.Text, "//go:") {
   485  				info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)})
   486  			}
   487  		}
   488  	}
   489  
   490  	// If the file imports "embed",
   491  	// we have to look for //go:embed comments
   492  	// in the remainder of the file.
   493  	// The compiler will enforce the mapping of comments to
   494  	// declared variables. We just need to know the patterns.
   495  	// If there were //go:embed comments earlier in the file
   496  	// (near the package statement or imports), the compiler
   497  	// will reject them. They can be (and have already been) ignored.
   498  	if hasEmbed {
   499  		var line []byte
   500  		for first := true; r.findEmbed(first); first = false {
   501  			line = line[:0]
   502  			pos := r.pos
   503  			for {
   504  				c := r.readByteNoBuf()
   505  				if c == '\n' || r.err != nil || r.eof {
   506  					break
   507  				}
   508  				line = append(line, c)
   509  			}
   510  			// Add args if line is well-formed.
   511  			// Ignore badly-formed lines - the compiler will report them when it finds them,
   512  			// and we can pretend they are not there to help go list succeed with what it knows.
   513  			embs, err := parseGoEmbed(string(line), pos)
   514  			if err == nil {
   515  				info.embeds = append(info.embeds, embs...)
   516  			}
   517  		}
   518  	}
   519  
   520  	return nil
   521  }
   522  
   523  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   524  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   525  // This is based on a similar function in github.com/go-asm/go/cmd/compile/gc/noder.go;
   526  // this version calculates position information as well.
   527  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   528  	trimBytes := func(n int) {
   529  		pos.Offset += n
   530  		pos.Column += utf8.RuneCountInString(args[:n])
   531  		args = args[n:]
   532  	}
   533  	trimSpace := func() {
   534  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   535  		trimBytes(len(args) - len(trim))
   536  	}
   537  
   538  	var list []fileEmbed
   539  	for trimSpace(); args != ""; trimSpace() {
   540  		var path string
   541  		pathPos := pos
   542  	Switch:
   543  		switch args[0] {
   544  		default:
   545  			i := len(args)
   546  			for j, c := range args {
   547  				if unicode.IsSpace(c) {
   548  					i = j
   549  					break
   550  				}
   551  			}
   552  			path = args[:i]
   553  			trimBytes(i)
   554  
   555  		case '`':
   556  			var ok bool
   557  			path, _, ok = strings.Cut(args[1:], "`")
   558  			if !ok {
   559  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   560  			}
   561  			trimBytes(1 + len(path) + 1)
   562  
   563  		case '"':
   564  			i := 1
   565  			for ; i < len(args); i++ {
   566  				if args[i] == '\\' {
   567  					i++
   568  					continue
   569  				}
   570  				if args[i] == '"' {
   571  					q, err := strconv.Unquote(args[:i+1])
   572  					if err != nil {
   573  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   574  					}
   575  					path = q
   576  					trimBytes(i + 1)
   577  					break Switch
   578  				}
   579  			}
   580  			if i >= len(args) {
   581  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   582  			}
   583  		}
   584  
   585  		if args != "" {
   586  			r, _ := utf8.DecodeRuneInString(args)
   587  			if !unicode.IsSpace(r) {
   588  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   589  			}
   590  		}
   591  		list = append(list, fileEmbed{path, pathPos})
   592  	}
   593  	return list, nil
   594  }