golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/pkgbits/decoder.go (about)

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"encoding/binary"
     9  	"errors"
    10  	"fmt"
    11  	"go/constant"
    12  	"go/token"
    13  	"io"
    14  	"math/big"
    15  	"os"
    16  	"runtime"
    17  	"strings"
    18  )
    19  
    20  // A PkgDecoder provides methods for decoding a package's Unified IR
    21  // export data.
    22  type PkgDecoder struct {
    23  	// version is the file format version.
    24  	version uint32
    25  
    26  	// aliases determines whether types.Aliases should be created
    27  	aliases bool
    28  
    29  	// sync indicates whether the file uses sync markers.
    30  	sync bool
    31  
    32  	// pkgPath is the package path for the package to be decoded.
    33  	//
    34  	// TODO(mdempsky): Remove; unneeded since CL 391014.
    35  	pkgPath string
    36  
    37  	// elemData is the full data payload of the encoded package.
    38  	// Elements are densely and contiguously packed together.
    39  	//
    40  	// The last 8 bytes of elemData are the package fingerprint.
    41  	elemData string
    42  
    43  	// elemEnds stores the byte-offset end positions of element
    44  	// bitstreams within elemData.
    45  	//
    46  	// For example, element I's bitstream data starts at elemEnds[I-1]
    47  	// (or 0, if I==0) and ends at elemEnds[I].
    48  	//
    49  	// Note: elemEnds is indexed by absolute indices, not
    50  	// section-relative indices.
    51  	elemEnds []uint32
    52  
    53  	// elemEndsEnds stores the index-offset end positions of relocation
    54  	// sections within elemEnds.
    55  	//
    56  	// For example, section K's end positions start at elemEndsEnds[K-1]
    57  	// (or 0, if K==0) and end at elemEndsEnds[K].
    58  	elemEndsEnds [numRelocs]uint32
    59  
    60  	scratchRelocEnt []RelocEnt
    61  }
    62  
    63  // PkgPath returns the package path for the package
    64  //
    65  // TODO(mdempsky): Remove; unneeded since CL 391014.
    66  func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
    67  
    68  // SyncMarkers reports whether pr uses sync markers.
    69  func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
    70  
    71  // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
    72  // IR export data from input. pkgPath is the package path for the
    73  // compilation unit that produced the export data.
    74  //
    75  // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
    76  func NewPkgDecoder(pkgPath, input string) PkgDecoder {
    77  	pr := PkgDecoder{
    78  		pkgPath: pkgPath,
    79  		//aliases: aliases.Enabled(),
    80  	}
    81  
    82  	// TODO(mdempsky): Implement direct indexing of input string to
    83  	// avoid copying the position information.
    84  
    85  	r := strings.NewReader(input)
    86  
    87  	assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
    88  
    89  	switch pr.version {
    90  	default:
    91  		panic(fmt.Errorf("unsupported version: %v", pr.version))
    92  	case 0:
    93  		// no flags
    94  	case 1:
    95  		var flags uint32
    96  		assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
    97  		pr.sync = flags&flagSyncMarkers != 0
    98  	}
    99  
   100  	assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
   101  
   102  	pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
   103  	assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
   104  
   105  	pos, err := r.Seek(0, io.SeekCurrent)
   106  	assert(err == nil)
   107  
   108  	pr.elemData = input[pos:]
   109  	assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
   110  
   111  	return pr
   112  }
   113  
   114  // NumElems returns the number of elements in section k.
   115  func (pr *PkgDecoder) NumElems(k RelocKind) int {
   116  	count := int(pr.elemEndsEnds[k])
   117  	if k > 0 {
   118  		count -= int(pr.elemEndsEnds[k-1])
   119  	}
   120  	return count
   121  }
   122  
   123  // TotalElems returns the total number of elements across all sections.
   124  func (pr *PkgDecoder) TotalElems() int {
   125  	return len(pr.elemEnds)
   126  }
   127  
   128  // Fingerprint returns the package fingerprint.
   129  func (pr *PkgDecoder) Fingerprint() [8]byte {
   130  	var fp [8]byte
   131  	copy(fp[:], pr.elemData[len(pr.elemData)-8:])
   132  	return fp
   133  }
   134  
   135  // AbsIdx returns the absolute index for the given (section, index)
   136  // pair.
   137  func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
   138  	absIdx := int(idx)
   139  	if k > 0 {
   140  		absIdx += int(pr.elemEndsEnds[k-1])
   141  	}
   142  	if absIdx >= int(pr.elemEndsEnds[k]) {
   143  		errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
   144  	}
   145  	return absIdx
   146  }
   147  
   148  // DataIdx returns the raw element bitstream for the given (section,
   149  // index) pair.
   150  func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
   151  	absIdx := pr.AbsIdx(k, idx)
   152  
   153  	var start uint32
   154  	if absIdx > 0 {
   155  		start = pr.elemEnds[absIdx-1]
   156  	}
   157  	end := pr.elemEnds[absIdx]
   158  
   159  	return pr.elemData[start:end]
   160  }
   161  
   162  // StringIdx returns the string value for the given string index.
   163  func (pr *PkgDecoder) StringIdx(idx Index) string {
   164  	return pr.DataIdx(RelocString, idx)
   165  }
   166  
   167  // NewDecoder returns a Decoder for the given (section, index) pair,
   168  // and decodes the given SyncMarker from the element bitstream.
   169  func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
   170  	r := pr.NewDecoderRaw(k, idx)
   171  	r.Sync(marker)
   172  	return r
   173  }
   174  
   175  // TempDecoder returns a Decoder for the given (section, index) pair,
   176  // and decodes the given SyncMarker from the element bitstream.
   177  // If possible the Decoder should be RetireDecoder'd when it is no longer
   178  // needed, this will avoid heap allocations.
   179  func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
   180  	r := pr.TempDecoderRaw(k, idx)
   181  	r.Sync(marker)
   182  	return r
   183  }
   184  
   185  func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
   186  	pr.scratchRelocEnt = d.Relocs
   187  	d.Relocs = nil
   188  }
   189  
   190  // NewDecoderRaw returns a Decoder for the given (section, index) pair.
   191  //
   192  // Most callers should use NewDecoder instead.
   193  func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
   194  	r := Decoder{
   195  		common: pr,
   196  		k:      k,
   197  		Idx:    idx,
   198  	}
   199  
   200  	// TODO(mdempsky) r.data.Reset(...) after #44505 is resolved.
   201  	r.Data = *strings.NewReader(pr.DataIdx(k, idx))
   202  
   203  	r.Sync(SyncRelocs)
   204  	r.Relocs = make([]RelocEnt, r.Len())
   205  	for i := range r.Relocs {
   206  		r.Sync(SyncReloc)
   207  		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
   208  	}
   209  
   210  	return r
   211  }
   212  
   213  func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
   214  	r := Decoder{
   215  		common: pr,
   216  		k:      k,
   217  		Idx:    idx,
   218  	}
   219  
   220  	r.Data.Reset(pr.DataIdx(k, idx))
   221  	r.Sync(SyncRelocs)
   222  	l := r.Len()
   223  	if cap(pr.scratchRelocEnt) >= l {
   224  		r.Relocs = pr.scratchRelocEnt[:l]
   225  		pr.scratchRelocEnt = nil
   226  	} else {
   227  		r.Relocs = make([]RelocEnt, l)
   228  	}
   229  	for i := range r.Relocs {
   230  		r.Sync(SyncReloc)
   231  		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
   232  	}
   233  
   234  	return r
   235  }
   236  
   237  // A Decoder provides methods for decoding an individual element's
   238  // bitstream data.
   239  type Decoder struct {
   240  	common *PkgDecoder
   241  
   242  	Relocs []RelocEnt
   243  	Data   strings.Reader
   244  
   245  	k   RelocKind
   246  	Idx Index
   247  }
   248  
   249  func (r *Decoder) checkErr(err error) {
   250  	if err != nil {
   251  		errorf("unexpected decoding error: %w", err)
   252  	}
   253  }
   254  
   255  func (r *Decoder) rawUvarint() uint64 {
   256  	x, err := readUvarint(&r.Data)
   257  	r.checkErr(err)
   258  	return x
   259  }
   260  
   261  // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
   262  // This avoids the interface conversion and thus has better escape properties,
   263  // which flows up the stack.
   264  func readUvarint(r *strings.Reader) (uint64, error) {
   265  	var x uint64
   266  	var s uint
   267  	for i := 0; i < binary.MaxVarintLen64; i++ {
   268  		b, err := r.ReadByte()
   269  		if err != nil {
   270  			if i > 0 && err == io.EOF {
   271  				err = io.ErrUnexpectedEOF
   272  			}
   273  			return x, err
   274  		}
   275  		if b < 0x80 {
   276  			if i == binary.MaxVarintLen64-1 && b > 1 {
   277  				return x, overflow
   278  			}
   279  			return x | uint64(b)<<s, nil
   280  		}
   281  		x |= uint64(b&0x7f) << s
   282  		s += 7
   283  	}
   284  	return x, overflow
   285  }
   286  
   287  var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
   288  
   289  func (r *Decoder) rawVarint() int64 {
   290  	ux := r.rawUvarint()
   291  
   292  	// Zig-zag decode.
   293  	x := int64(ux >> 1)
   294  	if ux&1 != 0 {
   295  		x = ^x
   296  	}
   297  	return x
   298  }
   299  
   300  func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
   301  	e := r.Relocs[idx]
   302  	assert(e.Kind == k)
   303  	return e.Idx
   304  }
   305  
   306  // Sync decodes a sync marker from the element bitstream and asserts
   307  // that it matches the expected marker.
   308  //
   309  // If r.common.sync is false, then Sync is a no-op.
   310  func (r *Decoder) Sync(mWant SyncMarker) {
   311  	if !r.common.sync {
   312  		return
   313  	}
   314  
   315  	pos, _ := r.Data.Seek(0, io.SeekCurrent)
   316  	mHave := SyncMarker(r.rawUvarint())
   317  	writerPCs := make([]int, r.rawUvarint())
   318  	for i := range writerPCs {
   319  		writerPCs[i] = int(r.rawUvarint())
   320  	}
   321  
   322  	if mHave == mWant {
   323  		return
   324  	}
   325  
   326  	// There's some tension here between printing:
   327  	//
   328  	// (1) full file paths that tools can recognize (e.g., so emacs
   329  	//     hyperlinks the "file:line" text for easy navigation), or
   330  	//
   331  	// (2) short file paths that are easier for humans to read (e.g., by
   332  	//     omitting redundant or irrelevant details, so it's easier to
   333  	//     focus on the useful bits that remain).
   334  	//
   335  	// The current formatting favors the former, as it seems more
   336  	// helpful in practice. But perhaps the formatting could be improved
   337  	// to better address both concerns. For example, use relative file
   338  	// paths if they would be shorter, or rewrite file paths to contain
   339  	// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
   340  	// to reliably expand that again.
   341  
   342  	fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
   343  
   344  	fmt.Printf("\nfound %v, written at:\n", mHave)
   345  	if len(writerPCs) == 0 {
   346  		fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
   347  	}
   348  	for _, pc := range writerPCs {
   349  		fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
   350  	}
   351  
   352  	fmt.Printf("\nexpected %v, reading at:\n", mWant)
   353  	var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
   354  	n := runtime.Callers(2, readerPCs[:])
   355  	for _, pc := range fmtFrames(readerPCs[:n]...) {
   356  		fmt.Printf("\t%s\n", pc)
   357  	}
   358  
   359  	// We already printed a stack trace for the reader, so now we can
   360  	// simply exit. Printing a second one with panic or base.Fatalf
   361  	// would just be noise.
   362  	os.Exit(1)
   363  }
   364  
   365  // Bool decodes and returns a bool value from the element bitstream.
   366  func (r *Decoder) Bool() bool {
   367  	r.Sync(SyncBool)
   368  	x, err := r.Data.ReadByte()
   369  	r.checkErr(err)
   370  	assert(x < 2)
   371  	return x != 0
   372  }
   373  
   374  // Int64 decodes and returns an int64 value from the element bitstream.
   375  func (r *Decoder) Int64() int64 {
   376  	r.Sync(SyncInt64)
   377  	return r.rawVarint()
   378  }
   379  
   380  // Uint64 decodes and returns a uint64 value from the element bitstream.
   381  func (r *Decoder) Uint64() uint64 {
   382  	r.Sync(SyncUint64)
   383  	return r.rawUvarint()
   384  }
   385  
   386  // Len decodes and returns a non-negative int value from the element bitstream.
   387  func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
   388  
   389  // Int decodes and returns an int value from the element bitstream.
   390  func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
   391  
   392  // Uint decodes and returns a uint value from the element bitstream.
   393  func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
   394  
   395  // Code decodes a Code value from the element bitstream and returns
   396  // its ordinal value. It's the caller's responsibility to convert the
   397  // result to an appropriate Code type.
   398  //
   399  // TODO(mdempsky): Ideally this method would have signature "Code[T
   400  // Code] T" instead, but we don't allow generic methods and the
   401  // compiler can't depend on generics yet anyway.
   402  func (r *Decoder) Code(mark SyncMarker) int {
   403  	r.Sync(mark)
   404  	return r.Len()
   405  }
   406  
   407  // Reloc decodes a relocation of expected section k from the element
   408  // bitstream and returns an index to the referenced element.
   409  func (r *Decoder) Reloc(k RelocKind) Index {
   410  	r.Sync(SyncUseReloc)
   411  	return r.rawReloc(k, r.Len())
   412  }
   413  
   414  // String decodes and returns a string value from the element
   415  // bitstream.
   416  func (r *Decoder) String() string {
   417  	r.Sync(SyncString)
   418  	return r.common.StringIdx(r.Reloc(RelocString))
   419  }
   420  
   421  // Strings decodes and returns a variable-length slice of strings from
   422  // the element bitstream.
   423  func (r *Decoder) Strings() []string {
   424  	res := make([]string, r.Len())
   425  	for i := range res {
   426  		res[i] = r.String()
   427  	}
   428  	return res
   429  }
   430  
   431  // Value decodes and returns a constant.Value from the element
   432  // bitstream.
   433  func (r *Decoder) Value() constant.Value {
   434  	r.Sync(SyncValue)
   435  	isComplex := r.Bool()
   436  	val := r.scalar()
   437  	if isComplex {
   438  		val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
   439  	}
   440  	return val
   441  }
   442  
   443  func (r *Decoder) scalar() constant.Value {
   444  	switch tag := CodeVal(r.Code(SyncVal)); tag {
   445  	default:
   446  		panic(fmt.Errorf("unexpected scalar tag: %v", tag))
   447  
   448  	case ValBool:
   449  		return constant.MakeBool(r.Bool())
   450  	case ValString:
   451  		return constant.MakeString(r.String())
   452  	case ValInt64:
   453  		return constant.MakeInt64(r.Int64())
   454  	case ValBigInt:
   455  		return constant.Make(r.bigInt())
   456  	case ValBigRat:
   457  		num := r.bigInt()
   458  		denom := r.bigInt()
   459  		return constant.Make(new(big.Rat).SetFrac(num, denom))
   460  	case ValBigFloat:
   461  		return constant.Make(r.bigFloat())
   462  	}
   463  }
   464  
   465  func (r *Decoder) bigInt() *big.Int {
   466  	v := new(big.Int).SetBytes([]byte(r.String()))
   467  	if r.Bool() {
   468  		v.Neg(v)
   469  	}
   470  	return v
   471  }
   472  
   473  func (r *Decoder) bigFloat() *big.Float {
   474  	v := new(big.Float).SetPrec(512)
   475  	assert(v.UnmarshalText([]byte(r.String())) == nil)
   476  	return v
   477  }
   478  
   479  // @@@ Helpers
   480  
   481  // TODO(mdempsky): These should probably be removed. I think they're a
   482  // smell that the export data format is not yet quite right.
   483  
   484  // PeekPkgPath returns the package path for the specified package
   485  // index.
   486  func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
   487  	var path string
   488  	{
   489  		r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
   490  		path = r.String()
   491  		pr.RetireDecoder(&r)
   492  	}
   493  	if path == "" {
   494  		path = pr.pkgPath
   495  	}
   496  	return path
   497  }
   498  
   499  // PeekObj returns the package path, object name, and CodeObj for the
   500  // specified object index.
   501  func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
   502  	var ridx Index
   503  	var name string
   504  	var rcode int
   505  	{
   506  		r := pr.TempDecoder(RelocName, idx, SyncObject1)
   507  		r.Sync(SyncSym)
   508  		r.Sync(SyncPkg)
   509  		ridx = r.Reloc(RelocPkg)
   510  		name = r.String()
   511  		rcode = r.Code(SyncCodeObj)
   512  		pr.RetireDecoder(&r)
   513  	}
   514  
   515  	path := pr.PeekPkgPath(ridx)
   516  	assert(name != "")
   517  
   518  	tag := CodeObj(rcode)
   519  
   520  	return path, name, tag
   521  }