golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/pkgbits/encoder.go (about)

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/md5"
    10  	"encoding/binary"
    11  	"go/constant"
    12  	"io"
    13  	"math/big"
    14  	"runtime"
    15  )
    16  
// currentVersion is the format version number that DumpTo writes as
// the first uint32 word of the encoded output.
//
//   - v0: initial prototype
//
//   - v1: adds the flags uint32 word
const currentVersion uint32 = 1
    23  
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Elements are accumulated in memory, grouped by relocation section,
// and serialized in one pass by DumpTo.
type PkgEncoder struct {
	// elems holds the bitstream for previously encoded elements,
	// indexed first by section (RelocKind) and then by the element's
	// Index within that section. NewEncoderRaw appends an empty
	// placeholder entry, which Encoder.Flush later overwrites with the
	// finished bitstream.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted; zero
	// means markers are written without any stack frames.
	syncFrames int
}
    39  
    40  // SyncMarkers reports whether pw uses sync markers.
    41  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
    42  
    43  // NewPkgEncoder returns an initialized PkgEncoder.
    44  //
    45  // syncFrames is the number of caller frames that should be serialized
    46  // at Sync points. Serializing additional frames results in larger
    47  // export data files, but can help diagnosing desync errors in
    48  // higher-level Unified IR reader/writer code. If syncFrames is
    49  // negative, then sync markers are omitted entirely.
    50  func NewPkgEncoder(syncFrames int) PkgEncoder {
    51  	return PkgEncoder{
    52  		stringsIdx: make(map[string]Index),
    53  		syncFrames: syncFrames,
    54  	}
    55  }
    56  
    57  // DumpTo writes the package's encoded data to out0 and returns the
    58  // package fingerprint.
    59  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
    60  	h := md5.New()
    61  	out := io.MultiWriter(out0, h)
    62  
    63  	writeUint32 := func(x uint32) {
    64  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
    65  	}
    66  
    67  	writeUint32(currentVersion)
    68  
    69  	var flags uint32
    70  	if pw.SyncMarkers() {
    71  		flags |= flagSyncMarkers
    72  	}
    73  	writeUint32(flags)
    74  
    75  	// Write elemEndsEnds.
    76  	var sum uint32
    77  	for _, elems := range &pw.elems {
    78  		sum += uint32(len(elems))
    79  		writeUint32(sum)
    80  	}
    81  
    82  	// Write elemEnds.
    83  	sum = 0
    84  	for _, elems := range &pw.elems {
    85  		for _, elem := range elems {
    86  			sum += uint32(len(elem))
    87  			writeUint32(sum)
    88  		}
    89  	}
    90  
    91  	// Write elemData.
    92  	for _, elems := range &pw.elems {
    93  		for _, elem := range elems {
    94  			_, err := io.WriteString(out, elem)
    95  			assert(err == nil)
    96  		}
    97  	}
    98  
    99  	// Write fingerprint.
   100  	copy(fingerprint[:], h.Sum(nil))
   101  	_, err := out0.Write(fingerprint[:])
   102  	assert(err == nil)
   103  
   104  	return
   105  }
   106  
   107  // StringIdx adds a string value to the strings section, if not
   108  // already present, and returns its index.
   109  func (pw *PkgEncoder) StringIdx(s string) Index {
   110  	if idx, ok := pw.stringsIdx[s]; ok {
   111  		assert(pw.elems[RelocString][idx] == s)
   112  		return idx
   113  	}
   114  
   115  	idx := Index(len(pw.elems[RelocString]))
   116  	pw.elems[RelocString] = append(pw.elems[RelocString], s)
   117  	pw.stringsIdx[s] = idx
   118  	return idx
   119  }
   120  
   121  // NewEncoder returns an Encoder for a new element within the given
   122  // section, and encodes the given SyncMarker as the start of the
   123  // element bitstream.
   124  func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
   125  	e := pw.NewEncoderRaw(k)
   126  	e.Sync(marker)
   127  	return e
   128  }
   129  
   130  // NewEncoderRaw returns an Encoder for a new element within the given
   131  // section.
   132  //
   133  // Most callers should use NewEncoder instead.
   134  func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
   135  	idx := Index(len(pw.elems[k]))
   136  	pw.elems[k] = append(pw.elems[k], "") // placeholder
   137  
   138  	return Encoder{
   139  		p:   pw,
   140  		k:   k,
   141  		Idx: idx,
   142  	}
   143  }
   144  
// An Encoder provides methods for encoding an individual element's
// bitstream data.
//
// Values are accumulated in Data; Flush prepends the relocation table
// and stores the result into the owning PkgEncoder.
type Encoder struct {
	p *PkgEncoder // package encoder that owns this element

	// Relocs lists the distinct (section, index) pairs referenced by
	// this element, in order of first use. RelocMap maps each pair to
	// its position in Relocs and is allocated lazily by rawReloc.
	Relocs   []RelocEnt
	RelocMap map[RelocEnt]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush before it writes the
	// relocation header. While set, Sync omits stack frames, and a
	// second call to Flush panics.
	encodingRelocHeader bool

	k   RelocKind // section this element belongs to
	Idx Index     // index within relocation section
}
   159  
   160  // Flush finalizes the element's bitstream and returns its Index.
   161  func (w *Encoder) Flush() Index {
   162  	var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved
   163  
   164  	// Backup the data so we write the relocations at the front.
   165  	var tmp bytes.Buffer
   166  	io.Copy(&tmp, &w.Data)
   167  
   168  	// TODO(mdempsky): Consider writing these out separately so they're
   169  	// easier to strip, along with function bodies, so that we can prune
   170  	// down to just the data that's relevant to go/types.
   171  	if w.encodingRelocHeader {
   172  		panic("encodingRelocHeader already true; recursive flush?")
   173  	}
   174  	w.encodingRelocHeader = true
   175  	w.Sync(SyncRelocs)
   176  	w.Len(len(w.Relocs))
   177  	for _, rEnt := range w.Relocs {
   178  		w.Sync(SyncReloc)
   179  		w.Len(int(rEnt.Kind))
   180  		w.Len(int(rEnt.Idx))
   181  	}
   182  
   183  	io.Copy(&sb, &w.Data)
   184  	io.Copy(&sb, &tmp)
   185  	w.p.elems[w.k][w.Idx] = sb.String()
   186  
   187  	return w.Idx
   188  }
   189  
   190  func (w *Encoder) checkErr(err error) {
   191  	if err != nil {
   192  		errorf("unexpected encoding error: %v", err)
   193  	}
   194  }
   195  
   196  func (w *Encoder) rawUvarint(x uint64) {
   197  	var buf [binary.MaxVarintLen64]byte
   198  	n := binary.PutUvarint(buf[:], x)
   199  	_, err := w.Data.Write(buf[:n])
   200  	w.checkErr(err)
   201  }
   202  
   203  func (w *Encoder) rawVarint(x int64) {
   204  	// Zig-zag encode.
   205  	ux := uint64(x) << 1
   206  	if x < 0 {
   207  		ux = ^ux
   208  	}
   209  
   210  	w.rawUvarint(ux)
   211  }
   212  
   213  func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
   214  	e := RelocEnt{r, idx}
   215  	if w.RelocMap != nil {
   216  		if i, ok := w.RelocMap[e]; ok {
   217  			return int(i)
   218  		}
   219  	} else {
   220  		w.RelocMap = make(map[RelocEnt]uint32)
   221  	}
   222  
   223  	i := len(w.Relocs)
   224  	w.RelocMap[e] = uint32(i)
   225  	w.Relocs = append(w.Relocs, e)
   226  	return i
   227  }
   228  
   229  func (w *Encoder) Sync(m SyncMarker) {
   230  	if !w.p.SyncMarkers() {
   231  		return
   232  	}
   233  
   234  	// Writing out stack frame string references requires working
   235  	// relocations, but writing out the relocations themselves involves
   236  	// sync markers. To prevent infinite recursion, we simply trim the
   237  	// stack frame for sync markers within the relocation header.
   238  	var frames []string
   239  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
   240  		pcs := make([]uintptr, w.p.syncFrames)
   241  		n := runtime.Callers(2, pcs)
   242  		frames = fmtFrames(pcs[:n]...)
   243  	}
   244  
   245  	// TODO(mdempsky): Save space by writing out stack frames as a
   246  	// linked list so we can share common stack frames.
   247  	w.rawUvarint(uint64(m))
   248  	w.rawUvarint(uint64(len(frames)))
   249  	for _, frame := range frames {
   250  		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
   251  	}
   252  }
   253  
   254  // Bool encodes and writes a bool value into the element bitstream,
   255  // and then returns the bool value.
   256  //
   257  // For simple, 2-alternative encodings, the idiomatic way to call Bool
   258  // is something like:
   259  //
   260  //	if w.Bool(x != 0) {
   261  //		// alternative #1
   262  //	} else {
   263  //		// alternative #2
   264  //	}
   265  //
   266  // For multi-alternative encodings, use Code instead.
   267  func (w *Encoder) Bool(b bool) bool {
   268  	w.Sync(SyncBool)
   269  	var x byte
   270  	if b {
   271  		x = 1
   272  	}
   273  	err := w.Data.WriteByte(x)
   274  	w.checkErr(err)
   275  	return b
   276  }
   277  
   278  // Int64 encodes and writes an int64 value into the element bitstream.
   279  func (w *Encoder) Int64(x int64) {
   280  	w.Sync(SyncInt64)
   281  	w.rawVarint(x)
   282  }
   283  
   284  // Uint64 encodes and writes a uint64 value into the element bitstream.
   285  func (w *Encoder) Uint64(x uint64) {
   286  	w.Sync(SyncUint64)
   287  	w.rawUvarint(x)
   288  }
   289  
   290  // Len encodes and writes a non-negative int value into the element bitstream.
   291  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
   292  
   293  // Int encodes and writes an int value into the element bitstream.
   294  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
   295  
   296  // Uint encodes and writes a uint value into the element bitstream.
   297  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
   298  
   299  // Reloc encodes and writes a relocation for the given (section,
   300  // index) pair into the element bitstream.
   301  //
   302  // Note: Only the index is formally written into the element
   303  // bitstream, so bitstream decoders must know from context which
   304  // section an encoded relocation refers to.
   305  func (w *Encoder) Reloc(r RelocKind, idx Index) {
   306  	w.Sync(SyncUseReloc)
   307  	w.Len(w.rawReloc(r, idx))
   308  }
   309  
   310  // Code encodes and writes a Code value into the element bitstream.
   311  func (w *Encoder) Code(c Code) {
   312  	w.Sync(c.Marker())
   313  	w.Len(c.Value())
   314  }
   315  
   316  // String encodes and writes a string value into the element
   317  // bitstream.
   318  //
   319  // Internally, strings are deduplicated by adding them to the strings
   320  // section (if not already present), and then writing a relocation
   321  // into the element bitstream.
   322  func (w *Encoder) String(s string) {
   323  	w.Sync(SyncString)
   324  	w.Reloc(RelocString, w.p.StringIdx(s))
   325  }
   326  
   327  // Strings encodes and writes a variable-length slice of strings into
   328  // the element bitstream.
   329  func (w *Encoder) Strings(ss []string) {
   330  	w.Len(len(ss))
   331  	for _, s := range ss {
   332  		w.String(s)
   333  	}
   334  }
   335  
   336  // Value encodes and writes a constant.Value into the element
   337  // bitstream.
   338  func (w *Encoder) Value(val constant.Value) {
   339  	w.Sync(SyncValue)
   340  	if w.Bool(val.Kind() == constant.Complex) {
   341  		w.scalar(constant.Real(val))
   342  		w.scalar(constant.Imag(val))
   343  	} else {
   344  		w.scalar(val)
   345  	}
   346  }
   347  
   348  func (w *Encoder) scalar(val constant.Value) {
   349  	switch v := constant.Val(val).(type) {
   350  	default:
   351  		errorf("unhandled %v (%v)", val, val.Kind())
   352  	case bool:
   353  		w.Code(ValBool)
   354  		w.Bool(v)
   355  	case string:
   356  		w.Code(ValString)
   357  		w.String(v)
   358  	case int64:
   359  		w.Code(ValInt64)
   360  		w.Int64(v)
   361  	case *big.Int:
   362  		w.Code(ValBigInt)
   363  		w.bigInt(v)
   364  	case *big.Rat:
   365  		w.Code(ValBigRat)
   366  		w.bigInt(v.Num())
   367  		w.bigInt(v.Denom())
   368  	case *big.Float:
   369  		w.Code(ValBigFloat)
   370  		w.bigFloat(v)
   371  	}
   372  }
   373  
   374  func (w *Encoder) bigInt(v *big.Int) {
   375  	b := v.Bytes()
   376  	w.String(string(b)) // TODO: More efficient encoding.
   377  	w.Bool(v.Sign() < 0)
   378  }
   379  
   380  func (w *Encoder) bigFloat(v *big.Float) {
   381  	b := v.Append(nil, 'p', -1)
   382  	w.String(string(b)) // TODO: More efficient encoding.
   383  }