// github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/pkgbits/encoder.go (about)

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pkgbits

import (
	"bytes"
	"crypto/md5"
	"encoding/binary"
	"go/constant"
	"io"
	"math/big"
	"runtime"
	"strings"
)

// currentVersion is the current version number. It is the first
// uint32 word that DumpTo writes to the output.
//
//   - v0: initial prototype
//
//   - v1: adds the flags uint32 word
//
// TODO(mdempsky): For the next version bump:
//   - remove the legacy "has init" bool from the public root
//   - remove obj's "derived func instance" bool
const currentVersion uint32 = 1

// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Elements are created with NewEncoder or NewEncoderRaw, filled in
// through the returned Encoder, and stored back into the PkgEncoder
// by Encoder.Flush. DumpTo then serializes all sections.
type PkgEncoder struct {
	// elems holds the bitstream for previously encoded elements.
	// It is indexed first by section (RelocKind) and then by the
	// element's Index within that section. A slot holds the empty
	// string as a placeholder until its Encoder is flushed.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}

// SyncMarkers reports whether pw uses sync markers, that is, whether
// pw.syncFrames is non-negative.
func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }

// NewPkgEncoder returns an initialized PkgEncoder.
//
// syncFrames is the number of caller frames that should be serialized
// at Sync points. Serializing additional frames results in larger
// export data files, but can help diagnosing desync errors in
// higher-level Unified IR reader/writer code. If syncFrames is
// negative, then sync markers are omitted entirely.
55 func NewPkgEncoder(syncFrames int) PkgEncoder { 56 return PkgEncoder{ 57 stringsIdx: make(map[string]Index), 58 syncFrames: syncFrames, 59 } 60 } 61 62 // DumpTo writes the package's encoded data to out0 and returns the 63 // package fingerprint. 64 func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { 65 h := md5.New() 66 out := io.MultiWriter(out0, h) 67 68 writeUint32 := func(x uint32) { 69 assert(binary.Write(out, binary.LittleEndian, x) == nil) 70 } 71 72 writeUint32(currentVersion) 73 74 var flags uint32 75 if pw.SyncMarkers() { 76 flags |= flagSyncMarkers 77 } 78 writeUint32(flags) 79 80 // Write elemEndsEnds. 81 var sum uint32 82 for _, elems := range &pw.elems { 83 sum += uint32(len(elems)) 84 writeUint32(sum) 85 } 86 87 // Write elemEnds. 88 sum = 0 89 for _, elems := range &pw.elems { 90 for _, elem := range elems { 91 sum += uint32(len(elem)) 92 writeUint32(sum) 93 } 94 } 95 96 // Write elemData. 97 for _, elems := range &pw.elems { 98 for _, elem := range elems { 99 _, err := io.WriteString(out, elem) 100 assert(err == nil) 101 } 102 } 103 104 // Write fingerprint. 105 copy(fingerprint[:], h.Sum(nil)) 106 _, err := out0.Write(fingerprint[:]) 107 assert(err == nil) 108 109 return 110 } 111 112 // StringIdx adds a string value to the strings section, if not 113 // already present, and returns its index. 114 func (pw *PkgEncoder) StringIdx(s string) Index { 115 if idx, ok := pw.stringsIdx[s]; ok { 116 assert(pw.elems[RelocString][idx] == s) 117 return idx 118 } 119 120 idx := Index(len(pw.elems[RelocString])) 121 pw.elems[RelocString] = append(pw.elems[RelocString], s) 122 pw.stringsIdx[s] = idx 123 return idx 124 } 125 126 // NewEncoder returns an Encoder for a new element within the given 127 // section, and encodes the given SyncMarker as the start of the 128 // element bitstream. 
129 func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { 130 e := pw.NewEncoderRaw(k) 131 e.Sync(marker) 132 return e 133 } 134 135 // NewEncoderRaw returns an Encoder for a new element within the given 136 // section. 137 // 138 // Most callers should use NewEncoder instead. 139 func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { 140 idx := Index(len(pw.elems[k])) 141 pw.elems[k] = append(pw.elems[k], "") // placeholder 142 143 return Encoder{ 144 p: pw, 145 k: k, 146 Idx: idx, 147 } 148 } 149 150 // An Encoder provides methods for encoding an individual element's 151 // bitstream data. 152 type Encoder struct { 153 p *PkgEncoder 154 155 Relocs []RelocEnt 156 RelocMap map[RelocEnt]uint32 157 Data bytes.Buffer // accumulated element bitstream data 158 159 encodingRelocHeader bool 160 161 k RelocKind 162 Idx Index // index within relocation section 163 } 164 165 // Flush finalizes the element's bitstream and returns its Index. 166 func (w *Encoder) Flush() Index { 167 var sb strings.Builder 168 169 // Backup the data so we write the relocations at the front. 170 var tmp bytes.Buffer 171 io.Copy(&tmp, &w.Data) 172 173 // TODO(mdempsky): Consider writing these out separately so they're 174 // easier to strip, along with function bodies, so that we can prune 175 // down to just the data that's relevant to go/types. 
176 if w.encodingRelocHeader { 177 panic("encodingRelocHeader already true; recursive flush?") 178 } 179 w.encodingRelocHeader = true 180 w.Sync(SyncRelocs) 181 w.Len(len(w.Relocs)) 182 for _, rEnt := range w.Relocs { 183 w.Sync(SyncReloc) 184 w.Len(int(rEnt.Kind)) 185 w.Len(int(rEnt.Idx)) 186 } 187 188 io.Copy(&sb, &w.Data) 189 io.Copy(&sb, &tmp) 190 w.p.elems[w.k][w.Idx] = sb.String() 191 192 return w.Idx 193 } 194 195 func (w *Encoder) checkErr(err error) { 196 if err != nil { 197 errorf("unexpected encoding error: %v", err) 198 } 199 } 200 201 func (w *Encoder) rawUvarint(x uint64) { 202 var buf [binary.MaxVarintLen64]byte 203 n := binary.PutUvarint(buf[:], x) 204 _, err := w.Data.Write(buf[:n]) 205 w.checkErr(err) 206 } 207 208 func (w *Encoder) rawVarint(x int64) { 209 // Zig-zag encode. 210 ux := uint64(x) << 1 211 if x < 0 { 212 ux = ^ux 213 } 214 215 w.rawUvarint(ux) 216 } 217 218 func (w *Encoder) rawReloc(r RelocKind, idx Index) int { 219 e := RelocEnt{r, idx} 220 if w.RelocMap != nil { 221 if i, ok := w.RelocMap[e]; ok { 222 return int(i) 223 } 224 } else { 225 w.RelocMap = make(map[RelocEnt]uint32) 226 } 227 228 i := len(w.Relocs) 229 w.RelocMap[e] = uint32(i) 230 w.Relocs = append(w.Relocs, e) 231 return i 232 } 233 234 func (w *Encoder) Sync(m SyncMarker) { 235 if !w.p.SyncMarkers() { 236 return 237 } 238 239 // Writing out stack frame string references requires working 240 // relocations, but writing out the relocations themselves involves 241 // sync markers. To prevent infinite recursion, we simply trim the 242 // stack frame for sync markers within the relocation header. 243 var frames []string 244 if !w.encodingRelocHeader && w.p.syncFrames > 0 { 245 pcs := make([]uintptr, w.p.syncFrames) 246 n := runtime.Callers(2, pcs) 247 frames = fmtFrames(pcs[:n]...) 248 } 249 250 // TODO(mdempsky): Save space by writing out stack frames as a 251 // linked list so we can share common stack frames. 
252 w.rawUvarint(uint64(m)) 253 w.rawUvarint(uint64(len(frames))) 254 for _, frame := range frames { 255 w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) 256 } 257 } 258 259 // Bool encodes and writes a bool value into the element bitstream, 260 // and then returns the bool value. 261 // 262 // For simple, 2-alternative encodings, the idiomatic way to call Bool 263 // is something like: 264 // 265 // if w.Bool(x != 0) { 266 // // alternative #1 267 // } else { 268 // // alternative #2 269 // } 270 // 271 // For multi-alternative encodings, use Code instead. 272 func (w *Encoder) Bool(b bool) bool { 273 w.Sync(SyncBool) 274 var x byte 275 if b { 276 x = 1 277 } 278 err := w.Data.WriteByte(x) 279 w.checkErr(err) 280 return b 281 } 282 283 // Int64 encodes and writes an int64 value into the element bitstream. 284 func (w *Encoder) Int64(x int64) { 285 w.Sync(SyncInt64) 286 w.rawVarint(x) 287 } 288 289 // Uint64 encodes and writes a uint64 value into the element bitstream. 290 func (w *Encoder) Uint64(x uint64) { 291 w.Sync(SyncUint64) 292 w.rawUvarint(x) 293 } 294 295 // Len encodes and writes a non-negative int value into the element bitstream. 296 func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } 297 298 // Int encodes and writes an int value into the element bitstream. 299 func (w *Encoder) Int(x int) { w.Int64(int64(x)) } 300 301 // Len encodes and writes a uint value into the element bitstream. 302 func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } 303 304 // Reloc encodes and writes a relocation for the given (section, 305 // index) pair into the element bitstream. 306 // 307 // Note: Only the index is formally written into the element 308 // bitstream, so bitstream decoders must know from context which 309 // section an encoded relocation refers to. 
310 func (w *Encoder) Reloc(r RelocKind, idx Index) { 311 w.Sync(SyncUseReloc) 312 w.Len(w.rawReloc(r, idx)) 313 } 314 315 // Code encodes and writes a Code value into the element bitstream. 316 func (w *Encoder) Code(c Code) { 317 w.Sync(c.Marker()) 318 w.Len(c.Value()) 319 } 320 321 // String encodes and writes a string value into the element 322 // bitstream. 323 // 324 // Internally, strings are deduplicated by adding them to the strings 325 // section (if not already present), and then writing a relocation 326 // into the element bitstream. 327 func (w *Encoder) String(s string) { 328 w.StringRef(w.p.StringIdx(s)) 329 } 330 331 // StringRef writes a reference to the given index, which must be a 332 // previously encoded string value. 333 func (w *Encoder) StringRef(idx Index) { 334 w.Sync(SyncString) 335 w.Reloc(RelocString, idx) 336 } 337 338 // Strings encodes and writes a variable-length slice of strings into 339 // the element bitstream. 340 func (w *Encoder) Strings(ss []string) { 341 w.Len(len(ss)) 342 for _, s := range ss { 343 w.String(s) 344 } 345 } 346 347 // Value encodes and writes a constant.Value into the element 348 // bitstream. 
349 func (w *Encoder) Value(val constant.Value) { 350 w.Sync(SyncValue) 351 if w.Bool(val.Kind() == constant.Complex) { 352 w.scalar(constant.Real(val)) 353 w.scalar(constant.Imag(val)) 354 } else { 355 w.scalar(val) 356 } 357 } 358 359 func (w *Encoder) scalar(val constant.Value) { 360 switch v := constant.Val(val).(type) { 361 default: 362 errorf("unhandled %v (%v)", val, val.Kind()) 363 case bool: 364 w.Code(ValBool) 365 w.Bool(v) 366 case string: 367 w.Code(ValString) 368 w.String(v) 369 case int64: 370 w.Code(ValInt64) 371 w.Int64(v) 372 case *big.Int: 373 w.Code(ValBigInt) 374 w.bigInt(v) 375 case *big.Rat: 376 w.Code(ValBigRat) 377 w.bigInt(v.Num()) 378 w.bigInt(v.Denom()) 379 case *big.Float: 380 w.Code(ValBigFloat) 381 w.bigFloat(v) 382 } 383 } 384 385 func (w *Encoder) bigInt(v *big.Int) { 386 b := v.Bytes() 387 w.String(string(b)) // TODO: More efficient encoding. 388 w.Bool(v.Sign() < 0) 389 } 390 391 func (w *Encoder) bigFloat(v *big.Float) { 392 b := v.Append(nil, 'p', -1) 393 w.String(string(b)) // TODO: More efficient encoding. 394 }