github.com/bir3/gocompiler@v0.9.2202/src/internal/pkgbits/decoder.go (about) 1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pkgbits 6 7 import ( 8 "encoding/binary" 9 "errors" 10 "fmt" 11 "github.com/bir3/gocompiler/src/go/constant" 12 "github.com/bir3/gocompiler/src/go/token" 13 "io" 14 "math/big" 15 "os" 16 "runtime" 17 "strings" 18 ) 19 20 // A PkgDecoder provides methods for decoding a package's Unified IR 21 // export data. 22 type PkgDecoder struct { 23 // version is the file format version. 24 version uint32 25 26 // sync indicates whether the file uses sync markers. 27 sync bool 28 29 // pkgPath is the package path for the package to be decoded. 30 // 31 // TODO(mdempsky): Remove; unneeded since CL 391014. 32 pkgPath string 33 34 // elemData is the full data payload of the encoded package. 35 // Elements are densely and contiguously packed together. 36 // 37 // The last 8 bytes of elemData are the package fingerprint. 38 elemData string 39 40 // elemEnds stores the byte-offset end positions of element 41 // bitstreams within elemData. 42 // 43 // For example, element I's bitstream data starts at elemEnds[I-1] 44 // (or 0, if I==0) and ends at elemEnds[I]. 45 // 46 // Note: elemEnds is indexed by absolute indices, not 47 // section-relative indices. 48 elemEnds []uint32 49 50 // elemEndsEnds stores the index-offset end positions of relocation 51 // sections within elemEnds. 52 // 53 // For example, section K's end positions start at elemEndsEnds[K-1] 54 // (or 0, if K==0) and end at elemEndsEnds[K]. 55 elemEndsEnds [numRelocs]uint32 56 57 scratchRelocEnt []RelocEnt 58 } 59 60 // PkgPath returns the package path for the package 61 // 62 // TODO(mdempsky): Remove; unneeded since CL 391014. 63 func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } 64 65 // SyncMarkers reports whether pr uses sync markers. 66 func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } 67 68 // NewPkgDecoder returns a PkgDecoder initialized to read the Unified 69 // IR export data from input. pkgPath is the package path for the 70 // compilation unit that produced the export data. 71 // 72 // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. 73 func NewPkgDecoder(pkgPath, input string) PkgDecoder { 74 pr := PkgDecoder{ 75 pkgPath: pkgPath, 76 } 77 78 // TODO(mdempsky): Implement direct indexing of input string to 79 // avoid copying the position information. 80 81 r := strings.NewReader(input) 82 83 assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) 84 85 switch pr.version { 86 default: 87 panic(fmt.Errorf("unsupported version: %v", pr.version)) 88 case 0: 89 // no flags 90 case 1: 91 var flags uint32 92 assert(binary.Read(r, binary.LittleEndian, &flags) == nil) 93 pr.sync = flags&flagSyncMarkers != 0 94 } 95 96 assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) 97 98 pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) 99 assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) 100 101 pos, err := r.Seek(0, io.SeekCurrent) 102 assert(err == nil) 103 104 pr.elemData = input[pos:] 105 assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) 106 107 return pr 108 } 109 110 // NumElems returns the number of elements in section k. 111 func (pr *PkgDecoder) NumElems(k RelocKind) int { 112 count := int(pr.elemEndsEnds[k]) 113 if k > 0 { 114 count -= int(pr.elemEndsEnds[k-1]) 115 } 116 return count 117 } 118 119 // TotalElems returns the total number of elements across all sections. 120 func (pr *PkgDecoder) TotalElems() int { 121 return len(pr.elemEnds) 122 } 123 124 // Fingerprint returns the package fingerprint. 125 func (pr *PkgDecoder) Fingerprint() [8]byte { 126 var fp [8]byte 127 copy(fp[:], pr.elemData[len(pr.elemData)-8:]) 128 return fp 129 } 130 131 // AbsIdx returns the absolute index for the given (section, index) 132 // pair. 133 func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { 134 absIdx := int(idx) 135 if k > 0 { 136 absIdx += int(pr.elemEndsEnds[k-1]) 137 } 138 if absIdx >= int(pr.elemEndsEnds[k]) { 139 errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) 140 } 141 return absIdx 142 } 143 144 // DataIdx returns the raw element bitstream for the given (section, 145 // index) pair. 146 func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { 147 absIdx := pr.AbsIdx(k, idx) 148 149 var start uint32 150 if absIdx > 0 { 151 start = pr.elemEnds[absIdx-1] 152 } 153 end := pr.elemEnds[absIdx] 154 155 return pr.elemData[start:end] 156 } 157 158 // StringIdx returns the string value for the given string index. 159 func (pr *PkgDecoder) StringIdx(idx Index) string { 160 return pr.DataIdx(RelocString, idx) 161 } 162 163 // NewDecoder returns a Decoder for the given (section, index) pair, 164 // and decodes the given SyncMarker from the element bitstream. 165 func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 166 r := pr.NewDecoderRaw(k, idx) 167 r.Sync(marker) 168 return r 169 } 170 171 // TempDecoder returns a Decoder for the given (section, index) pair, 172 // and decodes the given SyncMarker from the element bitstream. 173 // If possible the Decoder should be RetireDecoder'd when it is no longer 174 // needed, this will avoid heap allocations. 175 func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 176 r := pr.TempDecoderRaw(k, idx) 177 r.Sync(marker) 178 return r 179 } 180 181 func (pr *PkgDecoder) RetireDecoder(d *Decoder) { 182 pr.scratchRelocEnt = d.Relocs 183 d.Relocs = nil 184 } 185 186 // NewDecoderRaw returns a Decoder for the given (section, index) pair. 187 // 188 // Most callers should use NewDecoder instead. 189 func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { 190 r := Decoder{ 191 common: pr, 192 k: k, 193 Idx: idx, 194 } 195 196 r.Data.Reset(pr.DataIdx(k, idx)) 197 r.Sync(SyncRelocs) 198 r.Relocs = make([]RelocEnt, r.Len()) 199 for i := range r.Relocs { 200 r.Sync(SyncReloc) 201 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 202 } 203 204 return r 205 } 206 207 func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { 208 r := Decoder{ 209 common: pr, 210 k: k, 211 Idx: idx, 212 } 213 214 r.Data.Reset(pr.DataIdx(k, idx)) 215 r.Sync(SyncRelocs) 216 l := r.Len() 217 if cap(pr.scratchRelocEnt) >= l { 218 r.Relocs = pr.scratchRelocEnt[:l] 219 pr.scratchRelocEnt = nil 220 } else { 221 r.Relocs = make([]RelocEnt, l) 222 } 223 for i := range r.Relocs { 224 r.Sync(SyncReloc) 225 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 226 } 227 228 return r 229 } 230 231 // A Decoder provides methods for decoding an individual element's 232 // bitstream data. 233 type Decoder struct { 234 common *PkgDecoder 235 236 Relocs []RelocEnt 237 Data strings.Reader 238 239 k RelocKind 240 Idx Index 241 } 242 243 func (r *Decoder) checkErr(err error) { 244 if err != nil { 245 errorf("unexpected decoding error: %w", err) 246 } 247 } 248 249 func (r *Decoder) rawUvarint() uint64 { 250 x, err := readUvarint(&r.Data) 251 r.checkErr(err) 252 return x 253 } 254 255 // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. 256 // This avoids the interface conversion and thus has better escape properties, 257 // which flows up the stack. 258 func readUvarint(r *strings.Reader) (uint64, error) { 259 var x uint64 260 var s uint 261 for i := 0; i < binary.MaxVarintLen64; i++ { 262 b, err := r.ReadByte() 263 if err != nil { 264 if i > 0 && err == io.EOF { 265 err = io.ErrUnexpectedEOF 266 } 267 return x, err 268 } 269 if b < 0x80 { 270 if i == binary.MaxVarintLen64-1 && b > 1 { 271 return x, overflow 272 } 273 return x | uint64(b)<<s, nil 274 } 275 x |= uint64(b&0x7f) << s 276 s += 7 277 } 278 return x, overflow 279 } 280 281 var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") 282 283 func (r *Decoder) rawVarint() int64 { 284 ux := r.rawUvarint() 285 286 // Zig-zag decode. 287 x := int64(ux >> 1) 288 if ux&1 != 0 { 289 x = ^x 290 } 291 return x 292 } 293 294 func (r *Decoder) rawReloc(k RelocKind, idx int) Index { 295 e := r.Relocs[idx] 296 assert(e.Kind == k) 297 return e.Idx 298 } 299 300 // Sync decodes a sync marker from the element bitstream and asserts 301 // that it matches the expected marker. 302 // 303 // If EnableSync is false, then Sync is a no-op. 304 func (r *Decoder) Sync(mWant SyncMarker) { 305 if !r.common.sync { 306 return 307 } 308 309 pos, _ := r.Data.Seek(0, io.SeekCurrent) 310 mHave := SyncMarker(r.rawUvarint()) 311 writerPCs := make([]int, r.rawUvarint()) 312 for i := range writerPCs { 313 writerPCs[i] = int(r.rawUvarint()) 314 } 315 316 if mHave == mWant { 317 return 318 } 319 320 // There's some tension here between printing: 321 // 322 // (1) full file paths that tools can recognize (e.g., so emacs 323 // hyperlinks the "file:line" text for easy navigation), or 324 // 325 // (2) short file paths that are easier for humans to read (e.g., by 326 // omitting redundant or irrelevant details, so it's easier to 327 // focus on the useful bits that remain). 328 // 329 // The current formatting favors the former, as it seems more 330 // helpful in practice. But perhaps the formatting could be improved 331 // to better address both concerns. For example, use relative file 332 // paths if they would be shorter, or rewrite file paths to contain 333 // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how 334 // to reliably expand that again. 335 336 fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) 337 338 fmt.Printf("\nfound %v, written at:\n", mHave) 339 if len(writerPCs) == 0 { 340 fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) 341 } 342 for _, pc := range writerPCs { 343 fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) 344 } 345 346 fmt.Printf("\nexpected %v, reading at:\n", mWant) 347 var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? 348 n := runtime.Callers(2, readerPCs[:]) 349 for _, pc := range fmtFrames(readerPCs[:n]...) { 350 fmt.Printf("\t%s\n", pc) 351 } 352 353 // We already printed a stack trace for the reader, so now we can 354 // simply exit. Printing a second one with panic or base.Fatalf 355 // would just be noise. 356 os.Exit(1) 357 } 358 359 // Bool decodes and returns a bool value from the element bitstream. 360 func (r *Decoder) Bool() bool { 361 r.Sync(SyncBool) 362 x, err := r.Data.ReadByte() 363 r.checkErr(err) 364 assert(x < 2) 365 return x != 0 366 } 367 368 // Int64 decodes and returns an int64 value from the element bitstream. 369 func (r *Decoder) Int64() int64 { 370 r.Sync(SyncInt64) 371 return r.rawVarint() 372 } 373 374 // Int64 decodes and returns a uint64 value from the element bitstream. 375 func (r *Decoder) Uint64() uint64 { 376 r.Sync(SyncUint64) 377 return r.rawUvarint() 378 } 379 380 // Len decodes and returns a non-negative int value from the element bitstream. 381 func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } 382 383 // Int decodes and returns an int value from the element bitstream. 384 func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } 385 386 // Uint decodes and returns a uint value from the element bitstream. 387 func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } 388 389 // Code decodes a Code value from the element bitstream and returns 390 // its ordinal value. It's the caller's responsibility to convert the 391 // result to an appropriate Code type. 392 // 393 // TODO(mdempsky): Ideally this method would have signature "Code[T 394 // Code] T" instead, but we don't allow generic methods and the 395 // compiler can't depend on generics yet anyway. 396 func (r *Decoder) Code(mark SyncMarker) int { 397 r.Sync(mark) 398 return r.Len() 399 } 400 401 // Reloc decodes a relocation of expected section k from the element 402 // bitstream and returns an index to the referenced element. 403 func (r *Decoder) Reloc(k RelocKind) Index { 404 r.Sync(SyncUseReloc) 405 return r.rawReloc(k, r.Len()) 406 } 407 408 // String decodes and returns a string value from the element 409 // bitstream. 410 func (r *Decoder) String() string { 411 r.Sync(SyncString) 412 return r.common.StringIdx(r.Reloc(RelocString)) 413 } 414 415 // Strings decodes and returns a variable-length slice of strings from 416 // the element bitstream. 417 func (r *Decoder) Strings() []string { 418 res := make([]string, r.Len()) 419 for i := range res { 420 res[i] = r.String() 421 } 422 return res 423 } 424 425 // Value decodes and returns a constant.Value from the element 426 // bitstream. 427 func (r *Decoder) Value() constant.Value { 428 r.Sync(SyncValue) 429 isComplex := r.Bool() 430 val := r.scalar() 431 if isComplex { 432 val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) 433 } 434 return val 435 } 436 437 func (r *Decoder) scalar() constant.Value { 438 switch tag := CodeVal(r.Code(SyncVal)); tag { 439 default: 440 panic(fmt.Errorf("unexpected scalar tag: %v", tag)) 441 442 case ValBool: 443 return constant.MakeBool(r.Bool()) 444 case ValString: 445 return constant.MakeString(r.String()) 446 case ValInt64: 447 return constant.MakeInt64(r.Int64()) 448 case ValBigInt: 449 return constant.Make(r.bigInt()) 450 case ValBigRat: 451 num := r.bigInt() 452 denom := r.bigInt() 453 return constant.Make(new(big.Rat).SetFrac(num, denom)) 454 case ValBigFloat: 455 return constant.Make(r.bigFloat()) 456 } 457 } 458 459 func (r *Decoder) bigInt() *big.Int { 460 v := new(big.Int).SetBytes([]byte(r.String())) 461 if r.Bool() { 462 v.Neg(v) 463 } 464 return v 465 } 466 467 func (r *Decoder) bigFloat() *big.Float { 468 v := new(big.Float).SetPrec(512) 469 assert(v.UnmarshalText([]byte(r.String())) == nil) 470 return v 471 } 472 473 // @@@ Helpers 474 475 // TODO(mdempsky): These should probably be removed. I think they're a 476 // smell that the export data format is not yet quite right. 477 478 // PeekPkgPath returns the package path for the specified package 479 // index. 480 func (pr *PkgDecoder) PeekPkgPath(idx Index) string { 481 var path string 482 { 483 r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) 484 path = r.String() 485 pr.RetireDecoder(&r) 486 } 487 if path == "" { 488 path = pr.pkgPath 489 } 490 return path 491 } 492 493 // PeekObj returns the package path, object name, and CodeObj for the 494 // specified object index. 495 func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { 496 var ridx Index 497 var name string 498 var rcode int 499 { 500 r := pr.TempDecoder(RelocName, idx, SyncObject1) 501 r.Sync(SyncSym) 502 r.Sync(SyncPkg) 503 ridx = r.Reloc(RelocPkg) 504 name = r.String() 505 rcode = r.Code(SyncCodeObj) 506 pr.RetireDecoder(&r) 507 } 508 509 path := pr.PeekPkgPath(ridx) 510 assert(name != "") 511 512 tag := CodeObj(rcode) 513 514 return path, name, tag 515 }