golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/internal/pkgbits/decoder.go (about) 1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pkgbits 6 7 import ( 8 "encoding/binary" 9 "errors" 10 "fmt" 11 "go/constant" 12 "go/token" 13 "io" 14 "math/big" 15 "os" 16 "runtime" 17 "strings" 18 ) 19 20 // A PkgDecoder provides methods for decoding a package's Unified IR 21 // export data. 22 type PkgDecoder struct { 23 // version is the file format version. 24 version uint32 25 26 // aliases determines whether types.Aliases should be created 27 aliases bool 28 29 // sync indicates whether the file uses sync markers. 30 sync bool 31 32 // pkgPath is the package path for the package to be decoded. 33 // 34 // TODO(mdempsky): Remove; unneeded since CL 391014. 35 pkgPath string 36 37 // elemData is the full data payload of the encoded package. 38 // Elements are densely and contiguously packed together. 39 // 40 // The last 8 bytes of elemData are the package fingerprint. 41 elemData string 42 43 // elemEnds stores the byte-offset end positions of element 44 // bitstreams within elemData. 45 // 46 // For example, element I's bitstream data starts at elemEnds[I-1] 47 // (or 0, if I==0) and ends at elemEnds[I]. 48 // 49 // Note: elemEnds is indexed by absolute indices, not 50 // section-relative indices. 51 elemEnds []uint32 52 53 // elemEndsEnds stores the index-offset end positions of relocation 54 // sections within elemEnds. 55 // 56 // For example, section K's end positions start at elemEndsEnds[K-1] 57 // (or 0, if K==0) and end at elemEndsEnds[K]. 58 elemEndsEnds [numRelocs]uint32 59 60 scratchRelocEnt []RelocEnt 61 } 62 63 // PkgPath returns the package path for the package 64 // 65 // TODO(mdempsky): Remove; unneeded since CL 391014. 66 func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } 67 68 // SyncMarkers reports whether pr uses sync markers. 69 func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } 70 71 // NewPkgDecoder returns a PkgDecoder initialized to read the Unified 72 // IR export data from input. pkgPath is the package path for the 73 // compilation unit that produced the export data. 74 // 75 // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. 76 func NewPkgDecoder(pkgPath, input string) PkgDecoder { 77 pr := PkgDecoder{ 78 pkgPath: pkgPath, 79 //aliases: aliases.Enabled(), 80 } 81 82 // TODO(mdempsky): Implement direct indexing of input string to 83 // avoid copying the position information. 84 85 r := strings.NewReader(input) 86 87 assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) 88 89 switch pr.version { 90 default: 91 panic(fmt.Errorf("unsupported version: %v", pr.version)) 92 case 0: 93 // no flags 94 case 1: 95 var flags uint32 96 assert(binary.Read(r, binary.LittleEndian, &flags) == nil) 97 pr.sync = flags&flagSyncMarkers != 0 98 } 99 100 assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) 101 102 pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) 103 assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) 104 105 pos, err := r.Seek(0, io.SeekCurrent) 106 assert(err == nil) 107 108 pr.elemData = input[pos:] 109 assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) 110 111 return pr 112 } 113 114 // NumElems returns the number of elements in section k. 115 func (pr *PkgDecoder) NumElems(k RelocKind) int { 116 count := int(pr.elemEndsEnds[k]) 117 if k > 0 { 118 count -= int(pr.elemEndsEnds[k-1]) 119 } 120 return count 121 } 122 123 // TotalElems returns the total number of elements across all sections. 124 func (pr *PkgDecoder) TotalElems() int { 125 return len(pr.elemEnds) 126 } 127 128 // Fingerprint returns the package fingerprint. 129 func (pr *PkgDecoder) Fingerprint() [8]byte { 130 var fp [8]byte 131 copy(fp[:], pr.elemData[len(pr.elemData)-8:]) 132 return fp 133 } 134 135 // AbsIdx returns the absolute index for the given (section, index) 136 // pair. 137 func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { 138 absIdx := int(idx) 139 if k > 0 { 140 absIdx += int(pr.elemEndsEnds[k-1]) 141 } 142 if absIdx >= int(pr.elemEndsEnds[k]) { 143 errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) 144 } 145 return absIdx 146 } 147 148 // DataIdx returns the raw element bitstream for the given (section, 149 // index) pair. 150 func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { 151 absIdx := pr.AbsIdx(k, idx) 152 153 var start uint32 154 if absIdx > 0 { 155 start = pr.elemEnds[absIdx-1] 156 } 157 end := pr.elemEnds[absIdx] 158 159 return pr.elemData[start:end] 160 } 161 162 // StringIdx returns the string value for the given string index. 163 func (pr *PkgDecoder) StringIdx(idx Index) string { 164 return pr.DataIdx(RelocString, idx) 165 } 166 167 // NewDecoder returns a Decoder for the given (section, index) pair, 168 // and decodes the given SyncMarker from the element bitstream. 169 func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 170 r := pr.NewDecoderRaw(k, idx) 171 r.Sync(marker) 172 return r 173 } 174 175 // TempDecoder returns a Decoder for the given (section, index) pair, 176 // and decodes the given SyncMarker from the element bitstream. 177 // If possible the Decoder should be RetireDecoder'd when it is no longer 178 // needed, this will avoid heap allocations. 179 func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { 180 r := pr.TempDecoderRaw(k, idx) 181 r.Sync(marker) 182 return r 183 } 184 185 func (pr *PkgDecoder) RetireDecoder(d *Decoder) { 186 pr.scratchRelocEnt = d.Relocs 187 d.Relocs = nil 188 } 189 190 // NewDecoderRaw returns a Decoder for the given (section, index) pair. 191 // 192 // Most callers should use NewDecoder instead. 193 func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { 194 r := Decoder{ 195 common: pr, 196 k: k, 197 Idx: idx, 198 } 199 200 // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. 201 r.Data = *strings.NewReader(pr.DataIdx(k, idx)) 202 203 r.Sync(SyncRelocs) 204 r.Relocs = make([]RelocEnt, r.Len()) 205 for i := range r.Relocs { 206 r.Sync(SyncReloc) 207 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 208 } 209 210 return r 211 } 212 213 func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { 214 r := Decoder{ 215 common: pr, 216 k: k, 217 Idx: idx, 218 } 219 220 r.Data.Reset(pr.DataIdx(k, idx)) 221 r.Sync(SyncRelocs) 222 l := r.Len() 223 if cap(pr.scratchRelocEnt) >= l { 224 r.Relocs = pr.scratchRelocEnt[:l] 225 pr.scratchRelocEnt = nil 226 } else { 227 r.Relocs = make([]RelocEnt, l) 228 } 229 for i := range r.Relocs { 230 r.Sync(SyncReloc) 231 r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} 232 } 233 234 return r 235 } 236 237 // A Decoder provides methods for decoding an individual element's 238 // bitstream data. 239 type Decoder struct { 240 common *PkgDecoder 241 242 Relocs []RelocEnt 243 Data strings.Reader 244 245 k RelocKind 246 Idx Index 247 } 248 249 func (r *Decoder) checkErr(err error) { 250 if err != nil { 251 errorf("unexpected decoding error: %w", err) 252 } 253 } 254 255 func (r *Decoder) rawUvarint() uint64 { 256 x, err := readUvarint(&r.Data) 257 r.checkErr(err) 258 return x 259 } 260 261 // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. 262 // This avoids the interface conversion and thus has better escape properties, 263 // which flows up the stack. 264 func readUvarint(r *strings.Reader) (uint64, error) { 265 var x uint64 266 var s uint 267 for i := 0; i < binary.MaxVarintLen64; i++ { 268 b, err := r.ReadByte() 269 if err != nil { 270 if i > 0 && err == io.EOF { 271 err = io.ErrUnexpectedEOF 272 } 273 return x, err 274 } 275 if b < 0x80 { 276 if i == binary.MaxVarintLen64-1 && b > 1 { 277 return x, overflow 278 } 279 return x | uint64(b)<<s, nil 280 } 281 x |= uint64(b&0x7f) << s 282 s += 7 283 } 284 return x, overflow 285 } 286 287 var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") 288 289 func (r *Decoder) rawVarint() int64 { 290 ux := r.rawUvarint() 291 292 // Zig-zag decode. 293 x := int64(ux >> 1) 294 if ux&1 != 0 { 295 x = ^x 296 } 297 return x 298 } 299 300 func (r *Decoder) rawReloc(k RelocKind, idx int) Index { 301 e := r.Relocs[idx] 302 assert(e.Kind == k) 303 return e.Idx 304 } 305 306 // Sync decodes a sync marker from the element bitstream and asserts 307 // that it matches the expected marker. 308 // 309 // If r.common.sync is false, then Sync is a no-op. 310 func (r *Decoder) Sync(mWant SyncMarker) { 311 if !r.common.sync { 312 return 313 } 314 315 pos, _ := r.Data.Seek(0, io.SeekCurrent) 316 mHave := SyncMarker(r.rawUvarint()) 317 writerPCs := make([]int, r.rawUvarint()) 318 for i := range writerPCs { 319 writerPCs[i] = int(r.rawUvarint()) 320 } 321 322 if mHave == mWant { 323 return 324 } 325 326 // There's some tension here between printing: 327 // 328 // (1) full file paths that tools can recognize (e.g., so emacs 329 // hyperlinks the "file:line" text for easy navigation), or 330 // 331 // (2) short file paths that are easier for humans to read (e.g., by 332 // omitting redundant or irrelevant details, so it's easier to 333 // focus on the useful bits that remain). 334 // 335 // The current formatting favors the former, as it seems more 336 // helpful in practice. But perhaps the formatting could be improved 337 // to better address both concerns. For example, use relative file 338 // paths if they would be shorter, or rewrite file paths to contain 339 // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how 340 // to reliably expand that again. 341 342 fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) 343 344 fmt.Printf("\nfound %v, written at:\n", mHave) 345 if len(writerPCs) == 0 { 346 fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) 347 } 348 for _, pc := range writerPCs { 349 fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) 350 } 351 352 fmt.Printf("\nexpected %v, reading at:\n", mWant) 353 var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? 354 n := runtime.Callers(2, readerPCs[:]) 355 for _, pc := range fmtFrames(readerPCs[:n]...) { 356 fmt.Printf("\t%s\n", pc) 357 } 358 359 // We already printed a stack trace for the reader, so now we can 360 // simply exit. Printing a second one with panic or base.Fatalf 361 // would just be noise. 362 os.Exit(1) 363 } 364 365 // Bool decodes and returns a bool value from the element bitstream. 366 func (r *Decoder) Bool() bool { 367 r.Sync(SyncBool) 368 x, err := r.Data.ReadByte() 369 r.checkErr(err) 370 assert(x < 2) 371 return x != 0 372 } 373 374 // Int64 decodes and returns an int64 value from the element bitstream. 375 func (r *Decoder) Int64() int64 { 376 r.Sync(SyncInt64) 377 return r.rawVarint() 378 } 379 380 // Uint64 decodes and returns a uint64 value from the element bitstream. 381 func (r *Decoder) Uint64() uint64 { 382 r.Sync(SyncUint64) 383 return r.rawUvarint() 384 } 385 386 // Len decodes and returns a non-negative int value from the element bitstream. 387 func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } 388 389 // Int decodes and returns an int value from the element bitstream. 390 func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } 391 392 // Uint decodes and returns a uint value from the element bitstream. 393 func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } 394 395 // Code decodes a Code value from the element bitstream and returns 396 // its ordinal value. It's the caller's responsibility to convert the 397 // result to an appropriate Code type. 398 // 399 // TODO(mdempsky): Ideally this method would have signature "Code[T 400 // Code] T" instead, but we don't allow generic methods and the 401 // compiler can't depend on generics yet anyway. 402 func (r *Decoder) Code(mark SyncMarker) int { 403 r.Sync(mark) 404 return r.Len() 405 } 406 407 // Reloc decodes a relocation of expected section k from the element 408 // bitstream and returns an index to the referenced element. 409 func (r *Decoder) Reloc(k RelocKind) Index { 410 r.Sync(SyncUseReloc) 411 return r.rawReloc(k, r.Len()) 412 } 413 414 // String decodes and returns a string value from the element 415 // bitstream. 416 func (r *Decoder) String() string { 417 r.Sync(SyncString) 418 return r.common.StringIdx(r.Reloc(RelocString)) 419 } 420 421 // Strings decodes and returns a variable-length slice of strings from 422 // the element bitstream. 423 func (r *Decoder) Strings() []string { 424 res := make([]string, r.Len()) 425 for i := range res { 426 res[i] = r.String() 427 } 428 return res 429 } 430 431 // Value decodes and returns a constant.Value from the element 432 // bitstream. 433 func (r *Decoder) Value() constant.Value { 434 r.Sync(SyncValue) 435 isComplex := r.Bool() 436 val := r.scalar() 437 if isComplex { 438 val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) 439 } 440 return val 441 } 442 443 func (r *Decoder) scalar() constant.Value { 444 switch tag := CodeVal(r.Code(SyncVal)); tag { 445 default: 446 panic(fmt.Errorf("unexpected scalar tag: %v", tag)) 447 448 case ValBool: 449 return constant.MakeBool(r.Bool()) 450 case ValString: 451 return constant.MakeString(r.String()) 452 case ValInt64: 453 return constant.MakeInt64(r.Int64()) 454 case ValBigInt: 455 return constant.Make(r.bigInt()) 456 case ValBigRat: 457 num := r.bigInt() 458 denom := r.bigInt() 459 return constant.Make(new(big.Rat).SetFrac(num, denom)) 460 case ValBigFloat: 461 return constant.Make(r.bigFloat()) 462 } 463 } 464 465 func (r *Decoder) bigInt() *big.Int { 466 v := new(big.Int).SetBytes([]byte(r.String())) 467 if r.Bool() { 468 v.Neg(v) 469 } 470 return v 471 } 472 473 func (r *Decoder) bigFloat() *big.Float { 474 v := new(big.Float).SetPrec(512) 475 assert(v.UnmarshalText([]byte(r.String())) == nil) 476 return v 477 } 478 479 // @@@ Helpers 480 481 // TODO(mdempsky): These should probably be removed. I think they're a 482 // smell that the export data format is not yet quite right. 483 484 // PeekPkgPath returns the package path for the specified package 485 // index. 486 func (pr *PkgDecoder) PeekPkgPath(idx Index) string { 487 var path string 488 { 489 r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) 490 path = r.String() 491 pr.RetireDecoder(&r) 492 } 493 if path == "" { 494 path = pr.pkgPath 495 } 496 return path 497 } 498 499 // PeekObj returns the package path, object name, and CodeObj for the 500 // specified object index. 501 func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { 502 var ridx Index 503 var name string 504 var rcode int 505 { 506 r := pr.TempDecoder(RelocName, idx, SyncObject1) 507 r.Sync(SyncSym) 508 r.Sync(SyncPkg) 509 ridx = r.Reloc(RelocPkg) 510 name = r.String() 511 rcode = r.Code(SyncCodeObj) 512 pr.RetireDecoder(&r) 513 } 514 515 path := pr.PeekPkgPath(ridx) 516 assert(name != "") 517 518 tag := CodeObj(rcode) 519 520 return path, name, tag 521 }