github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/debug/dwarf/line.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package dwarf 6 7 import ( 8 "errors" 9 "fmt" 10 "io" 11 "path" 12 "strings" 13 ) 14 15 // A LineReader reads a sequence of LineEntry structures from a DWARF 16 // "line" section for a single compilation unit. LineEntries occur in 17 // order of increasing PC and each LineEntry gives metadata for the 18 // instructions from that LineEntry's PC to just before the next 19 // LineEntry's PC. The last entry will have its EndSequence field set. 20 type LineReader struct { 21 buf buf 22 23 // Original .debug_line section data. Used by Seek. 24 section []byte 25 26 // Header information 27 version uint16 28 minInstructionLength int 29 maxOpsPerInstruction int 30 defaultIsStmt bool 31 lineBase int 32 lineRange int 33 opcodeBase int 34 opcodeLengths []int 35 directories []string 36 fileEntries []*LineFile 37 38 programOffset Offset // section offset of line number program 39 endOffset Offset // section offset of byte following program 40 41 initialFileEntries int // initial length of fileEntries 42 43 // Current line number program state machine registers 44 state LineEntry // public state 45 fileIndex int // private state 46 } 47 48 // A LineEntry is a row in a DWARF line table. 49 type LineEntry struct { 50 // Address is the program-counter value of a machine 51 // instruction generated by the compiler. This LineEntry 52 // applies to each instruction from Address to just before the 53 // Address of the next LineEntry. 54 Address uint64 55 56 // OpIndex is the index of an operation within a VLIW 57 // instruction. The index of the first operation is 0. For 58 // non-VLIW architectures, it will always be 0. 
Address and 59 // OpIndex together form an operation pointer that can 60 // reference any individual operation within the instruction 61 // stream. 62 OpIndex int 63 64 // File is the source file corresponding to these 65 // instructions. 66 File *LineFile 67 68 // Line is the source code line number corresponding to these 69 // instructions. Lines are numbered beginning at 1. It may be 70 // 0 if these instructions cannot be attributed to any source 71 // line. 72 Line int 73 74 // Column is the column number within the source line of these 75 // instructions. Columns are numbered beginning at 1. It may 76 // be 0 to indicate the "left edge" of the line. 77 Column int 78 79 // IsStmt indicates that Address is a recommended breakpoint 80 // location, such as the beginning of a line, statement, or a 81 // distinct subpart of a statement. 82 IsStmt bool 83 84 // BasicBlock indicates that Address is the beginning of a 85 // basic block. 86 BasicBlock bool 87 88 // PrologueEnd indicates that Address is one (of possibly 89 // many) PCs where execution should be suspended for a 90 // breakpoint on entry to the containing function. 91 // 92 // Added in DWARF 3. 93 PrologueEnd bool 94 95 // EpilogueBegin indicates that Address is one (of possibly 96 // many) PCs where execution should be suspended for a 97 // breakpoint on exit from this function. 98 // 99 // Added in DWARF 3. 100 EpilogueBegin bool 101 102 // ISA is the instruction set architecture for these 103 // instructions. Possible ISA values should be defined by the 104 // applicable ABI specification. 105 // 106 // Added in DWARF 3. 107 ISA int 108 109 // Discriminator is an arbitrary integer indicating the block 110 // to which these instructions belong. It serves to 111 // distinguish among multiple blocks that may all have with 112 // the same source file, line, and column. Where only one 113 // block exists for a given source position, it should be 0. 114 // 115 // Added in DWARF 3. 
116 Discriminator int 117 118 // EndSequence indicates that Address is the first byte after 119 // the end of a sequence of target machine instructions. If it 120 // is set, only this and the Address field are meaningful. A 121 // line number table may contain information for multiple 122 // potentially disjoint instruction sequences. The last entry 123 // in a line table should always have EndSequence set. 124 EndSequence bool 125 } 126 127 // A LineFile is a source file referenced by a DWARF line table entry. 128 type LineFile struct { 129 Name string 130 Mtime uint64 // Implementation defined modification time, or 0 if unknown 131 Length int // File length, or 0 if unknown 132 } 133 134 // LineReader returns a new reader for the line table of compilation 135 // unit cu, which must be an Entry with tag TagCompileUnit. 136 // 137 // If this compilation unit has no line table, it returns nil, nil. 138 func (d *Data) LineReader(cu *Entry) (*LineReader, error) { 139 if d.line == nil { 140 // No line tables available. 141 return nil, nil 142 } 143 144 // Get line table information from cu. 145 off, ok := cu.Val(AttrStmtList).(int64) 146 if !ok { 147 // cu has no line table. 148 return nil, nil 149 } 150 if off > int64(len(d.line)) { 151 return nil, errors.New("AttrStmtList value out of range") 152 } 153 // AttrCompDir is optional if all file names are absolute. Use 154 // the empty string if it's not present. 155 compDir, _ := cu.Val(AttrCompDir).(string) 156 157 // Create the LineReader. 158 u := &d.unit[d.offsetToUnit(cu.Offset)] 159 buf := makeBuf(d, u, "line", Offset(off), d.line[off:]) 160 // The compilation directory is implicitly directories[0]. 161 r := LineReader{buf: buf, section: d.line, directories: []string{compDir}} 162 163 // Read the header. 164 if err := r.readHeader(); err != nil { 165 return nil, err 166 } 167 168 // Initialize line reader state. 
169 r.Reset() 170 171 return &r, nil 172 } 173 174 // readHeader reads the line number program header from r.buf and sets 175 // all of the header fields in r. 176 func (r *LineReader) readHeader() error { 177 buf := &r.buf 178 179 // Read basic header fields [DWARF2 6.2.4]. 180 hdrOffset := buf.off 181 unitLength, dwarf64 := buf.unitLength() 182 r.endOffset = buf.off + unitLength 183 if r.endOffset > buf.off+Offset(len(buf.data)) { 184 return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))} 185 } 186 r.version = buf.uint16() 187 if buf.err == nil && (r.version < 2 || r.version > 4) { 188 // DWARF goes to all this effort to make new opcodes 189 // backward-compatible, and then adds fields right in 190 // the middle of the header in new versions, so we're 191 // picky about only supporting known line table 192 // versions. 193 return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)} 194 } 195 var headerLength Offset 196 if dwarf64 { 197 headerLength = Offset(buf.uint64()) 198 } else { 199 headerLength = Offset(buf.uint32()) 200 } 201 r.programOffset = buf.off + headerLength 202 r.minInstructionLength = int(buf.uint8()) 203 if r.version >= 4 { 204 // [DWARF4 6.2.4] 205 r.maxOpsPerInstruction = int(buf.uint8()) 206 } else { 207 r.maxOpsPerInstruction = 1 208 } 209 r.defaultIsStmt = buf.uint8() != 0 210 r.lineBase = int(int8(buf.uint8())) 211 r.lineRange = int(buf.uint8()) 212 213 // Validate header. 214 if buf.err != nil { 215 return buf.err 216 } 217 if r.maxOpsPerInstruction == 0 { 218 return DecodeError{"line", hdrOffset, "invalid maximum operations per instruction: 0"} 219 } 220 if r.lineRange == 0 { 221 return DecodeError{"line", hdrOffset, "invalid line range: 0"} 222 } 223 224 // Read standard opcode length table. This table starts with opcode 1. 
225 r.opcodeBase = int(buf.uint8()) 226 r.opcodeLengths = make([]int, r.opcodeBase) 227 for i := 1; i < r.opcodeBase; i++ { 228 r.opcodeLengths[i] = int(buf.uint8()) 229 } 230 231 // Validate opcode lengths. 232 if buf.err != nil { 233 return buf.err 234 } 235 for i, length := range r.opcodeLengths { 236 if known, ok := knownOpcodeLengths[i]; ok && known != length { 237 return DecodeError{"line", hdrOffset, fmt.Sprintf("opcode %d expected to have length %d, but has length %d", i, known, length)} 238 } 239 } 240 241 // Read include directories table. The caller already set 242 // directories[0] to the compilation directory. 243 for { 244 directory := buf.string() 245 if buf.err != nil { 246 return buf.err 247 } 248 if len(directory) == 0 { 249 break 250 } 251 if !pathIsAbs(directory) { 252 // Relative paths are implicitly relative to 253 // the compilation directory. 254 directory = pathJoin(r.directories[0], directory) 255 } 256 r.directories = append(r.directories, directory) 257 } 258 259 // Read file name list. File numbering starts with 1, so leave 260 // the first entry nil. 261 r.fileEntries = make([]*LineFile, 1) 262 for { 263 if done, err := r.readFileEntry(); err != nil { 264 return err 265 } else if done { 266 break 267 } 268 } 269 r.initialFileEntries = len(r.fileEntries) 270 271 return buf.err 272 } 273 274 // readFileEntry reads a file entry from either the header or a 275 // DW_LNE_define_file extended opcode and adds it to r.fileEntries. A 276 // true return value indicates that there are no more entries to read. 
277 func (r *LineReader) readFileEntry() (bool, error) { 278 name := r.buf.string() 279 if r.buf.err != nil { 280 return false, r.buf.err 281 } 282 if len(name) == 0 { 283 return true, nil 284 } 285 off := r.buf.off 286 dirIndex := int(r.buf.uint()) 287 if !pathIsAbs(name) { 288 if dirIndex >= len(r.directories) { 289 return false, DecodeError{"line", off, "directory index too large"} 290 } 291 name = pathJoin(r.directories[dirIndex], name) 292 } 293 mtime := r.buf.uint() 294 length := int(r.buf.uint()) 295 296 r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length}) 297 return false, nil 298 } 299 300 // updateFile updates r.state.File after r.fileIndex has 301 // changed or r.fileEntries has changed. 302 func (r *LineReader) updateFile() { 303 if r.fileIndex < len(r.fileEntries) { 304 r.state.File = r.fileEntries[r.fileIndex] 305 } else { 306 r.state.File = nil 307 } 308 } 309 310 // Next sets *entry to the next row in this line table and moves to 311 // the next row. If there are no more entries and the line table is 312 // properly terminated, it returns io.EOF. 313 // 314 // Rows are always in order of increasing entry.Address, but 315 // entry.Line may go forward or backward. 316 func (r *LineReader) Next(entry *LineEntry) error { 317 if r.buf.err != nil { 318 return r.buf.err 319 } 320 321 // Execute opcodes until we reach an opcode that emits a line 322 // table entry. 323 for { 324 if len(r.buf.data) == 0 { 325 return io.EOF 326 } 327 emit := r.step(entry) 328 if r.buf.err != nil { 329 return r.buf.err 330 } 331 if emit { 332 return nil 333 } 334 } 335 } 336 337 // knownOpcodeLengths gives the opcode lengths (in varint arguments) 338 // of known standard opcodes. 
339 var knownOpcodeLengths = map[int]int{ 340 lnsCopy: 0, 341 lnsAdvancePC: 1, 342 lnsAdvanceLine: 1, 343 lnsSetFile: 1, 344 lnsNegateStmt: 0, 345 lnsSetBasicBlock: 0, 346 lnsConstAddPC: 0, 347 lnsSetPrologueEnd: 0, 348 lnsSetEpilogueBegin: 0, 349 lnsSetISA: 1, 350 // lnsFixedAdvancePC takes a uint8 rather than a varint; it's 351 // unclear what length the header is supposed to claim, so 352 // ignore it. 353 } 354 355 // step processes the next opcode and updates r.state. If the opcode 356 // emits a row in the line table, this updates *entry and returns 357 // true. 358 func (r *LineReader) step(entry *LineEntry) bool { 359 opcode := int(r.buf.uint8()) 360 361 if opcode >= r.opcodeBase { 362 // Special opcode [DWARF2 6.2.5.1, DWARF4 6.2.5.1] 363 adjustedOpcode := opcode - r.opcodeBase 364 r.advancePC(adjustedOpcode / r.lineRange) 365 lineDelta := r.lineBase + adjustedOpcode%r.lineRange 366 r.state.Line += lineDelta 367 goto emit 368 } 369 370 switch opcode { 371 case 0: 372 // Extended opcode [DWARF2 6.2.5.3] 373 length := Offset(r.buf.uint()) 374 startOff := r.buf.off 375 opcode := r.buf.uint8() 376 377 switch opcode { 378 case lneEndSequence: 379 r.state.EndSequence = true 380 *entry = r.state 381 r.resetState() 382 383 case lneSetAddress: 384 r.state.Address = r.buf.addr() 385 386 case lneDefineFile: 387 if done, err := r.readFileEntry(); err != nil { 388 r.buf.err = err 389 return false 390 } else if done { 391 r.buf.err = DecodeError{"line", startOff, "malformed DW_LNE_define_file operation"} 392 return false 393 } 394 r.updateFile() 395 396 case lneSetDiscriminator: 397 // [DWARF4 6.2.5.3] 398 r.state.Discriminator = int(r.buf.uint()) 399 } 400 401 r.buf.skip(int(startOff + length - r.buf.off)) 402 403 if opcode == lneEndSequence { 404 return true 405 } 406 407 // Standard opcodes [DWARF2 6.2.5.2] 408 case lnsCopy: 409 goto emit 410 411 case lnsAdvancePC: 412 r.advancePC(int(r.buf.uint())) 413 414 case lnsAdvanceLine: 415 r.state.Line += int(r.buf.int()) 
416 417 case lnsSetFile: 418 r.fileIndex = int(r.buf.uint()) 419 r.updateFile() 420 421 case lnsSetColumn: 422 r.state.Column = int(r.buf.uint()) 423 424 case lnsNegateStmt: 425 r.state.IsStmt = !r.state.IsStmt 426 427 case lnsSetBasicBlock: 428 r.state.BasicBlock = true 429 430 case lnsConstAddPC: 431 r.advancePC((255 - r.opcodeBase) / r.lineRange) 432 433 case lnsFixedAdvancePC: 434 r.state.Address += uint64(r.buf.uint16()) 435 436 // DWARF3 standard opcodes [DWARF3 6.2.5.2] 437 case lnsSetPrologueEnd: 438 r.state.PrologueEnd = true 439 440 case lnsSetEpilogueBegin: 441 r.state.EpilogueBegin = true 442 443 case lnsSetISA: 444 r.state.ISA = int(r.buf.uint()) 445 446 default: 447 // Unhandled standard opcode. Skip the number of 448 // arguments that the prologue says this opcode has. 449 for i := 0; i < r.opcodeLengths[opcode]; i++ { 450 r.buf.uint() 451 } 452 } 453 return false 454 455 emit: 456 *entry = r.state 457 r.state.BasicBlock = false 458 r.state.PrologueEnd = false 459 r.state.EpilogueBegin = false 460 r.state.Discriminator = 0 461 return true 462 } 463 464 // advancePC advances "operation pointer" (the combination of Address 465 // and OpIndex) in r.state by opAdvance steps. 466 func (r *LineReader) advancePC(opAdvance int) { 467 opIndex := r.state.OpIndex + opAdvance 468 r.state.Address += uint64(r.minInstructionLength * (opIndex / r.maxOpsPerInstruction)) 469 r.state.OpIndex = opIndex % r.maxOpsPerInstruction 470 } 471 472 // A LineReaderPos represents a position in a line table. 473 type LineReaderPos struct { 474 // off is the current offset in the DWARF line section. 475 off Offset 476 // numFileEntries is the length of fileEntries. 477 numFileEntries int 478 // state and fileIndex are the statement machine state at 479 // offset off. 480 state LineEntry 481 fileIndex int 482 } 483 484 // Tell returns the current position in the line table. 
485 func (r *LineReader) Tell() LineReaderPos { 486 return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex} 487 } 488 489 // Seek restores the line table reader to a position returned by Tell. 490 // 491 // The argument pos must have been returned by a call to Tell on this 492 // line table. 493 func (r *LineReader) Seek(pos LineReaderPos) { 494 r.buf.off = pos.off 495 r.buf.data = r.section[r.buf.off:r.endOffset] 496 r.fileEntries = r.fileEntries[:pos.numFileEntries] 497 r.state = pos.state 498 r.fileIndex = pos.fileIndex 499 } 500 501 // Reset repositions the line table reader at the beginning of the 502 // line table. 503 func (r *LineReader) Reset() { 504 // Reset buffer to the line number program offset. 505 r.buf.off = r.programOffset 506 r.buf.data = r.section[r.buf.off:r.endOffset] 507 508 // Reset file entries list. 509 r.fileEntries = r.fileEntries[:r.initialFileEntries] 510 511 // Reset line number program state. 512 r.resetState() 513 } 514 515 // resetState resets r.state to its default values 516 func (r *LineReader) resetState() { 517 // Reset the state machine registers to the defaults given in 518 // [DWARF4 6.2.2]. 519 r.state = LineEntry{ 520 Address: 0, 521 OpIndex: 0, 522 File: nil, 523 Line: 1, 524 Column: 0, 525 IsStmt: r.defaultIsStmt, 526 BasicBlock: false, 527 PrologueEnd: false, 528 EpilogueBegin: false, 529 ISA: 0, 530 Discriminator: 0, 531 } 532 r.fileIndex = 1 533 r.updateFile() 534 } 535 536 // ErrUnknownPC is the error returned by LineReader.ScanPC when the 537 // seek PC is not covered by any entry in the line table. 538 var ErrUnknownPC = errors.New("ErrUnknownPC") 539 540 // SeekPC sets *entry to the LineEntry that includes pc and positions 541 // the reader on the next entry in the line table. If necessary, this 542 // will seek backwards to find pc. 543 // 544 // If pc is not covered by any entry in this line table, SeekPC 545 // returns ErrUnknownPC. 
In this case, *entry and the final seek 546 // position are unspecified. 547 // 548 // Note that DWARF line tables only permit sequential, forward scans. 549 // Hence, in the worst case, this takes time linear in the size of the 550 // line table. If the caller wishes to do repeated fast PC lookups, it 551 // should build an appropriate index of the line table. 552 func (r *LineReader) SeekPC(pc uint64, entry *LineEntry) error { 553 if err := r.Next(entry); err != nil { 554 return err 555 } 556 if entry.Address > pc { 557 // We're too far. Start at the beginning of the table. 558 r.Reset() 559 if err := r.Next(entry); err != nil { 560 return err 561 } 562 if entry.Address > pc { 563 // The whole table starts after pc. 564 r.Reset() 565 return ErrUnknownPC 566 } 567 } 568 569 // Scan until we pass pc, then back up one. 570 for { 571 var next LineEntry 572 pos := r.Tell() 573 if err := r.Next(&next); err != nil { 574 if err == io.EOF { 575 return ErrUnknownPC 576 } 577 return err 578 } 579 if next.Address > pc { 580 if entry.EndSequence { 581 // pc is in a hole in the table. 582 return ErrUnknownPC 583 } 584 // entry is the desired entry. Back up the 585 // cursor to "next" and return success. 586 r.Seek(pos) 587 return nil 588 } 589 *entry = next 590 } 591 } 592 593 // pathIsAbs returns whether path is an absolute path (or "full path 594 // name" in DWARF parlance). This is in "whatever form makes sense for 595 // the host system", so this accepts both UNIX-style and DOS-style 596 // absolute paths. We avoid the filepath package because we want this 597 // to behave the same regardless of our host system and because we 598 // don't know what system the paths came from. 599 func pathIsAbs(path string) bool { 600 _, path = splitDrive(path) 601 return len(path) > 0 && (path[0] == '/' || path[0] == '\\') 602 } 603 604 // pathJoin joins dirname and filename. filename must be relative. 605 // DWARF paths can be UNIX-style or DOS-style, so this handles both. 
func pathJoin(dirname, filename string) string {
	if len(dirname) == 0 {
		return filename
	}
	// dirname should be absolute, which means we can determine
	// whether it's a DOS path reasonably reliably by looking for
	// a drive letter or UNC path.
	drive, dirname := splitDrive(dirname)
	if drive == "" {
		// UNIX-style path.
		return path.Join(dirname, filename)
	}
	// DOS-style path.
	drive2, filename := splitDrive(filename)
	if drive2 != "" && !strings.EqualFold(drive, drive2) {
		// Different drives (compared case-insensitively). There's
		// not much we can do here, so just ignore the directory.
		return drive2 + filename
	}
	// Either filename has no drive or the drives are the same, in
	// which case the drive on filename is ignored.
	if !(strings.HasSuffix(dirname, "/") || strings.HasSuffix(dirname, `\`)) && dirname != "" {
		dirname += `\`
	}
	return drive + dirname + filename
}

// splitDrive splits the DOS drive letter or UNC share point from
// path, if any. path == drive + rest
//
// A UNC share point is only recognized when both the host and the
// share name are non-empty and the share name is followed by a
// separator; otherwise drive is empty and rest is the whole path.
func splitDrive(path string) (drive, rest string) {
	if len(path) >= 2 && path[1] == ':' {
		if c := path[0]; 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
			return path[:2], path[2:]
		}
	}
	if len(path) > 3 && (path[0] == '\\' || path[0] == '/') && (path[1] == '\\' || path[1] == '/') {
		// Normalize the path so we can search for just \ below.
		// The replacement is byte-for-byte, so indexes into npath
		// are valid indexes into path.
		npath := strings.Replace(path, "/", `\`, -1)
		// Get the host part, which must be non-empty.
		slash1 := strings.IndexByte(npath[2:], '\\') + 2
		if slash1 > 2 {
			// Get the mount-point part, which must be non-empty:
			// the separator that ends it has to come at least one
			// byte after the separator that ends the host.
			slash2 := strings.IndexByte(npath[slash1+1:], '\\') + slash1 + 1
			if slash2 > slash1+1 {
				return path[:slash2], path[slash2:]
			}
		}
	}
	return "", path
}