golang.org/x/arch@v0.17.0/x86/x86spec/parse.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "io" 12 "log" 13 "math" 14 "os" 15 "reflect" 16 "regexp" 17 "sort" 18 "strconv" 19 "strings" 20 "time" 21 22 "rsc.io/pdf" 23 ) 24 25 // listing holds information about one or more parsed manual pages 26 // concerning a single instruction listing. 27 type listing struct { 28 pageNum int 29 name string // instruction heading 30 mtables [][][]string // mnemonic tables (at most one per page) 31 enctables [][][]string // encoding tables (at most one per page) 32 compat string 33 } 34 35 type logReaderAt struct { 36 f io.ReaderAt 37 } 38 39 func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) { 40 log.Printf("read %d @ %d", len(x), off) 41 return l.f.ReadAt(x, off) 42 } 43 44 const ( 45 cacheBlockSize = 64 * 1024 46 numCacheBlock = 16 47 ) 48 49 type cachedReaderAt struct { 50 r io.ReaderAt 51 cache *cacheBlock 52 } 53 54 type cacheBlock struct { 55 next *cacheBlock 56 buf []byte 57 offset int64 58 err error 59 } 60 61 func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt { 62 c := &cachedReaderAt{ 63 r: r, 64 } 65 for i := 0; i < numCacheBlock; i++ { 66 c.cache = &cacheBlock{next: c.cache} 67 } 68 return c 69 } 70 71 func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) { 72 // Assume large reads indicate a caller that doesn't need caching. 73 if len(p) >= cacheBlockSize { 74 return c.r.ReadAt(p, offset) 75 } 76 77 for n < len(p) { 78 o := offset + int64(n) 79 f := o & (cacheBlockSize - 1) 80 b := c.readBlock(o - f) 81 n += copy(p[n:], b.buf[f:]) 82 if n < len(p) && b.err != nil { 83 return n, b.err 84 } 85 } 86 return n, nil 87 } 88 89 var errShortRead = errors.New("short read") 90 91 func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock { 92 if offset&(cacheBlockSize-1) != 0 { 93 panic("misuse of cachedReaderAt.readBlock") 94 } 95 96 // Look in cache. 97 var b, prev *cacheBlock 98 for b = c.cache; ; prev, b = b, b.next { 99 if b.buf != nil && b.offset == offset { 100 // Move to front. 101 if prev != nil { 102 prev.next = b.next 103 b.next = c.cache 104 c.cache = b 105 } 106 return b 107 } 108 if b.next == nil { 109 break 110 } 111 } 112 113 // Otherwise b is LRU block in cache, prev points at b. 114 if b.buf == nil { 115 b.buf = make([]byte, cacheBlockSize) 116 } 117 b.offset = offset 118 n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset) 119 b.buf = b.buf[:n] 120 b.err = err 121 if n > 0 { 122 // Move to front. 123 prev.next = nil 124 b.next = c.cache 125 c.cache = b 126 } 127 return b 128 } 129 130 func pdfOpen(name string) (*pdf.Reader, error) { 131 f, err := os.Open(name) 132 if err != nil { 133 return nil, err 134 } 135 fi, err := f.Stat() 136 if err != nil { 137 f.Close() 138 return nil, err 139 } 140 return pdf.NewReader(newCachedReaderAt(f), fi.Size()) 141 } 142 143 func parse() []*instruction { 144 var insts []*instruction 145 146 f, err := pdfOpen(*flagFile) 147 if err != nil { 148 log.Fatal(err) 149 } 150 151 // Find instruction set reference in outline, to build instruction list. 152 instList := instHeadings(f.Outline()) 153 if len(instList) < 200 { 154 log.Fatalf("only found %d instructions in table of contents", len(instList)) 155 } 156 157 // Scan document looking for instructions. 158 // Must find exactly the ones in the outline. 159 n := f.NumPage() 160 var current *listing 161 finishInstruction := func() { 162 if current == nil { 163 return 164 } 165 if len(current.mtables) == 0 || len(current.mtables[0]) <= 1 { 166 fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", current.pageNum, current.name) 167 } 168 processListing(current, &insts) 169 current = nil 170 } 171 172 for pageNum := 1; pageNum <= n; pageNum++ { 173 if onlySomePages && !isDebugPage(pageNum) { 174 continue 175 } 176 p := f.Page(pageNum) 177 parsed := parsePage(p, pageNum) 178 if parsed.name != "" { 179 finishInstruction() 180 for j, headline := range instList { 181 if parsed.name == headline { 182 instList[j] = "" 183 current = parsed 184 break 185 } 186 } 187 if current == nil { 188 fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name) 189 } 190 continue 191 } 192 if current != nil { 193 merge(current, parsed) 194 continue 195 } 196 if parsed.mtables != nil { 197 fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum) 198 } 199 if parsed.enctables != nil { 200 fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum) 201 } 202 if parsed.compat != "" { 203 fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum) 204 } 205 } 206 finishInstruction() 207 208 if !onlySomePages { 209 for _, headline := range instList { 210 if headline != "" { 211 fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline) 212 } 213 } 214 } 215 216 return insts 217 } 218 219 // isDebugPage reports whether the -debugpage flag mentions page n. 220 // The argument is a comma-separated list of pages. 221 // Maybe some day it will support ranges. 222 func isDebugPage(n int) bool { 223 s := *flagDebugPage 224 var k int 225 for i := 0; ; i++ { 226 if i == len(s) || s[i] == ',' { 227 if n == k { 228 return true 229 } 230 k = 0 231 } 232 if i == len(s) { 233 break 234 } 235 if '0' <= s[i] && s[i] <= '9' { 236 k = k*10 + int(s[i]) - '0' 237 } 238 } 239 return false 240 } 241 242 // merge merges the content of y into the running collection in x. 243 func merge(x, y *listing) { 244 if y.name != "" { 245 fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum) 246 return 247 } 248 249 x.mtables = append(x.mtables, y.mtables...) 250 x.enctables = append(x.enctables, y.enctables...) 251 x.compat += y.compat 252 } 253 254 // instHeadings returns the list of instruction headings from the table of contents. 255 // When we parse the pages we expect to find every one of these. 256 func instHeadings(outline pdf.Outline) []string { 257 return appendInstHeadings(outline, nil) 258 } 259 260 var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`) 261 262 // The headings are inconsistent about dash and superscript usage. Normalize. 263 var fixDash = strings.NewReplacer( 264 "Compute 2 –1", "Compute 2^x-1", 265 "Compute 2x-1", "Compute 2^x-1", 266 "Compute 2x–1", "Compute 2^x-1", 267 "/ FUCOMI", "/FUCOMI", 268 "Compute y ∗ log x", "Compute y * log₂x", 269 "Compute y * log2x", "Compute y * log₂x", 270 "Compute y * log2(x +1)", "Compute y * log₂(x+1)", 271 "Compute y ∗ log (x +1)", "Compute y * log₂(x+1)", 272 " — ", "-", 273 "— ", "-", 274 " —", "-", 275 "—", "-", 276 " – ", "-", 277 " –", "-", 278 "– ", "-", 279 "–", "-", 280 " - ", "-", 281 "- ", "-", 282 " -", "-", 283 ) 284 285 func appendInstHeadings(outline pdf.Outline, list []string) []string { 286 if instRE.MatchString(outline.Title) { 287 for _, child := range outline.Child { 288 list = append(list, fixDash.Replace(child.Title)) 289 } 290 } 291 for _, child := range outline.Child { 292 list = appendInstHeadings(child, list) 293 } 294 return list 295 } 296 297 var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`) 298 299 // parsePage parses a single PDF page and returns the content it found. 300 func parsePage(p pdf.Page, pageNum int) *listing { 301 if debugging { 302 fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum) 303 } 304 305 parsed := new(listing) 306 parsed.pageNum = pageNum 307 308 content := p.Content() 309 310 for i, t := range content.Text { 311 if match(t, "Symbol", 11, "≠") { 312 t.Font = "NeoSansIntel" 313 t.FontSize = 9 314 content.Text[i] = t 315 } 316 if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" { 317 t.Font = "NeoSansIntel" 318 t.FontSize = 9 319 if i+1 < len(content.Text) { 320 t.Y = content.Text[i+1].Y 321 } 322 content.Text[i] = t 323 } 324 } 325 326 text := findWords(content.Text) 327 328 for i, t := range text { 329 if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") { 330 t.FontSize = 9 331 text[i] = t 332 } 333 if t.Font == "NeoSansIntel-Medium" { 334 t.Font = "NeoSansIntelMedium" 335 text[i] = t 336 } 337 if t.Font == "NeoSansIntel-Italic" { 338 t.Font = "NeoSansIntel,Italic" 339 text[i] = t 340 } 341 } 342 343 if debugging { 344 for _, t := range text { 345 fmt.Println(t) 346 } 347 } 348 349 if pageNum == 1 { 350 var buf bytes.Buffer 351 for _, t := range text { 352 buf.WriteString(t.S + "\n") 353 } 354 all := buf.String() 355 m := regexp.MustCompile(`Order Number: ([\w-\-]+)`).FindStringSubmatch(all) 356 num := "???" 357 if m != nil { 358 num = m[1] 359 } 360 date := dateRE.FindString(all) 361 if date == "" { 362 date = "???" 363 } 364 365 fmt.Printf("# x86 instruction set description version %s, %s\n", 366 specFormatVersion, time.Now().Format("2006-01-02")) 367 fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date) 368 fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n") 369 } 370 371 // Remove text we should ignore. 372 out := text[:0] 373 for _, t := range text { 374 if shouldIgnore(t) { 375 continue 376 } 377 out = append(out, t) 378 } 379 text = out 380 381 // Page header must say instruction set reference. 382 if len(text) == 0 { 383 return parsed 384 } 385 if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) && 386 !match(text[0], "NeoSansIntel", 9, "EXTENSIONS") { 387 return parsed 388 } 389 text = text[1:] 390 391 enctable := findEncodingTable(text) 392 if enctable != nil { 393 parsed.enctables = append(parsed.enctables, enctable) 394 } 395 396 parsed.compat = findCompat(text) 397 398 // Narrow scope for finding mnemonic table. 399 // Must be last, since it trims text. 400 // Next line is headline. Can wrap to multiple lines. 401 if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) { 402 if debugging { 403 fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0]) 404 } 405 } else { 406 parsed.name = text[0].S 407 text = text[1:] 408 for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") { 409 parsed.name += " " + text[0].S 410 text = text[1:] 411 } 412 parsed.name = fixDash.Replace(parsed.name) 413 } 414 415 // Table follows; heading is NeoSansIntelMedium and rows are NeoSansIntel. 416 i := 0 417 for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") { 418 i++ 419 } 420 for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" { 421 i++ 422 } 423 424 mtable := findMnemonicTable(text[:i]) 425 if mtable != nil { 426 parsed.mtables = append(parsed.mtables, mtable) 427 } 428 429 return parsed 430 } 431 432 func match(t pdf.Text, font string, size float64, substr string) bool { 433 return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr) 434 } 435 436 func shouldIgnore(t pdf.Text) bool { 437 // Ignore footnote stars, which are in Arial. 438 // Also, the page describing MOVS has a tiny 2pt Arial backslash. 439 if (t.S == "*" || t.S == "\\") && strings.HasPrefix(t.Font, "Arial") { 440 return true 441 } 442 443 // Ignore superscript numbers, superscript ST(0), and superscript x. 444 if len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9' || t.S == "ST(0)" || t.S == "x" { 445 if match(t, "NeoSansIntel", 7.2, "") || match(t, "NeoSansIntel", 5.6, "") || match(t, "NeoSansIntelMedium", 8, "") || match(t, "NeoSansIntelMedium", 9.6, "") { 446 return true 447 } 448 } 449 450 return false 451 } 452 453 func isInstHeadline(s string) bool { 454 return strings.Contains(s, "—") || 455 strings.Contains(s, " - ") || 456 strings.Contains(s, "PTEST- Logical Compare") 457 } 458 459 func findWords(chars []pdf.Text) (words []pdf.Text) { 460 // Sort by Y coordinate and normalize. 461 const nudge = 1 462 sort.Sort(pdf.TextVertical(chars)) 463 old := -100000.0 464 for i, c := range chars { 465 if c.Y != old && math.Abs(old-c.Y) < nudge { 466 chars[i].Y = old 467 } else { 468 old = c.Y 469 } 470 } 471 472 // Sort by Y coordinate, breaking ties with X. 473 // This will bring letters in a single word together. 474 sort.Sort(pdf.TextVertical(chars)) 475 476 // Loop over chars. 477 for i := 0; i < len(chars); { 478 // Find all chars on line. 479 j := i + 1 480 for j < len(chars) && chars[j].Y == chars[i].Y { 481 j++ 482 } 483 var end float64 484 // Split line into words (really, phrases). 485 for k := i; k < j; { 486 ck := &chars[k] 487 s := ck.S 488 end = ck.X + ck.W 489 charSpace := ck.FontSize / 6 490 wordSpace := ck.FontSize * 2 / 3 491 l := k + 1 492 for l < j { 493 // Grow word. 494 cl := &chars[l] 495 if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace { 496 s += cl.S 497 end = cl.X + cl.W 498 l++ 499 continue 500 } 501 // Add space to phrase before next word. 502 if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace { 503 s += " " + cl.S 504 end = cl.X + cl.W 505 l++ 506 continue 507 } 508 break 509 } 510 f := ck.Font 511 f = strings.TrimSuffix(f, ",Italic") 512 f = strings.TrimSuffix(f, "-Italic") 513 words = append(words, pdf.Text{ 514 Font: f, 515 FontSize: ck.FontSize, 516 X: ck.X, 517 Y: ck.Y, 518 W: end, 519 S: s, 520 }) 521 k = l 522 } 523 i = j 524 } 525 526 return words 527 } 528 529 func sameFont(f1, f2 string) bool { 530 f1 = strings.TrimSuffix(f1, ",Italic") 531 f1 = strings.TrimSuffix(f1, "-Italic") 532 f2 = strings.TrimSuffix(f1, ",Italic") 533 f2 = strings.TrimSuffix(f1, "-Italic") 534 return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman" 535 } 536 537 func findMnemonicTable(text []pdf.Text) [][]string { 538 sort.Sort(pdf.TextHorizontal(text)) 539 540 const nudge = 1 541 542 old := -100000.0 543 var col []float64 544 for i, t := range text { 545 if t.Font != "NeoSansIntelMedium" { // only headings count 546 continue 547 } 548 if t.X != old && math.Abs(old-t.X) < nudge { 549 text[i].X = old 550 } else if t.X != old { 551 old = t.X 552 col = append(col, old) 553 } 554 } 555 sort.Sort(pdf.TextVertical(text)) 556 557 if len(col) == 0 { 558 return nil 559 } 560 561 y := -100000.0 562 var table [][]string 563 var line []string 564 bold := -1 565 for _, t := range text { 566 if t.Y != y { 567 table = append(table, make([]string, len(col))) 568 line = table[len(table)-1] 569 y = t.Y 570 if t.Font == "NeoSansIntelMedium" { 571 bold = len(table) - 1 572 } 573 } 574 i := 0 575 for i+1 < len(col) && col[i+1] <= t.X+nudge { 576 i++ 577 } 578 if line[i] != "" { 579 line[i] += " " 580 } 581 line[i] += t.S 582 } 583 584 var mtable [][]string 585 for i, t := range table { 586 if 0 < i && i <= bold || bold < i && halfMissing(t) { 587 // merge with earlier line 588 last := mtable[len(mtable)-1] 589 for j, s := range t { 590 if s != "" { 591 last[j] += "\n" + s 592 } 593 } 594 } else { 595 mtable = append(mtable, t) 596 } 597 } 598 599 if bold >= 0 { 600 heading := mtable[0] 601 for i, x := range heading { 602 heading[i] = fixHeading.Replace(x) 603 } 604 } 605 606 return mtable 607 } 608 609 var fixHeading = strings.NewReplacer( 610 "64/32-\nbit\nMode", "64/32-Bit Mode", 611 "64/32-\nbit Mode", "64/32-Bit Mode", 612 "64/32-bit\nMode", "64/32-Bit Mode", 613 "64/3\n2-bit\nMode", "64/32-Bit Mode", 614 "64/32 bit\nMode\nSupport", "64/32-Bit Mode", 615 "64/32bit\nMode\nSupport", "64/32-Bit Mode", 616 "64/32\n-bit\nMode", "64/32-Bit Mode", 617 "64/32\nbit Mode\nSupport", "64/32-Bit Mode", 618 "64-Bit\nMode", "64-Bit Mode", 619 "64-bit\nMode", "64-Bit Mode", 620 621 "Op/ En", "Op/En", 622 "Op/\nEn", "Op/En", 623 "Op/\nEN", "Op/En", 624 "Op /\nEn", "Op/En", 625 "Opcode***", "Opcode", 626 "Opcode**", "Opcode", 627 "Opcode*", "Opcode", 628 "/\nInstruction", "/Instruction", 629 630 "CPUID Fea-\nture Flag", "CPUID Feature Flag", 631 "CPUID\nFeature\nFlag", "CPUID Feature Flag", 632 "CPUID\nFeature Flag", "CPUID Feature Flag", 633 "CPUIDFeature\nFlag", "CPUID Feature Flag", 634 635 "Compat/\nLeg Mode*", "Compat/Leg Mode", 636 "Compat/\nLeg Mode", "Compat/Leg Mode", 637 "Compat/ *\nLeg Mode", "Compat/Leg Mode", 638 ) 639 640 func halfMissing(x []string) bool { 641 n := 0 642 for _, s := range x { 643 if s == "" { 644 n++ 645 } 646 } 647 return n >= len(x)/2 648 } 649 650 func findEncodingTable(text []pdf.Text) [][]string { 651 // Look for operand encoding table. 652 sort.Sort(pdf.TextVertical(text)) 653 var col []float64 654 sawTitle := false 655 656 center := func(t pdf.Text) float64 { 657 return t.X + t.W/2 658 } 659 660 start := 0 661 end := len(text) 662 for i, t := range text { 663 if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") { 664 sawTitle = true 665 start = i + 1 666 continue 667 } 668 if !sawTitle { 669 continue 670 } 671 if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") { 672 if debugging { 673 fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t) 674 } 675 col = append(col, center(t)) 676 } 677 if match(t, "NeoSansIntelMedium", 10, "Description") { 678 end = i 679 break 680 } 681 } 682 text = text[start:end] 683 684 if len(col) == 0 { 685 return nil 686 } 687 688 const nudge = 20 689 690 y := -100000.0 691 var table [][]string 692 var line []string 693 for _, t := range text { 694 if t.Y != y { 695 table = append(table, make([]string, len(col))) 696 line = table[len(table)-1] 697 y = t.Y 698 } 699 i := 0 700 x := center(t) 701 for i+1 < len(col) && col[i+1] <= x+nudge { 702 i++ 703 } 704 if debugging { 705 fmt.Printf("text at %.2f: %v => %d\n", x, t, i) 706 } 707 if line[i] != "" { 708 line[i] += " " 709 } 710 line[i] += t.S 711 } 712 713 out := table[:0] 714 for _, line := range table { 715 if strings.HasPrefix(line[len(line)-1], "Vol. 2") { // page footer 716 continue 717 } 718 if line[0] == "" && len(out) > 0 { 719 last := out[len(out)-1] 720 for i, col := range line { 721 if col != "" { 722 last[i] += " " + col 723 } 724 } 725 continue 726 } 727 out = append(out, line) 728 } 729 table = out 730 731 return table 732 } 733 734 func findCompat(text []pdf.Text) string { 735 sort.Sort(pdf.TextVertical(text)) 736 737 inCompat := false 738 out := "" 739 for _, t := range text { 740 if match(t, "NeoSansIntelMedium", 10, "") { 741 inCompat = strings.Contains(t.S, "Architecture Compatibility") 742 if inCompat { 743 out += t.S + "\n" 744 } 745 } 746 if inCompat && match(t, "Verdana", 9, "") || strings.Contains(t.S, "were introduced") { 747 out += t.S + "\n" 748 } 749 } 750 return out 751 } 752 753 func processListing(p *listing, insts *[]*instruction) { 754 if debugging { 755 for _, table := range p.mtables { 756 fmt.Printf("table:\n") 757 for _, row := range table { 758 fmt.Printf("%q\n", row) 759 } 760 } 761 fmt.Printf("enctable:\n") 762 for _, table := range p.enctables { 763 for _, row := range table { 764 fmt.Printf("%q\n", row) 765 } 766 } 767 fmt.Printf("compat:\n%s", p.compat) 768 } 769 770 if *flagCompat && p.compat != "" { 771 fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1)) 772 } 773 774 encs := make(map[string][]string) 775 for _, table := range p.enctables { 776 for _, row := range table[1:] { 777 for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") { 778 row = row[:len(row)-1] 779 } 780 encs[row[0]] = row[1:] 781 } 782 } 783 784 var wrong string 785 for _, table := range p.mtables { 786 heading := table[0] 787 for _, row := range table[1:] { 788 if row[0] == heading[0] && reflect.DeepEqual(row, heading) { 789 continue 790 } 791 if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" { 792 row[3] = "V" 793 row[4] = "N.E." 794 } 795 inst := new(instruction) 796 inst.page = p.pageNum 797 inst.compat = strings.Join(strings.Fields(p.compat), " ") 798 for i, hdr := range heading { 799 x := row[i] 800 x = strings.Replace(x, "\n", " ", -1) 801 switch strings.TrimSpace(hdr) { 802 default: 803 wrong = "unexpected header: " + strconv.Quote(hdr) 804 goto BadTable 805 case "Opcode/Instruction": 806 x = row[i] 807 if strings.HasPrefix(x, "\nVEX") { 808 x = x[1:] 809 row[i] = x 810 } 811 if strings.Contains(x, "\n/r ") { 812 x = strings.Replace(x, "\n/r ", " /r ", -1) 813 row[i] = x 814 } 815 if strings.Contains(x, ",\nimm") { 816 x = strings.Replace(x, ",\nimm", ", imm", -1) 817 row[i] = x 818 } 819 if strings.Count(x, "\n") < 1 { 820 wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x) 821 goto BadTable 822 } 823 i := strings.Index(x, "\n") 824 inst.opcode = x[:i] 825 inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1) 826 827 case "Opcode": 828 inst.opcode = x 829 830 case "Instruction": 831 inst.syntax = x 832 833 case "Op/En": 834 inst.args = encs[x] 835 if inst.args == nil && len(encs) == 1 && encs["A"] != nil { 836 inst.args = encs["A"] 837 } 838 // In the December 2015 manual, PREFETCHW says 839 // encoding A but the table gives encoding M. 840 if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil { 841 inst.args = encs["M"] 842 } 843 844 case "64-Bit Mode": 845 x, ok := parseMode(x) 846 if !ok { 847 wrong = "unexpected value for 64-Bit Mode column: " + x 848 goto BadTable 849 } 850 inst.valid64 = x 851 852 case "Compat/Leg Mode": 853 x, ok := parseMode(x) 854 if !ok { 855 wrong = "unexpected value for Compat/Leg Mode column: " + x 856 goto BadTable 857 } 858 inst.valid32 = x 859 860 case "64/32-Bit Mode": 861 i := strings.Index(x, "/") 862 if i < 0 { 863 wrong = "unexpected value for 64/32-Bit Mode column: " + x 864 goto BadTable 865 } 866 x1, ok1 := parseMode(x[:i]) 867 x2, ok2 := parseMode(x[i+1:]) 868 if !ok1 || !ok2 { 869 wrong = "unexpected value for 64/32-Bit Mode column: " + x 870 goto BadTable 871 } 872 inst.valid64 = x1 873 inst.valid32 = x2 874 875 case "CPUID Feature Flag": 876 inst.cpuid = x 877 878 case "Description": 879 if inst.desc != "" { 880 inst.desc += " " 881 } 882 inst.desc += x 883 } 884 } 885 886 // Fixup various typos or bugs in opcode descriptions. 887 if inst.opcode == "VEX.128.66.0F.W0 6E /" { 888 inst.opcode += "r" 889 } 890 fix := func(old, new string) { 891 inst.opcode = strings.Replace(inst.opcode, old, new, -1) 892 } 893 fix(" imm8", " ib") 894 fix("REX.w", "REX.W") 895 fix("REX.W+", "REX.W +") 896 fix(" 0f ", " 0F ") 897 fix(". 0F38", ".0F38") 898 fix("0F .WIG", "0F.WIG") 899 fix("0F38 .WIG", "0F38.WIG") 900 fix("NDS .LZ", "NDS.LZ") 901 fix("58+ r", "58+r") 902 fix("B0+ ", "B0+") 903 fix("B8+ ", "B8+") 904 fix("40+ ", "40+") 905 fix("*", "") 906 fix(",", " ") 907 fix("/", " /") 908 fix("REX.W +", "REX.W") 909 fix("REX +", "REX") 910 fix("REX 0F BE", "REX.W 0F BE") 911 fix("REX 0F B2", "REX.W 0F B2") 912 fix("REX 0F B4", "REX.W 0F B4") 913 fix("REX 0F B5", "REX.W 0F B5") 914 fix("0F38.0", "0F38.W0") 915 fix(".660F.", ".66.0F.") 916 fix("VEX128", "VEX.128") 917 fix("0F3A.W0.1D", "0F3A.W0 1D") 918 919 inst.opcode = strings.Join(strings.Fields(inst.opcode), " ") 920 921 fix = func(old, new string) { 922 inst.syntax = strings.Replace(inst.syntax, old, new, -1) 923 } 924 fix("xmm1 xmm2", "xmm1, xmm2") 925 fix("r16/m16", "r/m16") 926 fix("r32/m161", "r32/m16") // really r32/m16¹ (footnote) 927 fix("r32/m32", "r/m32") 928 fix("r64/m64", "r/m64") 929 fix("\u2013", "-") 930 fix("mm3 /m", "mm3/m") 931 fix("mm3/.m", "mm3/m") 932 inst.syntax = joinSyntax(splitSyntax(inst.syntax)) 933 934 fix = func(old, new string) { 935 inst.cpuid = strings.Replace(inst.cpuid, old, new, -1) 936 } 937 fix("PCLMUL- QDQ", "PCLMULQDQ") 938 fix("PCL- MULQDQ", "PCLMULQDQ") 939 fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX") 940 941 if !instBlacklist[inst.syntax] { 942 *insts = append(*insts, inst) 943 } 944 } 945 } 946 return 947 948 BadTable: 949 fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong) 950 for _, table := range p.mtables { 951 for _, t := range table { 952 fmt.Fprintf(os.Stderr, "\t%q\n", t) 953 } 954 } 955 fmt.Fprintf(os.Stderr, "\n") 956 } 957 958 func parseMode(s string) (string, bool) { 959 switch strings.TrimSpace(s) { 960 case "Invalid", "Invalid*", "Inv.", "I", "i": 961 return "I", true 962 case "Valid", "Valid*", "V": 963 return "V", true 964 case "N.E.", "NE", "N. E.": 965 return "N.E.", true 966 case "N.P.", "N. P.": 967 return "N.P.", true 968 case "N.S.", "N. S.": 969 return "N.S.", true 970 case "N.I.", "N. I.": 971 return "N.I.", true 972 } 973 return s, false 974 } 975 976 func splitSyntax(syntax string) (op string, args []string) { 977 i := strings.Index(syntax, " ") 978 if i < 0 { 979 return syntax, nil 980 } 981 op, syntax = syntax[:i], syntax[i+1:] 982 args = strings.Split(syntax, ",") 983 for i, arg := range args { 984 arg = strings.TrimSpace(arg) 985 arg = strings.TrimRight(arg, "*") 986 args[i] = arg 987 } 988 return 989 } 990 991 func joinSyntax(op string, args []string) string { 992 if len(args) == 0 { 993 return op 994 } 995 return op + " " + strings.Join(args, ", ") 996 }