golang.org/x/arch@v0.17.0/x86/x86map/map.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // X86map constructs the x86 opcode map from the instruction set CSV file. 6 // 7 // Usage: 8 // 9 // x86map [-fmt=format] x86.csv 10 // 11 // The known output formats are: 12 // 13 // text (default) - print decoding tree in text form 14 // decoder - print decoding tables for the x86asm package 15 // scanner - print scanning tables for x86scan package 16 package main 17 18 import ( 19 "bufio" 20 "bytes" 21 "encoding/csv" 22 "flag" 23 "fmt" 24 "io" 25 "log" 26 "os" 27 "sort" 28 "strconv" 29 "strings" 30 ) 31 32 var format = flag.String("fmt", "text", "output format: text, decoder") 33 34 var inputFile string 35 36 func usage() { 37 fmt.Fprintf(os.Stderr, "usage: x86map [-fmt=format] x86.csv\n") 38 os.Exit(2) 39 } 40 41 func main() { 42 log.SetFlags(0) 43 log.SetPrefix("x86map: ") 44 45 flag.Usage = usage 46 flag.Parse() 47 if flag.NArg() != 1 { 48 usage() 49 } 50 51 inputFile = flag.Arg(0) 52 53 var print func(*Prog) 54 switch *format { 55 default: 56 log.Fatalf("unknown output format %q", *format) 57 case "text": 58 print = printText 59 case "decoder": 60 print = printDecoder 61 case "scanner": 62 print = printScanner 63 } 64 65 p, err := readCSV(flag.Arg(0)) 66 if err != nil { 67 log.Fatal(err) 68 } 69 70 //p = mergeTail(p) 71 72 print(p) 73 } 74 75 // readCSV reads the CSV file and returns the corresponding Prog. 76 // It may print details about problems to standard error using the log package. 77 func readCSV(file string) (*Prog, error) { 78 // Read input. 79 // Skip leading blank and # comment lines. 80 f, err := os.Open(file) 81 if err != nil { 82 return nil, err 83 } 84 b := bufio.NewReader(f) 85 for { 86 c, err := b.ReadByte() 87 if err != nil { 88 break 89 } 90 if c == '\n' { 91 continue 92 } 93 if c == '#' { 94 b.ReadBytes('\n') 95 continue 96 } 97 b.UnreadByte() 98 break 99 } 100 table, err := csv.NewReader(b).ReadAll() 101 if err != nil { 102 return nil, fmt.Errorf("parsing %s: %v", file, err) 103 } 104 if len(table) == 0 { 105 return nil, fmt.Errorf("empty csv input") 106 } 107 if len(table[0]) < 6 { 108 return nil, fmt.Errorf("csv too narrow: need at least six columns") 109 } 110 111 p := &Prog{} 112 for _, row := range table { 113 add(p, row[0], row[1], row[2], row[3], row[4], row[5]) 114 } 115 116 check(p) 117 118 return p, nil 119 } 120 121 // A Prog is a single node in the tree representing the instruction format. 122 // Collectively the tree of nodes form a kind of program for decoding. 123 // Each Prog has a single action, identifying the kind of node it is, 124 // and then children to be executed according to the action. 125 // For example, the Prog with Action="decode" has children named for each 126 // possible next byte in the input, and those children are the decoding 127 // tree to execute for the corresponding bytes. 128 type Prog struct { 129 Path string 130 Action string 131 Child map[string]*Prog 132 PC int 133 TailID int 134 } 135 136 // keys returns the child keys in sorted order. 137 func (p *Prog) keys() []string { 138 var keys []string 139 for key := range p.Child { 140 keys = append(keys, key) 141 } 142 sort.Strings(keys) 143 return keys 144 } 145 146 // findChildLeaf finds a leaf node in the subtree rooted at p 147 // and returns that node's full path. The path is useful in error 148 // messages as an example of where a particular subtree is headed. 149 func (p *Prog) findChildLeaf() string { 150 for { 151 if len(p.Child) == 0 { 152 return p.Path 153 } 154 p = p.Child[p.keys()[0]] 155 } 156 } 157 158 // walk advances from p to apply the given action and key. 159 // If p has no action yet, the action is recorded as p.Action. 160 // Otherwise the action must match p's action: every node in the 161 // tree can have at most one action, although possibly with many 162 // alternative keys. 163 // If p already has an alternative with the given key, walk returns 164 // that preexisting subtree. Otherwise walk allocates a new Prog 165 // representing that subtree and returns that node. 166 func (p *Prog) walk(action, key, text, opcode string) *Prog { 167 if p.Action == "" { 168 p.Action = action 169 } else if p.Action != action { 170 log.Printf("%s; %s: conflicting paths %s and %s|%s %s\n", text, opcode, p.findChildLeaf(), p.Path, action, key) 171 return new(Prog) 172 } 173 q := p.Child[key] 174 if q == nil { 175 if p.Child == nil { 176 p.Child = make(map[string]*Prog) 177 } 178 q = new(Prog) 179 q.Path = fmt.Sprintf("%s|%s %s", p.Path, action, key) 180 p.Child[key] = q 181 } 182 return q 183 } 184 185 // add adds a single instructions to the tree rooted at root. 186 // The string arguments match the CSV: instruction mnemonic, 187 // opcode encoding, validity in 32- and 64-bit modes, CPUID 188 // feature set (ignored), and additional tags. 189 // 190 // In effect, add adds a new path through the tree leading to 191 // the given instruction, but it reuses as much of the existing 192 // tree structure as possible. For example if there have already 193 // been instructions added starting with 0F and this instruction 194 // also starts with 0F, that 0F subtree node is reused instead of 195 // allocating a parallel one. To maximize the reuse, the check action 196 // sequence along the path being added is the same for every instruction: 197 // encoding pieces needed to make a decision, 64-bit mode check, 198 // rex check, prefix check, address size check, data size check, 199 // register vs memory argument check. Once all those checks have 200 // been applied, the assumption is that we have uniquely identified 201 // an instruction, and at that point it is okay to diverge from the 202 // uniform pattern to set the opcode and read the specific arguments 203 // corresponding to the instruction at hand. 204 // 205 // The maximimal reuse of the existing tree means that the tree 206 // resulting from all adds have been done amounts to a decision tree. 207 // There is one detail that makes it non-deterministic: some checks 208 // do not matter to some instructions and those are recorded as "any" keys. 209 // If you are decoding and there is a key for the specific thing you are 210 // seeing as well as the "any" key, both must be considered. To avoid 211 // adding complexity to the decoder execution, the 'check' function 212 // removes this case by merging "any" trees into specific keys when 213 // present. 214 func add(root *Prog, text, opcode, valid32, valid64, cpuid, tags string) { 215 // These are not real instructions: they are either 216 // prefixes for other instructions, composite instructions 217 // built from multiple individual instructions, or alternate 218 // mnemonics of other encodings. 219 // Discard for disassembly, because we want a unique decoding. 220 if strings.Contains(tags, "pseudo") { 221 return 222 } 223 224 // Treat REX.W + opcode as being like having an "operand64" tag. 225 // The REX.W flag sets the operand size to 64 bits; in this way it is 226 // not much different than the 66 prefix that inverts 32 vs 16 bits. 227 if strings.Contains(opcode, "REX.W") { 228 if !strings.Contains(tags, "operand64") { 229 if tags != "" { 230 tags += "," 231 } 232 tags += "operand64" 233 } 234 } 235 236 // If there is more than one operand size given, we need to do 237 // a separate add for each size, because we need multiple 238 // keys to be added in the operand size branch, and the code makes 239 // a linear pass through the tree adding just one key to each node. 240 // We would need to do the same for any other possible repeated tag 241 // (for example, if an instruction could have multiple address sizes) 242 // but so far operand size is the only tag we have needed to repeat. 243 if strings.Count(tags, "operand") > 1 { 244 f := strings.Split(tags, ",") 245 var ops []string 246 w := 0 247 for _, tag := range f { 248 if strings.HasPrefix(tag, "operand") { 249 ops = append(ops, tag) 250 } else { 251 if strings.Contains(tag, "operand") { 252 log.Fatalf("unknown tag %q", tag) 253 } 254 f[w] = tag 255 w++ 256 } 257 } 258 f = f[:w] 259 for _, op := range ops { 260 add(root, text, opcode, valid32, valid64, cpuid, strings.Join(append(f, op), ",")) 261 } 262 return 263 } 264 265 p := root 266 walk := func(action, item string) { 267 p = p.walk(action, item, text, opcode) 268 } 269 270 // Ignore VEX instructions for now. 271 if strings.HasPrefix(opcode, "VEX") { 272 if !strings.HasPrefix(text, "VMOVNTDQ") && 273 !strings.HasPrefix(text, "VMOVDQA") && 274 !strings.HasPrefix(text, "VMOVDQU") && 275 !strings.HasPrefix(text, "VZEROUPPER") { 276 return 277 } 278 if !strings.HasPrefix(opcode, "VEX.256") && !strings.HasPrefix(text, "VZEROUPPER") { 279 return 280 } 281 if !strings.Contains(tags, "VEXC4") { 282 add(root, text, opcode, valid32, valid64, cpuid, tags+",VEXC4") 283 } 284 encoding := strings.Fields(opcode) 285 walk("decode", encoding[1]) 286 walk("is64", "any") 287 if strings.Contains(tags, "VEXC4") { 288 walk("prefix", "C4") 289 } else { 290 walk("prefix", "C5") 291 } 292 for _, pref := range strings.Split(encoding[0], ".") { 293 if isVexEncodablePrefix[pref] { 294 walk("prefix", pref) 295 } 296 } 297 } 298 299 var rex, prefix string 300 encoding := strings.Fields(opcode) 301 if len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") { 302 rex = encoding[0] 303 encoding = encoding[1:] 304 if len(encoding) > 0 && encoding[0] == "+" { 305 encoding = encoding[1:] 306 } 307 } 308 if len(encoding) > 0 && isPrefix[encoding[0]] { 309 prefix = encoding[0] 310 encoding = encoding[1:] 311 } 312 if rex == "" && len(encoding) > 0 && strings.HasPrefix(encoding[0], "REX") { 313 rex = encoding[0] 314 if rex == "REX" { 315 log.Printf("REX without REX.W: %s %s", text, opcode) 316 } 317 encoding = encoding[1:] 318 if len(encoding) > 0 && encoding[0] == "+" { 319 encoding = encoding[1:] 320 } 321 } 322 if len(encoding) > 0 && isPrefix[encoding[0]] { 323 log.Printf("%s %s: too many prefixes", text, opcode) 324 return 325 } 326 327 var haveModRM, havePlus bool 328 var usedReg string 329 for len(encoding) > 0 && (isHex(encoding[0]) || isSlashNum(encoding[0])) { 330 key := encoding[0] 331 if isSlashNum(key) { 332 if usedReg != "" { 333 log.Printf("%s %s: multiple modrm checks", text, opcode) 334 } 335 haveModRM = true 336 usedReg = key 337 } 338 if i := strings.Index(key, "+"); i >= 0 { 339 key = key[:i+1] 340 havePlus = true 341 } 342 walk("decode", key) 343 encoding = encoding[1:] 344 } 345 346 if valid32 != "V" { 347 walk("is64", "1") 348 } else if valid64 != "V" { 349 walk("is64", "0") 350 } else { 351 walk("is64", "any") 352 } 353 354 if prefix == "" { 355 prefix = "0" 356 } 357 walk("prefix", prefix) 358 359 if strings.Contains(tags, "address16") { 360 walk("addrsize", "16") 361 } else if strings.Contains(tags, "address32") { 362 walk("addrsize", "32") 363 } else if strings.Contains(tags, "address64") { 364 walk("addrsize", "64") 365 } else { 366 walk("addrsize", "any") 367 } 368 369 if strings.Contains(tags, "operand16") { 370 walk("datasize", "16") 371 } else if strings.Contains(tags, "operand32") { 372 walk("datasize", "32") 373 } else if strings.Contains(tags, "operand64") { 374 walk("datasize", "64") 375 } else { 376 walk("datasize", "any") 377 } 378 379 if len(encoding) > 0 && encoding[0] == "/r" { 380 haveModRM = true 381 } 382 if haveModRM { 383 if strings.Contains(tags, "modrm_regonly") { 384 walk("ismem", "0") 385 } else if strings.Contains(tags, "modrm_memonly") { 386 walk("ismem", "1") 387 } else { 388 walk("ismem", "any") 389 } 390 } 391 392 walk("op", strings.Fields(text)[0]) 393 394 if len(encoding) > 0 && strings.HasPrefix(encoding[0], "VEX") { 395 for _, field := range encoding[2:] { 396 walk("read", field) 397 } 398 } else { 399 for _, field := range encoding { 400 walk("read", field) 401 } 402 } 403 404 var usedRM string 405 for _, arg := range strings.Fields(text)[1:] { 406 arg = strings.TrimRight(arg, ",") 407 if usesReg[arg] && !haveModRM && !havePlus { 408 log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg) 409 continue 410 } 411 if usesRM[arg] && !haveModRM { 412 log.Printf("%s %s: no modrm field to use for %s", text, opcode, arg) 413 continue 414 } 415 if usesReg[arg] { 416 if usedReg != "" { 417 log.Printf("%s %s: modrm reg field used by both %s and %s", text, opcode, usedReg, arg) 418 continue 419 } 420 usedReg = arg 421 } 422 if usesRM[arg] { 423 if usedRM != "" { 424 log.Printf("%s %s: modrm r/m field used by both %s and %s", text, opcode, usedRM, arg) 425 continue 426 } 427 usedRM = arg 428 } 429 walk("arg", arg) 430 } 431 432 walk("match", "!") 433 } 434 435 // allKeys records the list of all possible child keys for actions that support "any". 436 var allKeys = map[string][]string{ 437 "is64": {"0", "1"}, 438 "ismem": {"0", "1"}, 439 "addrsize": {"16", "32", "64"}, 440 "datasize": {"16", "32", "64"}, 441 } 442 443 // check checks that the program tree is well-formed. 444 // It also merges "any" keys into specific decoding keys in order to 445 // create an invariant that a particular check node either has a 446 // single "any" child - making it a no-op - or has no "any" children. 447 // See the discussion of "any" in the comment for add above. 448 func check(p *Prog) { 449 if p.Child["any"] != nil && len(p.Child) > 1 { 450 for _, key := range p.keys() { 451 if key != "any" { 452 mergeCopy(p.Child[key], p.Child["any"]) 453 } 454 } 455 if allKeys[p.Action] == nil { 456 log.Printf("%s: unknown key space for %s=any", p.Path, p.Action) 457 } 458 for _, key := range allKeys[p.Action] { 459 if p.Child[key] == nil { 460 p.Child[key] = p.Child["any"] 461 } 462 } 463 delete(p.Child, "any") 464 } 465 466 for _, q := range p.Child { 467 check(q) 468 } 469 470 switch p.Action { 471 case "op", "read", "arg": 472 if len(p.Child) > 1 { 473 log.Printf("%s: multiple children for action=%s: %v", p.Path, p.Action, p.keys()) 474 } 475 } 476 } 477 478 // mergeCopy merges a copy of the tree rooted at src into dst. 479 // It is only used once no more paths will be added to the tree, 480 // so it is safe to introduce cross-links that make the program 481 // a dag rather than a tree. 482 func mergeCopy(dst, src *Prog) { 483 //log.Printf("merge %s|%s and %s|%s\n", dst.Path, dst.Action, src.Path, src.Action) 484 if dst.Action != src.Action { 485 log.Printf("cannot merge %s|%s and %s|%s", dst.Path, dst.Action, src.Path, src.Action) 486 return 487 } 488 489 for _, key := range src.keys() { 490 if dst.Child[key] == nil { 491 // Create new subtree by creating cross-link. 492 dst.Child[key] = src.Child[key] 493 } else { 494 // Merge src subtree into existing dst subtree. 495 mergeCopy(dst.Child[key], src.Child[key]) 496 } 497 } 498 } 499 500 // set returns a map mapping each of the words in all to true. 501 func set(all string) map[string]bool { 502 m := map[string]bool{} 503 for _, f := range strings.Fields(all) { 504 m[f] = true 505 } 506 return m 507 } 508 509 // isPrefix records the x86 opcode prefix bytes. 510 var isPrefix = set(` 511 26 512 2E 513 36 514 3E 515 64 516 65 517 66 518 67 519 F0 520 F2 521 F3 522 `) 523 524 // usesReg records the argument codes that use the modrm reg field. 525 var usesReg = set(` 526 r8 527 r16 528 r32 529 r64 530 `) 531 532 // usesRM records the argument codes that use the modrm r/m field. 533 var usesRM = set(` 534 r/m8 535 r/m16 536 r/m32 537 r/m64 538 `) 539 540 var isVexEncodablePrefix = set(` 541 0F 542 0F38 543 0F3A 544 66 545 F3 546 F2 547 `) 548 549 // isHex reports whether the argument is a two digit hex number 550 // possibly followed by a +foo suffix. 551 func isHex(s string) bool { 552 if i := strings.Index(s, "+"); i >= 0 { 553 s = s[:i] 554 } 555 if len(s) != 2 { 556 return false 557 } 558 for i := 0; i < len(s); i++ { 559 c := s[i] 560 if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' { 561 continue 562 } 563 return false 564 } 565 return true 566 } 567 568 // isSlashNum reports whether the argument is /n for some number n in [0,7]. 569 func isSlashNum(s string) bool { 570 return len(s) == 2 && s[0] == '/' && '0' <= s[1] && s[1] <= '7' 571 } 572 573 // mergeTail is supposed to merge common subtrees (program tails), 574 // reducing the size of the final program code. 575 // It identifies the subtrees using a bottom-up canonicalization. 576 // 577 // THIS CODE DOES NOT WORK. IT NEEDS TO BE DEBUGGED. 578 func mergeTail(p *Prog, emitted map[string]*Prog) *Prog { 579 if emitted == nil { 580 emitted = make(map[string]*Prog) 581 } 582 583 if p.Action == "match" { 584 return p 585 } 586 587 for _, key := range p.keys() { 588 p.Child[key] = mergeTail(p.Child[key], emitted) 589 } 590 591 op := "" 592 for _, key := range p.keys() { 593 q := p.Child[key] 594 if q.Action != "op" || len(q.Child) > 1 { 595 op = "" 596 break 597 } 598 qop := q.keys()[0] 599 if op == "" { 600 op = qop 601 } else if op != qop { 602 op = "" 603 break 604 } 605 } 606 607 if op != "" { 608 // Pull 'op x' up above the discriminator. 609 p1 := new(Prog) 610 *p1 = *p 611 for _, key := range p.keys() { 612 p1.Child[key] = p.Child[key].Child[op] 613 } 614 p.Action = "op" 615 p.Child = map[string]*Prog{op: p1} 616 } 617 618 var buf bytes.Buffer 619 fmt.Fprintf(&buf, "%s\n", p.Action) 620 for _, key := range p.keys() { 621 fmt.Fprintf(&buf, "%s %d\n", key, p.Child[key].TailID) 622 } 623 key := buf.String() 624 625 if q := emitted[key]; q != nil { 626 return q 627 } 628 emitted[key] = p 629 p.TailID = len(emitted) 630 return p 631 } 632 633 // printText prints the tree in textual form. 634 func printText(p *Prog) { 635 printTree(os.Stdout, p, 0, false) 636 } 637 638 var tabs = strings.Repeat(" ", 100) 639 640 func printTree(w io.Writer, p *Prog, depth int, compact bool) { 641 if compact && len(p.Child) == 1 { 642 fmt.Fprintf(w, "%.*s%s", 4*depth, tabs, p.Action) 643 for len(p.Child) == 1 { 644 key := p.keys()[0] 645 child := p.Child[key] 646 fmt.Fprintf(w, " %s %s", key, child.Action) 647 p = child 648 } 649 fmt.Fprintf(w, "\n") 650 } else { 651 fmt.Fprintf(w, "%.*s%s\n", 4*depth, tabs, p.Action) 652 } 653 for _, key := range p.keys() { 654 fmt.Fprintf(w, "%.*s%s\n", 4*(depth+1), tabs, key) 655 printTree(w, p.Child[key], depth+2, compact) 656 } 657 } 658 659 // printDecoder prints a Go array containing the decoder program. 660 // It runs in two passes, both of which traverse and could generate 661 // the entire program. The first pass records the PC for each Prog node, 662 // and the second pass emits the actual program, using the PCs as jump 663 // targets in the places where the program is a dag rather than a tree. 664 func printDecoder(p *Prog) { 665 opMap := map[string]bool{ 666 "PAUSE": true, 667 } 668 printDecoderPass(p, 1, false, opMap) 669 fmt.Printf("// Code generated by x86map -fmt=decoder %s DO NOT EDIT.\n", inputFile) 670 fmt.Printf("\n") 671 fmt.Printf("package x86asm\n\n") 672 fmt.Printf("var decoder = [...]uint16{\n\tuint16(xFail),\n") 673 printDecoderPass(p, 1, true, opMap) 674 fmt.Printf("}\n\n") 675 676 var ops []string 677 for op := range opMap { 678 ops = append(ops, op) 679 } 680 sort.Strings(ops) 681 682 fmt.Printf("const (\n") 683 fmt.Printf("\t_ Op = iota\n\n") 684 last := "" 685 for _, op := range ops { 686 fmt.Printf("\t%s\n", op) 687 last = op 688 } 689 fmt.Printf(")\n\n") 690 fmt.Printf("const maxOp = %s\n\n", last) 691 692 fmt.Printf("var opNames = [...]string{\n") 693 for _, op := range ops { 694 fmt.Printf("\t%s: \"%s\",\n", op, op) 695 } 696 fmt.Printf("}\n") 697 } 698 699 // printScanner prints the decoding table for a scanner. 700 // The scanner can identify instruction boundaries but does not do 701 // full decoding. It is meant to be lighter weight than the x86asm 702 // decoder tables. 703 func printScanner(p *Prog) { 704 walkScanTree(p, -1) 705 var out []uint16 706 out = append(out, 0) 707 emitScanFunc(p, &out) 708 fmt.Printf("var scanProg = []uint16{\n") 709 fmt.Printf("\t/*0*/ 0, // dead\n") 710 for i := 1; i < len(out); i++ { 711 fmt.Printf("\t/*%d*/ ", i) 712 switch out[i] { 713 default: 714 log.Fatalf("malformed program %#x", out[i]) 715 case scanMatch: 716 fmt.Printf("scanMatch,\n") 717 continue 718 case scanJump: 719 fmt.Printf("scanJump, %d,\n", out[i+1]) 720 i++ 721 continue 722 case scanSwitchByte: 723 fmt.Printf("scanSwitchByte,\n") 724 for j := 0; j < 256/8; j++ { 725 fmt.Printf("\t") 726 fmt.Printf("/* %#02x-%#02x */", j*8, j*8+7) 727 for k := 0; k < 8; k++ { 728 fmt.Printf(" %d,", out[i+1+j*8+k]) 729 } 730 fmt.Printf("\n") 731 } 732 i += 256 733 continue 734 case scanSwitchSlash: 735 fmt.Printf("scanSwitchSlash, %d,\n", out[i+1]) 736 n := int(out[i+1]) 737 for j := 0; j < n; j++ { 738 fmt.Printf("\t/* byte */ %#x, %d,\n", out[i+2+2*j], out[i+2+2*j+1]) 739 } 740 for j := 0; j < 8; j++ { 741 fmt.Printf("\t/* /%d */ %d,\n", j, out[i+2+2*n+j]) 742 } 743 i += 1 + 2*n + 8 744 continue 745 case scanSwitchPrefix: 746 fmt.Printf("scanSwitchPrefix, %d,\n", out[i+1]) 747 n := int(out[i+1]) 748 for j := 0; j < n; j++ { 749 fmt.Printf("\t/* prefix */ %#x, %d,\n", out[i+2+2*j], out[i+2+2*j+1]) 750 } 751 i += 1 + 2*n 752 continue 753 case scanSwitchIs64: 754 fmt.Printf("scanSwitchIs64, %d, %d\n", out[i+1], out[i+2]) 755 i += 2 756 continue 757 case scanSwitchDatasize: 758 fmt.Printf("scanSwitchDatasize, %d, %d, %d\n", out[i+1], out[i+2], out[i+3]) 759 i += 3 760 continue 761 case scanSwitchIsMem: 762 fmt.Printf("scanSwitchIsMem, %d, %d\n", out[i+1], out[i+2]) 763 i += 2 764 continue 765 case scanReadModRM: 766 fmt.Printf("scanReadModRM,\n") 767 continue 768 case scanReadIB: 769 fmt.Printf("scanReadIB,\n") 770 continue 771 case scanReadIW: 772 fmt.Printf("scanReadIW,\n") 773 continue 774 case scanReadIWD: 775 fmt.Printf("scanReadIWD,\n") 776 continue 777 case scanReadIWDO: 778 fmt.Printf("scanReadIWDO,\n") 779 continue 780 case scanReadCWD: 781 fmt.Printf("scanReadCWD,\n") 782 continue 783 case scanReadCB: 784 fmt.Printf("scanReadCB,\n") 785 continue 786 case scanReadCDP: 787 fmt.Printf("scanReadCDP,\n") 788 continue 789 case scanReadCM: 790 fmt.Printf("scanReadCM,\n") 791 continue 792 } 793 } 794 fmt.Printf("}\n") 795 } 796 797 func walkScanTree(p *Prog, is64 int) { 798 keys := p.keys() 799 for _, key := range keys { 800 if p.Action == "is64" { 801 switch key { 802 case "0": 803 is64 = 0 804 case "1": 805 is64 = 1 806 } 807 } 808 walkScanTree(p.Child[key], is64) 809 } 810 811 switch p.Action { 812 case "read", "match": 813 // keep 814 return 815 case "decode": 816 if len(keys) >= 8 && keys[0] == "/0" && keys[7] == "/7" && allSame(p, keys) { 817 p.Action = "read" 818 p.Child = map[string]*Prog{"/r": p.Child[keys[0]]} 819 return 820 } 821 case "op", "arg": 822 // drop 823 *p = *p.Child[keys[0]] 824 return 825 case "prefix": 826 if len(keys) >= 1 && keys[0] == "0" && allSame(p, keys) { 827 *p = *p.Child[keys[0]] 828 return 829 } 830 case "is64", "addrsize", "datasize", "ismem": 831 if len(keys) == 1 && keys[0] == "any" { 832 *p = *p.Child[keys[0]] 833 return 834 } 835 nkey := len(allKeys[p.Action]) 836 if p.Action == "addrsize" { 837 nkey = 2 838 } 839 if p.Action == "datasize" && is64 == 0 { 840 nkey = 2 841 } 842 if len(keys) == nkey && allSame(p, keys) { 843 *p = *p.Child[keys[0]] 844 return 845 } 846 } 847 848 switch p.Action { 849 case "datasize": 850 if len(keys) == 2 && is64 == 0 || len(keys) == 3 { 851 if treeText(p.Child["16"]) == "read iw match ! \n" && treeText(p.Child["32"]) == "read id match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read id match ! \n") { 852 p.Action = "read" 853 p.Child = map[string]*Prog{"iwd/d": p.Child["16"].Child["iw"]} 854 return 855 } 856 if len(keys) == 3 && treeText(p.Child["16"]) == "read iw match ! \n" && treeText(p.Child["32"]) == "read id match ! \n" && treeText(p.Child["64"]) == "read io match ! \n" { 857 p.Action = "read" 858 p.Child = map[string]*Prog{"iwdo/d": p.Child["16"].Child["iw"]} 859 return 860 } 861 if treeText(p.Child["16"]) == "read /r read iw match ! \n" && treeText(p.Child["32"]) == "read /r read id match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read /r read id match ! \n") { 862 p.Action = "read" 863 p.Child = map[string]*Prog{"/r": {Action: "read", Child: map[string]*Prog{"iwd/d": p.Child["16"].Child["/r"].Child["iw"]}}} 864 return 865 } 866 if treeText(p.Child["16"]) == "read cw match ! \n" && treeText(p.Child["32"]) == "read cd match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read cd match ! \n") { 867 p.Action = "read" 868 p.Child = map[string]*Prog{"cwd/d": p.Child["16"].Child["cw"]} 869 return 870 } 871 if treeText(p.Child["16"]) == "read cd match ! \n" && treeText(p.Child["32"]) == "read cp match ! \n" && (len(keys) == 2 || treeText(p.Child["64"]) == "read cp match ! \n") { 872 p.Action = "read" 873 p.Child = map[string]*Prog{"cdp/d": p.Child["16"].Child["cd"]} 874 return 875 } 876 fmt.Printf("!! %q\n", treeText(p.Child["16"])) 877 } 878 879 case "is64": 880 if len(keys) == 2 && treeText(p.Child["0"]) == "read cwd/d match ! \n" && treeText(p.Child["1"]) == "read cd match ! \n" { 881 *p = *p.Child["0"] 882 return 883 } 884 if len(keys) == 2 && treeText(p.Child["0"]) == "read iwd/d match ! \n" && treeText(p.Child["1"]) == "read iwdo/d match ! \n" { 885 *p = *p.Child["1"] 886 return 887 } 888 } 889 890 /* 891 match := make(map[string][]string) 892 for _, key := range keys { 893 text := treeText(p.Child[key]) 894 match[text] = append(match[text], key) 895 } 896 child := make(map[string]*Prog) 897 for _, keys := range match { 898 child[strings.Join(keys, ",")] = p.Child[keys[0]] 899 } 900 p.Child = child 901 */ 902 } 903 904 func treeText(p *Prog) string { 905 var buf bytes.Buffer 906 printTree(&buf, p, 0, true) 907 return buf.String() 908 } 909 910 func allSame(p *Prog, keys []string) bool { 911 var tree string 912 for i, key := range keys { 913 if i == 0 { 914 tree = treeText(p.Child[key]) 915 continue 916 } 917 if treeText(p.Child[key]) != tree { 918 return false 919 } 920 } 921 return true 922 } 923 924 var scanCache = map[string]uint16{} 925 926 const ( 927 _ uint16 = iota 928 scanMatch 929 scanJump 930 scanSwitchByte 931 scanSwitchSlash 932 scanSwitchIs64 933 scanSwitchDatasize 934 scanSwitchIsMem 935 scanSwitchPrefix 936 scanReadModRM 937 scanReadIB 938 scanReadIW 939 scanReadIWD 940 scanReadIWDO 941 scanReadCWD 942 scanReadCB 943 scanReadCDP 944 scanReadCM 945 ) 946 947 func decodeKeyPlus(key string) (val, n int) { 948 n = 1 949 if strings.HasSuffix(key, "+") { 950 n = 8 951 key = key[:len(key)-1] 952 } 953 v, err := strconv.ParseUint(key, 16, 8) 954 if err != nil { 955 log.Fatalf("unexpected decode key %q", key) 956 } 957 return int(v), n 958 } 959 960 func decodeKey(key string) int { 961 val, n := decodeKeyPlus(key) 962 if n != 1 { 963 log.Panicf("unexpected decode key+ %q", key) 964 } 965 return val 966 } 967 968 func emitScanFunc(p *Prog, out *[]uint16) uint16 { 969 keys := p.keys() 970 text := treeText(p) 971 if off, ok := scanCache[text]; ok { 972 return off 973 } 974 start := uint16(len(*out)) 975 scanCache[text] = start 976 switch p.Action { 977 case "decode": 978 if keys[0][0] != '/' { 979 *out = append(*out, scanSwitchByte) 980 off := len(*out) 981 for i := 0; i < 256; i++ { 982 *out = append(*out, 0) 983 } 984 for _, key := range keys { 985 val, n := decodeKeyPlus(key) 986 dst := emitScanFunc(p.Child[key], out) 987 for j := 0; j < n; j++ { 988 (*out)[off+val+j] = dst 989 } 990 } 991 return start 992 } 993 994 n := len(keys) 995 for n > 0 && keys[n-1][0] != '/' { 996 n-- 997 } 998 total := 0 999 for i := n; i < len(keys); i++ { 1000 key := keys[i] 1001 _, n := decodeKeyPlus(key) 1002 total += n 1003 } 1004 *out = append(*out, scanSwitchSlash, uint16(total)) 1005 off := len(*out) 1006 for i := 0; i < total; i++ { 1007 *out = append(*out, 0, 0) 1008 } 1009 for i := 0; i < 8; i++ { 1010 *out = append(*out, 0) 1011 } 1012 for i := n; i < len(keys); i++ { 1013 key := keys[i] 1014 val, valn := decodeKeyPlus(key) 1015 targ := emitScanFunc(p.Child[key], out) 1016 for j := 0; j < valn; j++ { 1017 (*out)[off] = uint16(val + j) 1018 off++ 1019 (*out)[off] = targ 1020 off++ 1021 } 1022 } 1023 for i := 0; i < n; i++ { 1024 key := keys[i] 1025 if len(key) != 2 || key[0] != '/' || key[1] < '0' || '8' <= key[1] { 1026 log.Fatalf("unexpected decode key %q", key) 1027 } 1028 (*out)[off+int(key[1]-'0')] = emitScanFunc(p.Child[key], out) 1029 } 1030 return start 1031 1032 case "read": 1033 switch keys[0] { 1034 default: 1035 log.Fatalf("unexpected read %q", keys[0]) 1036 case "/r": 1037 *out = append(*out, scanReadModRM) 1038 case "ib": 1039 *out = append(*out, scanReadIB) 1040 case "iw": 1041 *out = append(*out, scanReadIW) 1042 case "cb": 1043 *out = append(*out, scanReadCB) 1044 case "cm": 1045 *out = append(*out, scanReadCM) 1046 case "iwd/d": 1047 *out = append(*out, scanReadIWD) 1048 case "iwdo/d": 1049 *out = append(*out, scanReadIWDO) 1050 case "cwd/d": 1051 *out = append(*out, scanReadCWD) 1052 case "cdp/d": 1053 *out = append(*out, scanReadCDP) 1054 } 1055 next := p.Child[keys[0]] 1056 if next.Action == "match" { 1057 *out = append(*out, scanMatch) 1058 } else { 1059 *out = append(*out, scanJump, 0) 1060 off := len(*out) 1061 (*out)[off-1] = emitScanFunc(next, out) 1062 } 1063 return start 1064 1065 case "match": 1066 *out = append(*out, scanMatch) 1067 return start 1068 1069 case "is64": 1070 *out = append(*out, scanSwitchIs64, 0, 0) 1071 if next := p.Child["0"]; next != nil { 1072 (*out)[start+1] = emitScanFunc(next, out) 1073 } 1074 if next := p.Child["1"]; next != nil { 1075 (*out)[start+2] = emitScanFunc(next, out) 1076 } 1077 return start 1078 1079 case "ismem": 1080 *out = append(*out, scanSwitchIsMem, 0, 0) 1081 if next := p.Child["0"]; next != nil { 1082 (*out)[start+1] = emitScanFunc(next, out) 1083 } 1084 if next := p.Child["1"]; next != nil { 1085 (*out)[start+2] = emitScanFunc(next, out) 1086 } 1087 return start 1088 1089 case "datasize": 1090 *out = append(*out, scanSwitchDatasize, 0, 0, 0) 1091 if next := p.Child["16"]; next != nil { 1092 (*out)[start+1] = emitScanFunc(next, out) 1093 } 1094 if next := p.Child["32"]; next != nil { 1095 (*out)[start+2] = emitScanFunc(next, out) 1096 } 1097 if next := p.Child["64"]; next != nil { 1098 (*out)[start+3] = emitScanFunc(next, out) 1099 } 1100 return start 1101 case "prefix": 1102 *out = append(*out, scanSwitchPrefix, uint16(len(keys))) 1103 n := len(keys) 1104 for i := 0; i < n; i++ { 1105 *out = append(*out, uint16(decodeKey(keys[i])), 0) 1106 } 1107 for i := 0; i < n; i++ { 1108 (*out)[int(start)+2+2*i+1] = emitScanFunc(p.Child[keys[i]], out) 1109 } 1110 return start 1111 1112 } 1113 1114 log.Fatalf("unexpected action %q", p.Action) 1115 return start 1116 } 1117 1118 // printDecoderPass prints the decoding table program for p, 1119 // assuming that we are emitting code at the given program counter. 1120 // It returns the new current program counter, that is, the program 1121 // counter after the printed instructions. 1122 // If printing==false, printDecoderPass does not print the actual 1123 // code words but still does the PC computation. 1124 func printDecoderPass(p *Prog, pc int, printing bool, ops map[string]bool) int { 1125 // Record PC on first pass. 1126 if p.PC == 0 { 1127 p.PC = pc 1128 } 1129 1130 // If PC doesn't match, we've already printed this code 1131 // because it was reached some other way. Jump to that copy. 1132 if p.PC != pc { 1133 if printing { 1134 fmt.Printf("/*%d*/\tuint16(xJump), %d,\n", pc, p.PC) 1135 } 1136 return pc + 2 1137 } 1138 1139 // Otherwise, emit the code for the given action. 1140 1141 // At the bottom, if next is non-nil, emit code for next. 1142 // Then emit the code for the children named by the keys. 1143 keys := p.keys() 1144 var next *Prog 1145 1146 switch p.Action { 1147 default: 1148 log.Printf("printDecoderPass: unknown action %q: %s", p.Action, p.Path) 1149 1150 case "decode": 1151 // Decode hex bytes or /n modrm op checks. 1152 // Hex bytes take priority, so do them first. 1153 // Hex bytes of the form "40+" indicate an 1154 // 8 entry-wide swath of codes: 40, 41, ..., 47. 1155 hex := 0 1156 slash := 0 1157 for _, key := range keys { 1158 if isHex(key) { 1159 if strings.Contains(key, "+") { 1160 hex += 8 1161 } else { 1162 hex++ 1163 } 1164 } 1165 if isSlashNum(key) { 1166 slash++ 1167 } 1168 } 1169 if hex > 0 { 1170 // TODO(rsc): Introduce an xCondByte256 that has 256 child entries 1171 // and no explicit keys. That will cut the size in half for large 1172 // tables. 1173 if printing { 1174 fmt.Printf("/*%d*/\tuint16(xCondByte), %d,\n", pc, hex) 1175 for _, key := range keys { 1176 if !isHex(key) { 1177 continue 1178 } 1179 if i := strings.Index(key, "+"); i >= 0 { 1180 nextPC := p.Child[key].PC 1181 n, _ := strconv.ParseUint(key[:i], 16, 0) 1182 for j := 0; j < 8; j++ { 1183 fmt.Printf("\t%#02x, %d,\n", int(n)+j, nextPC) 1184 } 1185 continue 1186 } 1187 fmt.Printf("\t0x%s, %d,\n", key, p.Child[key].PC) 1188 } 1189 } 1190 pc += 2 + 2*hex 1191 1192 // All other condition checks fail the decoding if nothing is found, 1193 // but this one falls through so that we can then do /n checks. 1194 // If there are no upcoming /n checks, insert an explicit failure. 1195 if slash == 0 { 1196 if printing { 1197 fmt.Printf("\tuint16(xFail),\n") 1198 } 1199 pc++ 1200 } 1201 } 1202 if slash > 0 { 1203 if printing { 1204 fmt.Printf("/*%d*/\tuint16(xCondSlashR),\n", pc) 1205 for i := 0; i < 8; i++ { 1206 fmt.Printf("\t%d, // %d\n", p.childPC(fmt.Sprintf("/%d", i)), i) 1207 } 1208 } 1209 pc += 1 + 8 1210 } 1211 1212 case "is64": 1213 // Decode based on processor mode: 64-bit or not. 1214 if len(keys) == 1 && keys[0] == "any" { 1215 next = p.Child["any"] 1216 break 1217 } 1218 if p.Child["any"] != nil { 1219 log.Printf("%s: mixed is64 keys: %v", p.Path, keys) 1220 } 1221 1222 if printing { 1223 fmt.Printf("/*%d*/\tuint16(xCondIs64), %d, %d,\n", pc, p.childPC("0"), p.childPC("1")) 1224 } 1225 pc += 3 1226 1227 case "prefix": 1228 // Decode based on presence of prefix. 1229 // The "0" prefix means "none of the above", so if there's 1230 // nothing else, it's the same as "any". 1231 if len(keys) == 1 && (keys[0] == "any" || keys[0] == "0") { 1232 next = p.Child["any"] 1233 break 1234 } 1235 if p.Child["any"] != nil { 1236 log.Printf("%s: mixed prefix keys: %v", p.Path, keys) 1237 } 1238 1239 // Emit the prefixes in reverse sorted order, so that F3 and F2 are 1240 // considered before 66, and the fallback 0 is considered last. 1241 if printing { 1242 fmt.Printf("/*%d*/\tuint16(xCondPrefix), %d,\n", pc, len(keys)) 1243 for i := len(keys) - 1; i >= 0; i-- { 1244 key := keys[i] 1245 nextPC := p.Child[key].PC 1246 fmt.Printf("\t0x%s, %d,\n", key, nextPC) 1247 } 1248 } 1249 pc += 2 + 2*len(keys) 1250 1251 case "addrsize": 1252 // Decode based on address size attribute. 1253 if len(keys) == 1 && keys[0] == "any" { 1254 next = p.Child["any"] 1255 break 1256 } 1257 if p.Child["any"] != nil { 1258 log.Printf("%s: mixed addrsize keys: %v", p.Path, keys) 1259 } 1260 1261 if printing { 1262 fmt.Printf("/*%d*/\tuint16(xCondAddrSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64")) 1263 } 1264 pc += 4 1265 1266 case "datasize": 1267 // Decode based on operand size attribute. 1268 if len(keys) == 1 && keys[0] == "any" { 1269 next = p.Child["any"] 1270 break 1271 } 1272 if p.Child["any"] != nil { 1273 log.Printf("%s: mixed datasize keys: %v", p.Path, keys) 1274 } 1275 1276 if printing { 1277 fmt.Printf("/*%d*/\tuint16(xCondDataSize), %d, %d, %d,\n", pc, p.childPC("16"), p.childPC("32"), p.childPC("64")) 1278 } 1279 pc += 4 1280 1281 case "ismem": 1282 // Decode based on modrm form: memory or register reference. 1283 if len(keys) == 1 && keys[0] == "any" { 1284 next = p.Child["any"] 1285 break 1286 } 1287 if p.Child["any"] != nil { 1288 log.Printf("%s: mixed ismem keys: %v", p.Path, keys) 1289 } 1290 1291 if printing { 1292 fmt.Printf("/*%d*/\tuint16(xCondIsMem), %d, %d,\n", pc, p.childPC("0"), p.childPC("1")) 1293 } 1294 pc += 3 1295 1296 case "op": 1297 // Set opcode. 1298 ops[keys[0]] = true 1299 if printing { 1300 fmt.Printf("/*%d*/\tuint16(xSetOp), uint16(%s),\n", pc, keys[0]) 1301 } 1302 next = p.Child[keys[0]] 1303 pc += 2 1304 1305 case "read": 1306 // Read argument bytes. 1307 if printing { 1308 fmt.Printf("/*%d*/\tuint16(xRead%s),\n", pc, xOp(keys[0])) 1309 } 1310 next = p.Child[keys[0]] 1311 pc++ 1312 1313 case "arg": 1314 // Record instruction argument (interpret bytes loaded with read). 1315 if printing { 1316 fmt.Printf("/*%d*/\tuint16(xArg%s),\n", pc, xOp(keys[0])) 1317 } 1318 next = p.Child[keys[0]] 1319 pc++ 1320 1321 case "match": 1322 // Finish match. 1323 if printing { 1324 fmt.Printf("/*%d*/\tuint16(xMatch),\n", pc) 1325 } 1326 pc++ 1327 return pc 1328 } 1329 1330 if next != nil { 1331 pc = printDecoderPass(next, pc, printing, ops) 1332 } 1333 1334 for _, key := range keys { 1335 q := p.Child[key] 1336 if q.PC == 0 || q.PC == pc { 1337 pc = printDecoderPass(q, pc, printing, ops) 1338 } 1339 } 1340 1341 return pc 1342 } 1343 1344 // childPC returns the PC for the given child key. 1345 // If the key is not present, it returns PC 0, 1346 // which is known to be an xFail instruction. 1347 func (p *Prog) childPC(key string) int { 1348 q := p.Child[key] 1349 if q == nil { 1350 return 0 1351 } 1352 return q.PC 1353 } 1354 1355 // isLower reports whether c is an ASCII lower case letter. 1356 func isLower(c byte) bool { 1357 return 'a' <= c && c <= 'z' 1358 } 1359 1360 // isLetterDigit reports whether c is an ASCII letter or digit. 1361 func isLetterDigit(c byte) bool { 1362 return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' 1363 } 1364 1365 // xOp converts arg, an Intel manual shorthand, into a decoder opcode suffix. 1366 // The standard form is LeadingUpperLetter with a few punctuation symbols 1367 // turned into purely lower case words: M16and32, M16colon32, CR0dashCR7. 1368 func xOp(arg string) string { 1369 var buf []byte 1370 for i := 0; i < len(arg); i++ { 1371 c := arg[i] 1372 if isLower(c) && (i == 0 || !isLetterDigit(arg[i-1])) { 1373 c -= 'a' - 'A' 1374 } 1375 buf = append(buf, c) 1376 } 1377 return argFix.Replace(string(buf)) 1378 } 1379 1380 var argFix = strings.NewReplacer( 1381 "/R", "SlashR", 1382 "/", "", 1383 "<", "", 1384 ">", "", 1385 "+", "plus", 1386 "-", "dash", 1387 ":", "colon", 1388 "&", "and", 1389 "ST(0)", "ST", 1390 "ST(I)", "STi", 1391 "ST(I)+Op", "STi", 1392 )