golang.org/x/arch@v0.17.0/x86/x86spec/parse.go

golang.org/x/arch@v0.17.0/x86/x86spec/parse.go (about)

     1  // Copyright 2016 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"log"
    13  	"math"
    14  	"os"
    15  	"reflect"
    16  	"regexp"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  
    22  	"rsc.io/pdf"
    23  )
    24  
// listing holds information about one or more parsed manual pages
// concerning a single instruction listing.
//
// parsePage produces one listing per page; merge appends the tables and
// compatibility text of a continuation page onto an existing listing.
type listing struct {
	pageNum   int          // page number passed to parsePage for this listing
	name      string       // instruction heading
	mtables   [][][]string // mnemonic tables (at most one per page)
	enctables [][][]string // encoding tables (at most one per page)
	compat    string       // compatibility text gathered by findCompat
}
    34  
    35  type logReaderAt struct {
    36  	f io.ReaderAt
    37  }
    38  
    39  func (l *logReaderAt) ReadAt(x []byte, off int64) (int, error) {
    40  	log.Printf("read %d @ %d", len(x), off)
    41  	return l.f.ReadAt(x, off)
    42  }
    43  
    44  const (
    45  	cacheBlockSize = 64 * 1024
    46  	numCacheBlock  = 16
    47  )
    48  
    49  type cachedReaderAt struct {
    50  	r     io.ReaderAt
    51  	cache *cacheBlock
    52  }
    53  
    54  type cacheBlock struct {
    55  	next   *cacheBlock
    56  	buf    []byte
    57  	offset int64
    58  	err    error
    59  }
    60  
    61  func newCachedReaderAt(r io.ReaderAt) *cachedReaderAt {
    62  	c := &cachedReaderAt{
    63  		r: r,
    64  	}
    65  	for i := 0; i < numCacheBlock; i++ {
    66  		c.cache = &cacheBlock{next: c.cache}
    67  	}
    68  	return c
    69  }
    70  
    71  func (c *cachedReaderAt) ReadAt(p []byte, offset int64) (n int, err error) {
    72  	// Assume large reads indicate a caller that doesn't need caching.
    73  	if len(p) >= cacheBlockSize {
    74  		return c.r.ReadAt(p, offset)
    75  	}
    76  
    77  	for n < len(p) {
    78  		o := offset + int64(n)
    79  		f := o & (cacheBlockSize - 1)
    80  		b := c.readBlock(o - f)
    81  		n += copy(p[n:], b.buf[f:])
    82  		if n < len(p) && b.err != nil {
    83  			return n, b.err
    84  		}
    85  	}
    86  	return n, nil
    87  }
    88  
    89  var errShortRead = errors.New("short read")
    90  
    91  func (c *cachedReaderAt) readBlock(offset int64) *cacheBlock {
    92  	if offset&(cacheBlockSize-1) != 0 {
    93  		panic("misuse of cachedReaderAt.readBlock")
    94  	}
    95  
    96  	// Look in cache.
    97  	var b, prev *cacheBlock
    98  	for b = c.cache; ; prev, b = b, b.next {
    99  		if b.buf != nil && b.offset == offset {
   100  			// Move to front.
   101  			if prev != nil {
   102  				prev.next = b.next
   103  				b.next = c.cache
   104  				c.cache = b
   105  			}
   106  			return b
   107  		}
   108  		if b.next == nil {
   109  			break
   110  		}
   111  	}
   112  
   113  	// Otherwise b is LRU block in cache, prev points at b.
   114  	if b.buf == nil {
   115  		b.buf = make([]byte, cacheBlockSize)
   116  	}
   117  	b.offset = offset
   118  	n, err := c.r.ReadAt(b.buf[:cacheBlockSize], offset)
   119  	b.buf = b.buf[:n]
   120  	b.err = err
   121  	if n > 0 {
   122  		// Move to front.
   123  		prev.next = nil
   124  		b.next = c.cache
   125  		c.cache = b
   126  	}
   127  	return b
   128  }
   129  
   130  func pdfOpen(name string) (*pdf.Reader, error) {
   131  	f, err := os.Open(name)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  	fi, err := f.Stat()
   136  	if err != nil {
   137  		f.Close()
   138  		return nil, err
   139  	}
   140  	return pdf.NewReader(newCachedReaderAt(f), fi.Size())
   141  }
   142  
// parse opens the PDF named by *flagFile and returns the instructions
// extracted from it.
//
// It first collects the expected instruction headings from the document
// outline, then walks the pages in order. A page that carries an instruction
// heading starts a new listing; pages without a heading are merged into the
// listing in progress. Each completed listing is handed to processListing,
// which appends to the result. Problems are reported on standard error;
// failure to open the file or an implausibly short outline is fatal.
func parse() []*instruction {
	var insts []*instruction

	f, err := pdfOpen(*flagFile)
	if err != nil {
		log.Fatal(err)
	}

	// Find instruction set reference in outline, to build instruction list.
	instList := instHeadings(f.Outline())
	if len(instList) < 200 {
		log.Fatalf("only found %d instructions in table of contents", len(instList))
	}

	// Scan document looking for instructions.
	// Must find exactly the ones in the outline.
	n := f.NumPage()
	var current *listing
	// finishInstruction processes the listing in progress, if any, and
	// clears it so the next heading starts fresh.
	finishInstruction := func() {
		if current == nil {
			return
		}
		if len(current.mtables) == 0 || len(current.mtables[0]) <= 1 {
			fmt.Fprintf(os.Stderr, "p.%d: no mnemonics for instruction %q\n", current.pageNum, current.name)
		}
		processListing(current, &insts)
		current = nil
	}

	for pageNum := 1; pageNum <= n; pageNum++ {
		if onlySomePages && !isDebugPage(pageNum) {
			continue
		}
		p := f.Page(pageNum)
		parsed := parsePage(p, pageNum)
		if parsed.name != "" {
			// New heading: close out the previous listing and look the
			// heading up in the outline list.
			finishInstruction()
			for j, headline := range instList {
				if parsed.name == headline {
					// Blank the entry so that headings never seen can be
					// reported as missing at the end.
					instList[j] = ""
					current = parsed
					break
				}
			}
			if current == nil {
				fmt.Fprintf(os.Stderr, "p.%d: unexpected instruction %q\n", pageNum, parsed.name)
			}
			continue
		}
		if current != nil {
			// Continuation page of the listing in progress.
			merge(current, parsed)
			continue
		}
		// No listing in progress: any table content here is unexpected.
		if parsed.mtables != nil {
			fmt.Fprintf(os.Stderr, "p.%d: unexpected mnemonic table\n", pageNum)
		}
		if parsed.enctables != nil {
			fmt.Fprintf(os.Stderr, "p.%d: unexpected encoding table\n", pageNum)
		}
		if parsed.compat != "" {
			fmt.Fprintf(os.Stderr, "p.%d: unexpected compatibility statement\n", pageNum)
		}
	}
	finishInstruction()

	// When only some pages were scanned, most headings are necessarily
	// unseen, so the missing-instruction report is skipped.
	if !onlySomePages {
		for _, headline := range instList {
			if headline != "" {
				fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline)
			}
		}
	}

	return insts
}
   218  
   219  // isDebugPage reports whether the -debugpage flag mentions page n.
   220  // The argument is a comma-separated list of pages.
   221  // Maybe some day it will support ranges.
   222  func isDebugPage(n int) bool {
   223  	s := *flagDebugPage
   224  	var k int
   225  	for i := 0; ; i++ {
   226  		if i == len(s) || s[i] == ',' {
   227  			if n == k {
   228  				return true
   229  			}
   230  			k = 0
   231  		}
   232  		if i == len(s) {
   233  			break
   234  		}
   235  		if '0' <= s[i] && s[i] <= '9' {
   236  			k = k*10 + int(s[i]) - '0'
   237  		}
   238  	}
   239  	return false
   240  }
   241  
   242  // merge merges the content of y into the running collection in x.
   243  func merge(x, y *listing) {
   244  	if y.name != "" {
   245  		fmt.Fprintf(os.Stderr, "p.%d: merging page incorrectly\n", y.pageNum)
   246  		return
   247  	}
   248  
   249  	x.mtables = append(x.mtables, y.mtables...)
   250  	x.enctables = append(x.enctables, y.enctables...)
   251  	x.compat += y.compat
   252  }
   253  
   254  // instHeadings returns the list of instruction headings from the table of contents.
   255  // When we parse the pages we expect to find every one of these.
   256  func instHeadings(outline pdf.Outline) []string {
   257  	return appendInstHeadings(outline, nil)
   258  }
   259  
// instRE matches the titles of outline entries whose direct children are
// treated as instruction headings by appendInstHeadings.
var instRE = regexp.MustCompile(`\d Instructions \([A-Z]-[A-Z]\)|VMX Instructions|Instruction SET Reference|SHA Extensions Reference`)
   261  
// The headings are inconsistent about dash and superscript usage. Normalize.
//
// fixDash is applied both to outline titles and to headings parsed from the
// pages, so that the two can be compared as plain strings. The specific
// phrase rewrites come first; the remaining pairs collapse em dashes,
// en dashes and hyphens, with or without surrounding spaces, to a bare "-".
var fixDash = strings.NewReplacer(
	"Compute 2 –1", "Compute 2^x-1",
	"Compute 2x-1", "Compute 2^x-1",
	"Compute 2x–1", "Compute 2^x-1",
	"/ FUCOMI", "/FUCOMI",
	"Compute y ∗ log x", "Compute y * log₂x",
	"Compute y * log2x", "Compute y * log₂x",
	"Compute y * log2(x +1)", "Compute y * log₂(x+1)",
	"Compute y ∗ log (x +1)", "Compute y * log₂(x+1)",
	" — ", "-",
	"— ", "-",
	" —", "-",
	"—", "-",
	" – ", "-",
	" –", "-",
	"– ", "-",
	"–", "-",
	" - ", "-",
	"- ", "-",
	" -", "-",
)
   284  
   285  func appendInstHeadings(outline pdf.Outline, list []string) []string {
   286  	if instRE.MatchString(outline.Title) {
   287  		for _, child := range outline.Child {
   288  			list = append(list, fixDash.Replace(child.Title))
   289  		}
   290  	}
   291  	for _, child := range outline.Child {
   292  		list = appendInstHeadings(child, list)
   293  	}
   294  	return list
   295  }
   296  
// dateRE matches an English month name followed by a four-digit year
// beginning with 19 or 20, e.g. "December 2015". parsePage uses it to find
// the manual's date in the text of page 1.
var dateRE = regexp.MustCompile(`\b(January|February|March|April|May|June|July|August|September|October|November|December) ((19|20)[0-9][0-9])\b`)
   298  
   299  // parsePage parses a single PDF page and returns the content it found.
   300  func parsePage(p pdf.Page, pageNum int) *listing {
   301  	if debugging {
   302  		fmt.Fprintf(os.Stderr, "DEBUG: parsing page %d\n", pageNum)
   303  	}
   304  
   305  	parsed := new(listing)
   306  	parsed.pageNum = pageNum
   307  
   308  	content := p.Content()
   309  
   310  	for i, t := range content.Text {
   311  		if match(t, "Symbol", 11, "≠") {
   312  			t.Font = "NeoSansIntel"
   313  			t.FontSize = 9
   314  			content.Text[i] = t
   315  		}
   316  		if t.S == "*" || t.S == "**" || t.S == "***" || t.S == "," && t.Font == "Arial" && t.FontSize < 9 || t.S == "1" && t.Font == "Arial" {
   317  			t.Font = "NeoSansIntel"
   318  			t.FontSize = 9
   319  			if i+1 < len(content.Text) {
   320  				t.Y = content.Text[i+1].Y
   321  			}
   322  			content.Text[i] = t
   323  		}
   324  	}
   325  
   326  	text := findWords(content.Text)
   327  
   328  	for i, t := range text {
   329  		if match(t, "NeoSansIntel", 8, ".WIG") || match(t, "NeoSansIntel", 8, "AVX2") {
   330  			t.FontSize = 9
   331  			text[i] = t
   332  		}
   333  		if t.Font == "NeoSansIntel-Medium" {
   334  			t.Font = "NeoSansIntelMedium"
   335  			text[i] = t
   336  		}
   337  		if t.Font == "NeoSansIntel-Italic" {
   338  			t.Font = "NeoSansIntel,Italic"
   339  			text[i] = t
   340  		}
   341  	}
   342  
   343  	if debugging {
   344  		for _, t := range text {
   345  			fmt.Println(t)
   346  		}
   347  	}
   348  
   349  	if pageNum == 1 {
   350  		var buf bytes.Buffer
   351  		for _, t := range text {
   352  			buf.WriteString(t.S + "\n")
   353  		}
   354  		all := buf.String()
   355  		m := regexp.MustCompile(`Order Number: ([\w-\-]+)`).FindStringSubmatch(all)
   356  		num := "???"
   357  		if m != nil {
   358  			num = m[1]
   359  		}
   360  		date := dateRE.FindString(all)
   361  		if date == "" {
   362  			date = "???"
   363  		}
   364  
   365  		fmt.Printf("# x86 instruction set description version %s, %s\n",
   366  			specFormatVersion, time.Now().Format("2006-01-02"))
   367  		fmt.Printf("# Based on Intel Instruction Set Reference #%s, %s.\n", num, date)
   368  		fmt.Printf("# https://golang.org/x/arch/x86/x86spec\n")
   369  	}
   370  
   371  	// Remove text we should ignore.
   372  	out := text[:0]
   373  	for _, t := range text {
   374  		if shouldIgnore(t) {
   375  			continue
   376  		}
   377  		out = append(out, t)
   378  	}
   379  	text = out
   380  
   381  	// Page header must say instruction set reference.
   382  	if len(text) == 0 {
   383  		return parsed
   384  	}
   385  	if (!match(text[0], "NeoSansIntel", 9, "INSTRUCTION") || !match(text[0], "NeoSansIntel", 9, "REFERENCE")) &&
   386  		!match(text[0], "NeoSansIntel", 9, "EXTENSIONS") {
   387  		return parsed
   388  	}
   389  	text = text[1:]
   390  
   391  	enctable := findEncodingTable(text)
   392  	if enctable != nil {
   393  		parsed.enctables = append(parsed.enctables, enctable)
   394  	}
   395  
   396  	parsed.compat = findCompat(text)
   397  
   398  	// Narrow scope for finding mnemonic table.
   399  	// Must be last, since it trims text.
   400  	// Next line is headline. Can wrap to multiple lines.
   401  	if len(text) == 0 || !match(text[0], "NeoSansIntelMedium", 12, "") || !isInstHeadline(text[0].S) {
   402  		if debugging {
   403  			fmt.Fprintf(os.Stderr, "non-inst-headline: %v\n", text[0])
   404  		}
   405  	} else {
   406  		parsed.name = text[0].S
   407  		text = text[1:]
   408  		for len(text) > 0 && match(text[0], "NeoSansIntelMedium", 12, "") {
   409  			parsed.name += " " + text[0].S
   410  			text = text[1:]
   411  		}
   412  		parsed.name = fixDash.Replace(parsed.name)
   413  	}
   414  
   415  	// Table follows; heading is NeoSansIntelMedium and rows are NeoSansIntel.
   416  	i := 0
   417  	for i < len(text) && match(text[i], "NeoSansIntelMedium", 9, "") {
   418  		i++
   419  	}
   420  	for i < len(text) && match(text[i], "NeoSansIntel", 9, "") && text[i].S != "NOTES:" {
   421  		i++
   422  	}
   423  
   424  	mtable := findMnemonicTable(text[:i])
   425  	if mtable != nil {
   426  		parsed.mtables = append(parsed.mtables, mtable)
   427  	}
   428  
   429  	return parsed
   430  }
   431  
   432  func match(t pdf.Text, font string, size float64, substr string) bool {
   433  	return t.Font == font && math.Abs(t.FontSize-size) < 0.1 && strings.Contains(t.S, substr)
   434  }
   435  
   436  func shouldIgnore(t pdf.Text) bool {
   437  	// Ignore footnote stars, which are in Arial.
   438  	// Also, the page describing MOVS has a tiny 2pt Arial backslash.
   439  	if (t.S == "*" || t.S == "\\") && strings.HasPrefix(t.Font, "Arial") {
   440  		return true
   441  	}
   442  
   443  	// Ignore superscript numbers, superscript ST(0), and superscript x.
   444  	if len(t.S) == 1 && '1' <= t.S[0] && t.S[0] <= '9' || t.S == "ST(0)" || t.S == "x" {
   445  		if match(t, "NeoSansIntel", 7.2, "") || match(t, "NeoSansIntel", 5.6, "") || match(t, "NeoSansIntelMedium", 8, "") || match(t, "NeoSansIntelMedium", 9.6, "") {
   446  			return true
   447  		}
   448  	}
   449  
   450  	return false
   451  }
   452  
   453  func isInstHeadline(s string) bool {
   454  	return strings.Contains(s, "—") ||
   455  		strings.Contains(s, " - ") ||
   456  		strings.Contains(s, "PTEST- Logical Compare")
   457  }
   458  
// findWords assembles the text fragments in chars into words and short
// phrases and returns one pdf.Text per phrase.
//
// chars is modified in place: it is sorted, and Y coordinates that differ
// from the preceding line's by less than nudge are snapped to it so that
// fragments on the same visual line compare equal.
//
// Within a line, a fragment is joined to the phrase being built when
// sameFont accepts the two fonts, the font sizes are identical, and the
// fragment starts no more than FontSize/6 past the phrase's right edge
// (joined directly) or no more than FontSize*2/3 past it (joined with a
// single space). Anything farther away starts a new phrase.
//
// Each result carries the first fragment's font (with an Italic suffix
// removed), size, X and Y. Note that W is set to the phrase's right edge
// (X + W of its last fragment), not to its width.
func findWords(chars []pdf.Text) (words []pdf.Text) {
	// Sort by Y coordinate and normalize.
	const nudge = 1
	sort.Sort(pdf.TextVertical(chars))
	old := -100000.0
	for i, c := range chars {
		if c.Y != old && math.Abs(old-c.Y) < nudge {
			chars[i].Y = old
		} else {
			old = c.Y
		}
	}

	// Sort by Y coordinate, breaking ties with X.
	// This will bring letters in a single word together.
	sort.Sort(pdf.TextVertical(chars))

	// Loop over chars.
	for i := 0; i < len(chars); {
		// Find all chars on line.
		j := i + 1
		for j < len(chars) && chars[j].Y == chars[i].Y {
			j++
		}
		var end float64
		// Split line into words (really, phrases).
		for k := i; k < j; {
			ck := &chars[k]
			s := ck.S
			end = ck.X + ck.W
			// Gap thresholds are derived from the size of the phrase's
			// first fragment.
			charSpace := ck.FontSize / 6
			wordSpace := ck.FontSize * 2 / 3
			l := k + 1
			for l < j {
				// Grow word.
				cl := &chars[l]
				if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+charSpace {
					s += cl.S
					end = cl.X + cl.W
					l++
					continue
				}
				// Add space to phrase before next word.
				if sameFont(cl.Font, ck.Font) && cl.FontSize == ck.FontSize && cl.X <= end+wordSpace {
					s += " " + cl.S
					end = cl.X + cl.W
					l++
					continue
				}
				break
			}
			f := ck.Font
			f = strings.TrimSuffix(f, ",Italic")
			f = strings.TrimSuffix(f, "-Italic")
			words = append(words, pdf.Text{
				Font:     f,
				FontSize: ck.FontSize,
				X:        ck.X,
				Y:        ck.Y,
				W:        end, // right edge of the phrase, not its width
				S:        s,
			})
			k = l
		}
		i = j
	}

	return words
}
   528  
   529  func sameFont(f1, f2 string) bool {
   530  	f1 = strings.TrimSuffix(f1, ",Italic")
   531  	f1 = strings.TrimSuffix(f1, "-Italic")
   532  	f2 = strings.TrimSuffix(f1, ",Italic")
   533  	f2 = strings.TrimSuffix(f1, "-Italic")
   534  	return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman"
   535  }
   536  
// findMnemonicTable reconstructs the mnemonic table from the text fragments
// in text and returns it as rows of cells, heading row first. It returns nil
// when text contains no NeoSansIntelMedium fragment, since only those define
// columns. text is re-sorted, and heading X coordinates are adjusted, in
// place.
//
// Column positions are the distinct X coordinates of the heading fragments
// (those within nudge of the previous one are snapped to it). Each fragment
// is then placed in the last column whose position is at most the fragment's
// X plus nudge. Fragments sharing a Y coordinate form one raw row; raw rows
// up to the last one that starts with a heading fragment are merged into the
// heading, and later raw rows that are at least half empty are merged into
// the row before them, with cell contents joined by "\n". Finally the
// heading cells are normalized with fixHeading.
func findMnemonicTable(text []pdf.Text) [][]string {
	sort.Sort(pdf.TextHorizontal(text))

	const nudge = 1

	// Collect column positions from the heading fragments.
	old := -100000.0
	var col []float64
	for i, t := range text {
		if t.Font != "NeoSansIntelMedium" { // only headings count
			continue
		}
		if t.X != old && math.Abs(old-t.X) < nudge {
			text[i].X = old
		} else if t.X != old {
			old = t.X
			col = append(col, old)
		}
	}
	sort.Sort(pdf.TextVertical(text))

	if len(col) == 0 {
		return nil
	}

	// Build one raw row per distinct Y coordinate.
	y := -100000.0
	var table [][]string
	var line []string
	bold := -1 // index of the last raw row whose first fragment is a heading
	for _, t := range text {
		if t.Y != y {
			table = append(table, make([]string, len(col)))
			line = table[len(table)-1]
			y = t.Y
			if t.Font == "NeoSansIntelMedium" {
				bold = len(table) - 1
			}
		}
		i := 0
		for i+1 < len(col) && col[i+1] <= t.X+nudge {
			i++
		}
		if line[i] != "" {
			line[i] += " "
		}
		line[i] += t.S
	}

	// Merge wrapped heading rows and wrapped body rows into logical rows.
	// NOTE(review): if no raw row starts with a heading fragment (bold == -1)
	// and the first raw row is at least half empty, mtable is still empty
	// when it is indexed below - confirm this cannot happen in practice.
	var mtable [][]string
	for i, t := range table {
		if 0 < i && i <= bold || bold < i && halfMissing(t) {
			// merge with earlier line
			last := mtable[len(mtable)-1]
			for j, s := range t {
				if s != "" {
					last[j] += "\n" + s
				}
			}
		} else {
			mtable = append(mtable, t)
		}
	}

	if bold >= 0 {
		heading := mtable[0]
		for i, x := range heading {
			heading[i] = fixHeading.Replace(x)
		}
	}

	return mtable
}
   608  
// fixHeading rewrites the column headings of a mnemonic table, as assembled
// by findMnemonicTable (wrapped lines joined with "\n"), to the canonical
// spellings that processListing switches on. Where one pattern is a prefix
// of another (e.g. "Opcode*" and "Opcode***"), the longer one is listed
// first.
var fixHeading = strings.NewReplacer(
	"64/32-\nbit\nMode", "64/32-Bit Mode",
	"64/32-\nbit Mode", "64/32-Bit Mode",
	"64/32-bit\nMode", "64/32-Bit Mode",
	"64/3\n2-bit\nMode", "64/32-Bit Mode",
	"64/32 bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32bit\nMode\nSupport", "64/32-Bit Mode",
	"64/32\n-bit\nMode", "64/32-Bit Mode",
	"64/32\nbit Mode\nSupport", "64/32-Bit Mode",
	"64-Bit\nMode", "64-Bit Mode",
	"64-bit\nMode", "64-Bit Mode",

	"Op/ En", "Op/En",
	"Op/\nEn", "Op/En",
	"Op/\nEN", "Op/En",
	"Op /\nEn", "Op/En",
	"Opcode***", "Opcode",
	"Opcode**", "Opcode",
	"Opcode*", "Opcode",
	"/\nInstruction", "/Instruction",

	"CPUID Fea-\nture Flag", "CPUID Feature Flag",
	"CPUID\nFeature\nFlag", "CPUID Feature Flag",
	"CPUID\nFeature Flag", "CPUID Feature Flag",
	"CPUIDFeature\nFlag", "CPUID Feature Flag",

	"Compat/\nLeg Mode*", "Compat/Leg Mode",
	"Compat/\nLeg Mode", "Compat/Leg Mode",
	"Compat/ *\nLeg Mode", "Compat/Leg Mode",
)
   639  
   640  func halfMissing(x []string) bool {
   641  	n := 0
   642  	for _, s := range x {
   643  		if s == "" {
   644  			n++
   645  		}
   646  	}
   647  	return n >= len(x)/2
   648  }
   649  
// findEncodingTable extracts the "Instruction Operand Encoding" table from
// text and returns it as rows of cells, or nil if no such table is found.
// text is sorted in place.
//
// The table is the text between the "Instruction Operand Encoding" title and
// the following "Description" section heading (or the end of the page).
// Column positions come from the centers of the "Op/En" and "Operand"
// fragments seen after the title; every fragment is then assigned to the
// last column whose center is at most the fragment's center plus nudge.
// Page-footer rows are dropped, and rows with an empty first cell are folded
// into the row before them.
func findEncodingTable(text []pdf.Text) [][]string {
	// Look for operand encoding table.
	sort.Sort(pdf.TextVertical(text))
	var col []float64
	sawTitle := false

	// center returns the coordinate used to place t in a column.
	center := func(t pdf.Text) float64 {
		return t.X + t.W/2
	}

	start := 0
	end := len(text)
	for i, t := range text {
		if match(t, "NeoSansIntelMedium", 10, "Instruction Operand Encoding") {
			sawTitle = true
			start = i + 1
			continue
		}
		if !sawTitle {
			continue
		}
		if match(t, "NeoSansIntel", 9, "Op/En") || match(t, "NeoSansIntel", 9, "Operand") {
			if debugging {
				fmt.Printf("column %d at %.2f: %v\n", len(col), center(t), t)
			}
			col = append(col, center(t))
		}
		if match(t, "NeoSansIntelMedium", 10, "Description") {
			end = i
			break
		}
	}
	text = text[start:end]

	if len(col) == 0 {
		return nil
	}

	const nudge = 20

	// Build one raw row per distinct Y coordinate.
	y := -100000.0
	var table [][]string
	var line []string
	for _, t := range text {
		if t.Y != y {
			table = append(table, make([]string, len(col)))
			line = table[len(table)-1]
			y = t.Y
		}
		i := 0
		x := center(t)
		for i+1 < len(col) && col[i+1] <= x+nudge {
			i++
		}
		if debugging {
			fmt.Printf("text at %.2f: %v => %d\n", x, t, i)
		}
		if line[i] != "" {
			line[i] += " "
		}
		line[i] += t.S
	}

	// Filter in place: drop footers and fold continuation lines.
	out := table[:0]
	for _, line := range table {
		if strings.HasPrefix(line[len(line)-1], "Vol. 2") { // page footer
			continue
		}
		if line[0] == "" && len(out) > 0 {
			// Continuation of the previous row: append cell by cell.
			last := out[len(out)-1]
			for i, col := range line {
				if col != "" {
					last[i] += " " + col
				}
			}
			continue
		}
		out = append(out, line)
	}
	table = out

	return table
}
   733  
   734  func findCompat(text []pdf.Text) string {
   735  	sort.Sort(pdf.TextVertical(text))
   736  
   737  	inCompat := false
   738  	out := ""
   739  	for _, t := range text {
   740  		if match(t, "NeoSansIntelMedium", 10, "") {
   741  			inCompat = strings.Contains(t.S, "Architecture Compatibility")
   742  			if inCompat {
   743  				out += t.S + "\n"
   744  			}
   745  		}
   746  		if inCompat && match(t, "Verdana", 9, "") || strings.Contains(t.S, "were introduced") {
   747  			out += t.S + "\n"
   748  		}
   749  	}
   750  	return out
   751  }
   752  
// processListing converts the tables of one instruction listing into
// instruction values and appends them to *insts.
//
// Every non-heading row of every mnemonic table yields one instruction. The
// meaning of each cell is taken from the (normalized) heading of its column;
// operand encodings are looked up in the listing's encoding tables by the
// row's Op/En value. Known typos in the manual are corrected, and
// instructions whose syntax is in instBlacklist are dropped.
//
// If a table has an unrecognized heading or a cell that cannot be
// interpreted, the problem and all of the listing's mnemonic tables are
// printed to standard error and processing of the listing stops;
// instructions appended before the bad cell remain in *insts.
func processListing(p *listing, insts *[]*instruction) {
	if debugging {
		for _, table := range p.mtables {
			fmt.Printf("table:\n")
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("enctable:\n")
		for _, table := range p.enctables {
			for _, row := range table {
				fmt.Printf("%q\n", row)
			}
		}
		fmt.Printf("compat:\n%s", p.compat)
	}

	if *flagCompat && p.compat != "" {
		fmt.Printf("# p.%d: %s\n#\t%s\n", p.pageNum, p.name, strings.Replace(p.compat, "\n", "\n#\t", -1))
	}

	// encs maps an Op/En key to its operand encodings, with trailing
	// "NA", empty, and " source" cells removed. The first row of each
	// encoding table is its heading and is skipped.
	encs := make(map[string][]string)
	for _, table := range p.enctables {
		for _, row := range table[1:] {
			for len(row) > 1 && (row[len(row)-1] == "NA" || row[len(row)-1] == "" || row[len(row)-1] == " source") {
				row = row[:len(row)-1]
			}
			encs[row[0]] = row[1:]
		}
	}

	var wrong string
	for _, table := range p.mtables {
		heading := table[0]
		for _, row := range table[1:] {
			// Skip a repeat of the heading row.
			if row[0] == heading[0] && reflect.DeepEqual(row, heading) {
				continue
			}
			// Fix a row in which two cells were run together.
			if len(row) >= 5 && row[1] == "CMOVG r64, r/m64" && row[3] == "V/N.E." && row[4] == "NA" {
				row[3] = "V"
				row[4] = "N.E."
			}
			inst := new(instruction)
			inst.page = p.pageNum
			inst.compat = strings.Join(strings.Fields(p.compat), " ")
			for i, hdr := range heading {
				x := row[i]
				x = strings.Replace(x, "\n", " ", -1)
				switch strings.TrimSpace(hdr) {
				default:
					wrong = "unexpected header: " + strconv.Quote(hdr)
					goto BadTable
				case "Opcode/Instruction":
					// Combined column: the raw cell (newlines intact) holds
					// the opcode on its first line and the syntax after it.
					x = row[i]
					if strings.HasPrefix(x, "\nVEX") {
						x = x[1:]
						row[i] = x
					}
					if strings.Contains(x, "\n/r ") {
						x = strings.Replace(x, "\n/r ", " /r ", -1)
						row[i] = x
					}
					if strings.Contains(x, ",\nimm") {
						x = strings.Replace(x, ",\nimm", ", imm", -1)
						row[i] = x
					}
					if strings.Count(x, "\n") < 1 {
						wrong = "bad Opcode/Instruction pairing: " + strconv.Quote(x)
						goto BadTable
					}
					i := strings.Index(x, "\n")
					inst.opcode = x[:i]
					inst.syntax = strings.Replace(x[i+1:], "\n", " ", -1)

				case "Opcode":
					inst.opcode = x

				case "Instruction":
					inst.syntax = x

				case "Op/En":
					inst.args = encs[x]
					// Fall back to encoding "A" when it is the only one listed.
					if inst.args == nil && len(encs) == 1 && encs["A"] != nil {
						inst.args = encs["A"]
					}
					// In the December 2015 manual, PREFETCHW says
					// encoding A but the table gives encoding M.
					if inst.args == nil && inst.syntax == "PREFETCHW m8" && x == "A" && len(encs) == 1 && encs["M"] != nil {
						inst.args = encs["M"]
					}

				case "64-Bit Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for 64-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x

				case "Compat/Leg Mode":
					x, ok := parseMode(x)
					if !ok {
						wrong = "unexpected value for Compat/Leg Mode column: " + x
						goto BadTable
					}
					inst.valid32 = x

				case "64/32-Bit Mode":
					// Combined column: "<64-bit mode>/<32-bit mode>".
					i := strings.Index(x, "/")
					if i < 0 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					x1, ok1 := parseMode(x[:i])
					x2, ok2 := parseMode(x[i+1:])
					if !ok1 || !ok2 {
						wrong = "unexpected value for 64/32-Bit Mode column: " + x
						goto BadTable
					}
					inst.valid64 = x1
					inst.valid32 = x2

				case "CPUID Feature Flag":
					inst.cpuid = x

				case "Description":
					if inst.desc != "" {
						inst.desc += " "
					}
					inst.desc += x
				}
			}

			// Fixup various typos or bugs in opcode descriptions.
			if inst.opcode == "VEX.128.66.0F.W0 6E /" {
				inst.opcode += "r"
			}
			// fix is rebound below to apply replacements to the syntax and
			// then the cpuid field in turn.
			fix := func(old, new string) {
				inst.opcode = strings.Replace(inst.opcode, old, new, -1)
			}
			fix(" imm8", " ib")
			fix("REX.w", "REX.W")
			fix("REX.W+", "REX.W +")
			fix(" 0f ", " 0F ")
			fix(". 0F38", ".0F38")
			fix("0F .WIG", "0F.WIG")
			fix("0F38 .WIG", "0F38.WIG")
			fix("NDS .LZ", "NDS.LZ")
			fix("58+ r", "58+r")
			fix("B0+ ", "B0+")
			fix("B8+ ", "B8+")
			fix("40+ ", "40+")
			fix("*", "")
			fix(",", " ")
			fix("/", " /")
			fix("REX.W +", "REX.W")
			fix("REX +", "REX")
			fix("REX 0F BE", "REX.W 0F BE")
			fix("REX 0F B2", "REX.W 0F B2")
			fix("REX 0F B4", "REX.W 0F B4")
			fix("REX 0F B5", "REX.W 0F B5")
			fix("0F38.0", "0F38.W0")
			fix(".660F.", ".66.0F.")
			fix("VEX128", "VEX.128")
			fix("0F3A.W0.1D", "0F3A.W0 1D")

			// Collapse runs of whitespace to single spaces.
			inst.opcode = strings.Join(strings.Fields(inst.opcode), " ")

			fix = func(old, new string) {
				inst.syntax = strings.Replace(inst.syntax, old, new, -1)
			}
			fix("xmm1 xmm2", "xmm1, xmm2")
			fix("r16/m16", "r/m16")
			fix("r32/m161", "r32/m16") // really r32/m16¹ (footnote)
			fix("r32/m32", "r/m32")
			fix("r64/m64", "r/m64")
			fix("\u2013", "-")
			fix("mm3 /m", "mm3/m")
			fix("mm3/.m", "mm3/m")
			// Normalize argument spacing and strip trailing footnote stars.
			inst.syntax = joinSyntax(splitSyntax(inst.syntax))

			fix = func(old, new string) {
				inst.cpuid = strings.Replace(inst.cpuid, old, new, -1)
			}
			fix("PCLMUL- QDQ", "PCLMULQDQ")
			fix("PCL- MULQDQ", "PCLMULQDQ")
			fix("Both PCLMULQDQ and AVX flags", "PCLMULQDQ+AVX")

			if !instBlacklist[inst.syntax] {
				*insts = append(*insts, inst)
			}
		}
	}
	return

BadTable:
	// Report the problem along with every mnemonic table of the listing.
	fmt.Fprintf(os.Stderr, "p.%d: reading %v: %v\n", p.pageNum, p.name, wrong)
	for _, table := range p.mtables {
		for _, t := range table {
			fmt.Fprintf(os.Stderr, "\t%q\n", t)
		}
	}
	fmt.Fprintf(os.Stderr, "\n")
}
   957  
   958  func parseMode(s string) (string, bool) {
   959  	switch strings.TrimSpace(s) {
   960  	case "Invalid", "Invalid*", "Inv.", "I", "i":
   961  		return "I", true
   962  	case "Valid", "Valid*", "V":
   963  		return "V", true
   964  	case "N.E.", "NE", "N. E.":
   965  		return "N.E.", true
   966  	case "N.P.", "N. P.":
   967  		return "N.P.", true
   968  	case "N.S.", "N. S.":
   969  		return "N.S.", true
   970  	case "N.I.", "N. I.":
   971  		return "N.I.", true
   972  	}
   973  	return s, false
   974  }
   975  
   976  func splitSyntax(syntax string) (op string, args []string) {
   977  	i := strings.Index(syntax, " ")
   978  	if i < 0 {
   979  		return syntax, nil
   980  	}
   981  	op, syntax = syntax[:i], syntax[i+1:]
   982  	args = strings.Split(syntax, ",")
   983  	for i, arg := range args {
   984  		arg = strings.TrimSpace(arg)
   985  		arg = strings.TrimRight(arg, "*")
   986  		args[i] = arg
   987  	}
   988  	return
   989  }
   990  
   991  func joinSyntax(op string, args []string) string {
   992  	if len(args) == 0 {
   993  		return op
   994  	}
   995  	return op + " " + strings.Join(args, ", ")
   996  }