golang.org/x/arch@v0.17.0/arm64/arm64asm/ext_test.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for testing against external disassembler program.
     6  // Copied and simplified from ../../arm/armasm/ext_test.go.
     7  
     8  package arm64asm
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"encoding/hex"
    14  	"encoding/json"
    15  	"flag"
    16  	"fmt"
    17  	"io"
    18  	"io/ioutil"
    19  	"log"
    20  	"math/rand"
    21  	"os"
    22  	"os/exec"
    23  	"path/filepath"
    24  	"regexp"
    25  	"strconv"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  )
    30  
// Command-line flags and switches that control the external-disassembler tests.
var (
	// dumpTest makes testExtDis print every encoding together with the
	// external disassembler's text for it.
	dumpTest = flag.Bool("dump", false, "dump all encodings")
	// mismatch makes testExtDis report mismatches that allowedMismatch
	// accepted instead of silently skipping them.
	mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
	// keep prevents testExtDis from removing the generated input file and
	// makes Run log the command line it executes.
	keep = flag.Bool("keep", false, "keep object files around")
	// debug, when set to true in the source, makes writeInst print each
	// encoding as it is written.
	debug = false
)
    37  
    38  // An ExtInst represents a single decoded instruction parsed
    39  // from an external disassembler's output.
    40  type ExtInst struct {
    41  	addr uint64
    42  	enc  [4]byte
    43  	nenc int
    44  	text string
    45  }
    46  
    47  func (r ExtInst) String() string {
    48  	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
    49  }
    50  
// An ExtDis is a connection between an external disassembler and a test.
// testExtDis creates one per run and hands it to the extdis callback.
type ExtDis struct {
	Arch     Mode         // architecture mode requested by the test
	Dec      chan ExtInst // parsed instructions, sent by the extdis callback and received by testExtDis
	File     *os.File     // input file produced by writeInst
	Size     int          // byte count reported by writeInst for the generated input
	KeepFile bool         // not referenced in this file; NOTE(review): confirm whether any parser still uses it
	Cmd      *exec.Cmd    // command started by Run and awaited by Wait
}
    60  
// InstJson describes one instruction entry decoded from inst.json.
// According to the original author, the field values come from the
// ARMv8-A Reference Manual.
type InstJson struct {
	Name string
	// Bits is the '|'-separated bit layout that doFuzzy turns into an
	// encoding, with entries such as "0", "1", "01:2" or "Rm:5".
	Bits   string
	Arch   string
	Syntax string
	Code   string
	Alias  string
	// Enc is the 32-bit encoding filled in by doFuzzy. It is not expected to
	// be meaningful before doFuzzy has run.
	Enc uint32
}
    71  
// A Mode is an instruction execution mode.
type Mode int

const (
	// The blank identifier takes the value 0, so the zero Mode does not
	// name any mode.
	_ Mode = iota
	// ModeARM64 is the only mode declared here.
	ModeARM64
)
    79  
    80  // Run runs the given command - the external disassembler - and returns
    81  // a buffered reader of its standard output.
    82  func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
    83  	if *keep {
    84  		log.Printf("%s\n", strings.Join(cmd, " "))
    85  	}
    86  	ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
    87  	out, err := ext.Cmd.StdoutPipe()
    88  	if err != nil {
    89  		return nil, fmt.Errorf("stdoutpipe: %v", err)
    90  	}
    91  	if err := ext.Cmd.Start(); err != nil {
    92  		return nil, fmt.Errorf("exec: %v", err)
    93  	}
    94  
    95  	b := bufio.NewReaderSize(out, 1<<20)
    96  	return b, nil
    97  }
    98  
    99  // Wait waits for the command started with Run to exit.
   100  func (ext *ExtDis) Wait() error {
   101  	return ext.Cmd.Wait()
   102  }
   103  
// testExtDis tests a set of byte sequences against an external disassembler.
//
// syntax selects the text form produced by our own decoder (see disasm).
// Decodings are only compared against the external output when syntax is
// "gnu". arch is stored in ext.Arch before extdis is called.
//
// The extdis function must start the external disassembler
// and then parse its output, sending the parsed instructions on ext.Dec.
// It runs in its own goroutine. Its returned error is checked at the very
// end and, if non-nil, fails the test.
//
// The generate function calls its argument f once for each byte sequence
// to be tested. The generate function itself will be called twice, and it must
// make the same sequence of calls to f each time. The first call writes the
// disassembler's input file and the second drives the comparison.
//
// When a disassembly does not match the internal decoding,
// allowedMismatch determines whether this mismatch should be
// allowed, or else considered an error.
func testExtDis(
	t *testing.T,
	syntax string,
	arch Mode,
	extdis func(ext *ExtDis) error,
	generate func(f func([]byte)),
	allowedMismatch func(text string, inst *Inst, dec ExtInst) bool,
) {
	// Note: this local shadows the package-level constant start.
	start := time.Now()
	ext := &ExtDis{
		Dec:  make(chan ExtInst),
		Arch: arch,
	}
	errc := make(chan error)

	// First pass: write instructions to input file for external disassembler.
	file, f, size, err := writeInst(generate)
	if err != nil {
		t.Fatal(err)
	}
	ext.Size = size
	ext.File = f
	// Close the input file on exit, and delete it unless -keep was given.
	defer func() {
		f.Close()
		if !*keep {
			os.Remove(file)
		}
	}()

	// Second pass: compare disassembly against our decodings.
	var (
		totalTests  = 0
		totalSkips  = 0
		totalErrors = 0

		// Random sample of mismatch messages; never grows beyond its
		// initial capacity of 100.
		errors = make([]string, 0, 100)
	)
	// Run the external disassembler concurrently; its result is collected
	// from errc after all comparisons are done.
	go func() {
		errc <- extdis(ext)
	}()

	generate(func(enc []byte) {
		// Each generated encoding is paired with the next instruction
		// reported by the external disassembler.
		dec, ok := <-ext.Dec
		if !ok {
			t.Errorf("decoding stream ended early")
			return
		}
		inst, text := disasm(syntax, pad(enc))

		totalTests++
		if *dumpTest {
			fmt.Printf("%x -> %s [%d]\n", enc, dec.text, dec.nenc)
		}
		// Only "gnu" syntax is compared, and external output containing
		// "unknown" is never treated as a mismatch.
		if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" {
			suffix := ""
			if allowedMismatch(text, &inst, dec) {
				totalSkips++
				if !*mismatch {
					return
				}
				// With -mismatch, allowed mismatches are still recorded
				// (and counted in totalErrors for now) so they show up in
				// the log; they are subtracted again after the loop.
				suffix += " (allowed mismatch)"
			}
			totalErrors++
			cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix)

			// Once the sample is full, either drop this message or evict a
			// randomly chosen earlier one to make room for it.
			if len(errors) >= cap(errors) {
				j := rand.Intn(totalErrors)
				if j >= cap(errors) {
					return
				}
				errors = append(errors[:j], errors[j+1:]...)
			}
			errors = append(errors, cmp)
		}
	})

	// Allowed mismatches were counted as errors above only so that they
	// could be logged; they must not fail the test.
	if *mismatch {
		totalErrors -= totalSkips
	}

	for _, b := range errors {
		t.Log(b)
	}

	if totalErrors > 0 {
		t.Fail()
	}
	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
	t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage())
	// Wait for the extdis goroutine and surface any error it reported.
	if err := <-errc; err != nil {
		t.Fatalf("external disassembler: %v", err)
	}

}
   209  
// Start address of text. writeInst seeks to this offset in the generated
// file before writing the first instruction, and uses it as the base
// address in its debug output.
const start = 0x8000
   212  
   213  // writeInst writes the generated byte sequences to a new file
   214  // starting at offset start. That file is intended to be the input to
   215  // the external disassembler.
   216  func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
   217  	f, err = ioutil.TempFile("", "arm64asm")
   218  	if err != nil {
   219  		return
   220  	}
   221  
   222  	file = f.Name()
   223  
   224  	f.Seek(start, io.SeekStart)
   225  	w := bufio.NewWriter(f)
   226  	defer w.Flush()
   227  	size = 0
   228  	generate(func(x []byte) {
   229  		if debug {
   230  			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
   231  		}
   232  		w.Write(x)
   233  		w.Write(zeros[len(x):])
   234  		size += len(zeros)
   235  	})
   236  	return file, f, size, nil
   237  }
   238  
   239  var zeros = []byte{0, 0, 0, 0}
   240  
   241  // pad pads the code sequence with pops.
   242  func pad(enc []byte) []byte {
   243  	if len(enc) < 4 {
   244  		enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
   245  	}
   246  	return enc
   247  }
   248  
   249  // disasm returns the decoded instruction and text
   250  // for the given source bytes, using the given syntax and mode.
   251  func disasm(syntax string, src []byte) (inst Inst, text string) {
   252  	var err error
   253  	inst, err = Decode(src)
   254  	if err != nil {
   255  		text = "error: " + err.Error()
   256  		return
   257  	}
   258  	switch syntax {
   259  	case "gnu":
   260  		text = GNUSyntax(inst)
   261  	case "plan9": // [sic]
   262  		text = GoSyntax(inst, 0, nil, nil)
   263  	default:
   264  		text = "error: unknown syntax " + syntax
   265  	}
   266  	return
   267  }
   268  
   269  // decodeCoverage returns a floating point number denoting the
   270  // decoder coverage.
   271  func decodeCoverage() float64 {
   272  	n := 0
   273  	for _, t := range decoderCover {
   274  		if t {
   275  			n++
   276  		}
   277  	}
   278  	return 100 * float64(1+n) / float64(1+len(decoderCover))
   279  }
   280  
   281  // Helpers for writing disassembler output parsers.
   282  
   283  // hasPrefix reports whether any of the space-separated words in the text s
   284  // begins with any of the given prefixes.
   285  func hasPrefix(s string, prefixes ...string) bool {
   286  	for _, prefix := range prefixes {
   287  		for cur_s := s; cur_s != ""; {
   288  			if strings.HasPrefix(cur_s, prefix) {
   289  				return true
   290  			}
   291  			i := strings.Index(cur_s, " ")
   292  			if i < 0 {
   293  				break
   294  			}
   295  			cur_s = cur_s[i+1:]
   296  		}
   297  	}
   298  	return false
   299  }
   300  
   301  // isHex reports whether b is a hexadecimal character (0-9a-fA-F).
   302  func isHex(b byte) bool {
   303  	return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F')
   304  }
   305  
   306  // parseHex parses the hexadecimal byte dump in hex,
   307  // appending the parsed bytes to raw and returning the updated slice.
   308  // The returned bool reports whether any invalid hex was found.
   309  // Spaces and tabs between bytes are okay but any other non-hex is not.
   310  func parseHex(hex []byte, raw []byte) ([]byte, bool) {
   311  	hex = bytes.TrimSpace(hex)
   312  	for j := 0; j < len(hex); {
   313  		for hex[j] == ' ' || hex[j] == '\t' {
   314  			j++
   315  		}
   316  		if j >= len(hex) {
   317  			break
   318  		}
   319  		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
   320  			return nil, false
   321  		}
   322  		raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1]))
   323  		j += 2
   324  	}
   325  	return raw, true
   326  }
   327  
   328  func unhex(b byte) byte {
   329  	if '0' <= b && b <= '9' {
   330  		return b - '0'
   331  	} else if 'A' <= b && b <= 'F' {
   332  		return b - 'A' + 10
   333  	} else if 'a' <= b && b <= 'f' {
   334  		return b - 'a' + 10
   335  	}
   336  	return 0
   337  }
   338  
   339  // index is like bytes.Index(s, []byte(t)) but avoids the allocation.
   340  func index(s []byte, t string) int {
   341  	i := 0
   342  	for {
   343  		j := bytes.IndexByte(s[i:], t[0])
   344  		if j < 0 {
   345  			return -1
   346  		}
   347  		i = i + j
   348  		if i+len(t) > len(s) {
   349  			return -1
   350  		}
   351  		for k := 1; k < len(t); k++ {
   352  			if s[i+k] != t[k] {
   353  				goto nomatch
   354  			}
   355  		}
   356  		return i
   357  	nomatch:
   358  		i++
   359  	}
   360  }
   361  
   362  // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
   363  // If s must be rewritten, it is rewritten in place.
   364  func fixSpace(s []byte) []byte {
   365  	s = bytes.TrimSpace(s)
   366  	for i := 0; i < len(s); i++ {
   367  		if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
   368  			goto Fix
   369  		}
   370  	}
   371  	return s
   372  
   373  Fix:
   374  	b := s
   375  	w := 0
   376  	for i := 0; i < len(s); i++ {
   377  		c := s[i]
   378  		if c == '\t' || c == '\n' {
   379  			c = ' '
   380  		}
   381  		if c == ' ' && w > 0 && b[w-1] == ' ' {
   382  			continue
   383  		}
   384  		b[w] = c
   385  		w++
   386  	}
   387  	if w > 0 && b[w-1] == ' ' {
   388  		w--
   389  	}
   390  	return b[:w]
   391  }
   392  
// The following regular expressions match instructions that use a relative
// addressing mode. Each one captures the text before the final hexadecimal
// operand as group 1 and that operand's hex digits as group 2.
// pcrel matches B instructions and BL instructions.
// pcrelr matches instructions that take a register argument and a label argument.
// pcrelim matches instructions that take a register argument, an immediate
// argument and a label argument.
// pcrelrzr and pcrelimzr match the same forms when the register argument is the zero register.
// pcrelprfm matches PRFM instructions whose arguments are a named (alphanumeric) operand and a label.
// pcrelprfmim matches PRFM instructions whose arguments are an immediate and a label.
var (
	pcrel       = regexp.MustCompile(`^((?:.* )?(?:b|bl)x?(?:\.)?(?:eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|nv)?) 0x([0-9a-f]+)$`)
	pcrelr      = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w|s|d|q)(?:[0-9]+,)) 0x([0-9a-f]+)$`)
	pcrelrzr    = regexp.MustCompile(`^((?:.*)?(?:ldr|adrp|adr|cbnz|cbz|ldrsw) (?:x|w)zr,) 0x([0-9a-f]+)$`)
	pcrelim     = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)(?:[0-9]+,) (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
	pcrelimzr   = regexp.MustCompile(`^((?:.*)?(?:tbnz|tbz) (?:x|w)zr, (?:#[0-9a-f]+,)) 0x([0-9a-f]+)$`)
	pcrelprfm   = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:[0-9a-z]+,)) 0x([0-9a-f]+)$`)
	pcrelprfmim = regexp.MustCompile(`^((?:.*)?(?:prfm) (?:#0x[0-9a-f]+,)) 0x([0-9a-f]+)$`)
)
   410  
// Round counts the passes JSONCases makes over the instructions read from
// the JSON file: it is reset to 0 and then incremented each time another copy
// of the instruction list is started.
// doFuzzy adds Round to the instruction index to form the seed of its
// pseudo-random number generator, so that the same round produces the same
// sequence both for the external disassembler and for the decoder.
// When condmark is set, doFuzzy also uses Round as the value of the "cond" field.
var Round int

// condmark marks that conditional instructions are being generated and
// tested. While it is true, JSONCases and doFuzzy only produce encodings
// for instructions whose bit layout has a "cond" field.
var condmark bool = false
   419  
   420  // Generate instruction binary according to Json file
   421  // Encode variable field of instruction with random value
   422  func doFuzzy(inst *InstJson, Ninst int) {
   423  	var testdata uint32
   424  	var NonDigRE = regexp.MustCompile(`[\D]`)
   425  	rand.Seed(int64(Round + Ninst))
   426  	off := 0
   427  	DigBit := ""
   428  	if condmark == true && !strings.Contains(inst.Bits, "cond") {
   429  		inst.Enc = 0xffffffff
   430  	} else {
   431  		for _, f := range strings.Split(inst.Bits, "|") {
   432  			if i := strings.Index(f, ":"); i >= 0 {
   433  				// consider f contains "01:2" and "Rm:5"
   434  				DigBit = f[:i]
   435  				m := NonDigRE.FindStringSubmatch(DigBit)
   436  				if m == nil {
   437  					DigBit = strings.TrimSpace(DigBit)
   438  					s := strings.Split(DigBit, "")
   439  					for i := 0; i < len(s); i++ {
   440  						switch s[i] {
   441  						case "1", "(1)":
   442  							testdata |= 1 << uint(31-off)
   443  						}
   444  						off++
   445  					}
   446  				} else {
   447  					// DigBit is "Rn" or "imm3"
   448  					n, _ := strconv.Atoi(f[i+1:])
   449  					if DigBit == "cond" && condmark == true {
   450  						r := uint8(Round)
   451  						for i := n - 1; i >= 0; i-- {
   452  							switch (r >> uint(i)) & 1 {
   453  							case 1:
   454  								testdata |= 1 << uint(31-off)
   455  							}
   456  							off++
   457  						}
   458  					} else {
   459  						for i := 0; i < n; i++ {
   460  							r := rand.Intn(2)
   461  							switch r {
   462  							case 1:
   463  								testdata |= 1 << uint(31-off)
   464  							}
   465  							off++
   466  						}
   467  					}
   468  				}
   469  				continue
   470  			}
   471  			for _, bit := range strings.Fields(f) {
   472  				switch bit {
   473  				case "0", "(0)":
   474  					off++
   475  					continue
   476  				case "1", "(1)":
   477  					testdata |= 1 << uint(31-off)
   478  				default:
   479  					r := rand.Intn(2)
   480  					switch r {
   481  					case 1:
   482  						testdata |= 1 << uint(31-off)
   483  					}
   484  				}
   485  				off++
   486  			}
   487  		}
   488  		if off != 32 {
   489  			log.Printf("incorrect bit count for %s %s: have %d", inst.Name, inst.Bits, off)
   490  		}
   491  		inst.Enc = testdata
   492  	}
   493  }
   494  
   495  // Generators.
   496  //
   497  // The test cases are described as functions that invoke a callback repeatedly,
   498  // with a new input sequence each time. These helpers make writing those
   499  // a little easier.
   500  
   501  // JSONCases generates ARM64 instructions according to inst.json.
   502  func JSONCases(t *testing.T) func(func([]byte)) {
   503  	return func(try func([]byte)) {
   504  		data, err := ioutil.ReadFile("inst.json")
   505  		if err != nil {
   506  			t.Fatal(err)
   507  		}
   508  		var insts []InstJson
   509  		var instsN []InstJson
   510  		// Change N value to get more cases only when condmark=false.
   511  		N := 100
   512  		if condmark == true {
   513  			N = 16
   514  		}
   515  		if err := json.Unmarshal(data, &insts); err != nil {
   516  			t.Fatal(err)
   517  		}
   518  		// Append instructions to get more test cases.
   519  		for i := 0; i < N; i++ {
   520  			instsN = append(instsN, insts...)
   521  		}
   522  		Round = 0
   523  		for i := range instsN {
   524  			if i%len(insts) == 0 {
   525  				Round++
   526  			}
   527  			doFuzzy(&instsN[i], i)
   528  		}
   529  		for _, inst := range instsN {
   530  			if condmark == true && inst.Enc == 0xffffffff {
   531  				continue
   532  			}
   533  			enc := inst.Enc
   534  			try([]byte{byte(enc), byte(enc >> 8), byte(enc >> 16), byte(enc >> 24)})
   535  		}
   536  	}
   537  }
   538  
   539  // condCases generates conditional instructions.
   540  func condCases(t *testing.T) func(func([]byte)) {
   541  	return func(try func([]byte)) {
   542  		condmark = true
   543  		JSONCases(t)(func(enc []byte) {
   544  			try(enc)
   545  		})
   546  	}
   547  }
   548  
   549  // hexCases generates the cases written in hexadecimal in the encoded string.
   550  // Spaces in 'encoded' separate entire test cases, not individual bytes.
   551  func hexCases(t *testing.T, encoded string) func(func([]byte)) {
   552  	return func(try func([]byte)) {
   553  		for _, x := range strings.Fields(encoded) {
   554  			src, err := hex.DecodeString(x)
   555  			if err != nil {
   556  				t.Errorf("parsing %q: %v", x, err)
   557  			}
   558  			try(src)
   559  		}
   560  	}
   561  }
   562  
   563  // testdataCases generates the test cases recorded in testdata/cases.txt.
   564  // It only uses the inputs; it ignores the answers recorded in that file.
   565  func testdataCases(t *testing.T, syntax string) func(func([]byte)) {
   566  	var codes [][]byte
   567  	input := filepath.Join("testdata", syntax+"cases.txt")
   568  	data, err := ioutil.ReadFile(input)
   569  	if err != nil {
   570  		t.Fatal(err)
   571  	}
   572  	for _, line := range strings.Split(string(data), "\n") {
   573  		line = strings.TrimSpace(line)
   574  		if line == "" || strings.HasPrefix(line, "#") {
   575  			continue
   576  		}
   577  		f := strings.Fields(line)[0]
   578  		i := strings.Index(f, "|")
   579  		if i < 0 {
   580  			t.Errorf("parsing %q: missing | separator", f)
   581  			continue
   582  		}
   583  		if i%2 != 0 {
   584  			t.Errorf("parsing %q: misaligned | separator", f)
   585  		}
   586  		code, err := hex.DecodeString(f[:i] + f[i+1:])
   587  		if err != nil {
   588  			t.Errorf("parsing %q: %v", f, err)
   589  			continue
   590  		}
   591  		codes = append(codes, code)
   592  	}
   593  
   594  	return func(try func([]byte)) {
   595  		for _, code := range codes {
   596  			try(code)
   597  		}
   598  	}
   599  }