golang.org/x/arch@v0.17.0/loong64/loong64asm/ext_test.go (about)

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Support for testing against external disassembler program.
     6  
     7  package loong64asm
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"encoding/hex"
    13  	"flag"
    14  	"fmt"
    15  	"io"
    16  	"io/ioutil"
    17  	"log"
    18  	"math/rand"
    19  	"os"
    20  	"os/exec"
    21  	"path/filepath"
    22  	"strings"
    23  	"testing"
    24  	"time"
    25  )
    26  
    27  var (
    28  	dumpTest = flag.Bool("dump", false, "dump all encodings")
    29  	mismatch = flag.Bool("mismatch", false, "log allowed mismatches")
    30  	keep     = flag.Bool("keep", false, "keep object files around")
    31  	debug    = false
    32  )
    33  
    34  // An ExtInst represents a single decoded instruction parsed
    35  // from an external disassembler's output.
    36  type ExtInst struct {
    37  	addr uint64
    38  	enc  [4]byte
    39  	nenc int
    40  	text string
    41  }
    42  
    43  func (r ExtInst) String() string {
    44  	return fmt.Sprintf("%#x: % x: %s", r.addr, r.enc, r.text)
    45  }
    46  
    47  // An ExtDis is a connection between an external disassembler and a test.
    48  type ExtDis struct {
    49  	Dec  chan ExtInst
    50  	File *os.File
    51  	Size int
    52  	Cmd  *exec.Cmd
    53  }
    54  
    55  // Run runs the given command - the external disassembler - and returns
    56  // a buffered reader of its standard output.
    57  func (ext *ExtDis) Run(cmd ...string) (*bufio.Reader, error) {
    58  	if *keep {
    59  		log.Printf("%s\n", strings.Join(cmd, " "))
    60  	}
    61  	ext.Cmd = exec.Command(cmd[0], cmd[1:]...)
    62  	out, err := ext.Cmd.StdoutPipe()
    63  	if err != nil {
    64  		return nil, fmt.Errorf("stdoutpipe: %v", err)
    65  	}
    66  	if err := ext.Cmd.Start(); err != nil {
    67  		return nil, fmt.Errorf("exec: %v", err)
    68  	}
    69  
    70  	b := bufio.NewReaderSize(out, 1<<20)
    71  	return b, nil
    72  }
    73  
    74  // Wait waits for the command started with Run to exit.
    75  func (ext *ExtDis) Wait() error {
    76  	return ext.Cmd.Wait()
    77  }
    78  
    79  // testExtDis tests a set of byte sequences against an external disassembler.
    80  // The disassembler is expected to produce the given syntax and run
    81  // in the given architecture mode (16, 32, or 64-bit).
    82  // The extdis function must start the external disassembler
    83  // and then parse its output, sending the parsed instructions on ext.Dec.
    84  // The generate function calls its argument f once for each byte sequence
    85  // to be tested. The generate function itself will be called twice, and it must
    86  // make the same sequence of calls to f each time.
    87  // When a disassembly does not match the internal decoding,
    88  // allowedMismatch determines whether this mismatch should be
    89  // allowed, or else considered an error.
    90  func testExtDis(
    91  	t *testing.T,
    92  	syntax string,
    93  	extdis func(ext *ExtDis) error,
    94  	generate func(f func([]byte)),
    95  	allowedMismatch func(text string, inst *Inst, dec ExtInst) bool,
    96  ) {
    97  	start := time.Now()
    98  	ext := &ExtDis{
    99  		Dec: make(chan ExtInst),
   100  	}
   101  	errc := make(chan error)
   102  
   103  	// First pass: write instructions to input file for external disassembler.
   104  	file, f, size, err := writeInst(generate)
   105  	if err != nil {
   106  		t.Fatal(err)
   107  	}
   108  	ext.Size = size
   109  	ext.File = f
   110  	defer func() {
   111  		f.Close()
   112  		if !*keep {
   113  			os.Remove(file)
   114  		}
   115  	}()
   116  
   117  	// Second pass: compare disassembly against our decodings.
   118  	var (
   119  		totalTests  = 0
   120  		totalSkips  = 0
   121  		totalErrors = 0
   122  
   123  		errors = make([]string, 0, 100) // Sampled errors, at most cap
   124  	)
   125  	go func() {
   126  		errc <- extdis(ext)
   127  	}()
   128  
   129  	generate(func(enc []byte) {
   130  		dec, ok := <-ext.Dec
   131  		if !ok {
   132  			t.Errorf("decoding stream ended early")
   133  			return
   134  		}
   135  		inst, text := disasm(syntax, pad(enc))
   136  
   137  		totalTests++
   138  		if *dumpTest {
   139  			fmt.Printf("%x -> %s [%d]\n", enc[:len(enc)], dec.text, dec.nenc)
   140  		}
   141  
   142  		if text != dec.text && !strings.Contains(dec.text, "unknown") && syntax == "gnu" {
   143  			suffix := ""
   144  			if allowedMismatch(text, &inst, dec) {
   145  				totalSkips++
   146  				if !*mismatch {
   147  					return
   148  				}
   149  				suffix += " (allowed mismatch)"
   150  			}
   151  			totalErrors++
   152  			cmp := fmt.Sprintf("decode(%x) = %q, %d, want %q, %d%s\n", enc, text, len(enc), dec.text, dec.nenc, suffix)
   153  
   154  			if len(errors) >= cap(errors) {
   155  				j := rand.Intn(totalErrors)
   156  				if j >= cap(errors) {
   157  					return
   158  				}
   159  				errors = append(errors[:j], errors[j+1:]...)
   160  			}
   161  			errors = append(errors, cmp)
   162  		}
   163  	})
   164  
   165  	if *mismatch {
   166  		totalErrors -= totalSkips
   167  	}
   168  
   169  	fmt.Printf("totalTest: %d total skip: %d total error: %d\n", totalTests, totalSkips, totalErrors)
   170  
   171  	// Here are some errors about mismatches(44)
   172  	for _, b := range errors {
   173  		t.Log(b)
   174  	}
   175  
   176  	if totalErrors > 0 {
   177  		t.Fail()
   178  	}
   179  	t.Logf("%d test cases, %d expected mismatches, %d failures; %.0f cases/second", totalTests, totalSkips, totalErrors, float64(totalTests)/time.Since(start).Seconds())
   180  	t.Logf("decoder coverage: %.1f%%;\n", decodeCoverage())
   181  }
   182  
// Start address of text.
// writeInst seeks to this offset in the temporary file before writing
// the first encoding, and its debug output uses it as the base address.
const start = 0x8000
   185  
   186  // writeInst writes the generated byte sequences to a new file
   187  // starting at offset start. That file is intended to be the input to
   188  // the external disassembler.
   189  func writeInst(generate func(func([]byte))) (file string, f *os.File, size int, err error) {
   190  	f, err = ioutil.TempFile("", "loong64asm")
   191  	if err != nil {
   192  		return
   193  	}
   194  
   195  	file = f.Name()
   196  
   197  	f.Seek(start, io.SeekStart)
   198  	w := bufio.NewWriter(f)
   199  	defer w.Flush()
   200  	size = 0
   201  	generate(func(x []byte) {
   202  		if debug {
   203  			fmt.Printf("%#x: %x%x\n", start+size, x, zeros[len(x):])
   204  		}
   205  		w.Write(x)
   206  		w.Write(zeros[len(x):])
   207  		size += len(zeros)
   208  	})
   209  	return file, f, size, nil
   210  }
   211  
   212  var zeros = []byte{0, 0, 0, 0}
   213  
   214  // pad pads the code sequence with pops.
   215  func pad(enc []byte) []byte {
   216  	if len(enc) < 4 {
   217  		enc = append(enc[:len(enc):len(enc)], zeros[:4-len(enc)]...)
   218  	}
   219  	return enc
   220  }
   221  
   222  // disasm returns the decoded instruction and text
   223  // for the given source bytes, using the given syntax and mode.
   224  func disasm(syntax string, src []byte) (inst Inst, text string) {
   225  	var err error
   226  	inst, err = Decode(src)
   227  	if err != nil {
   228  		text = "error: " + err.Error()
   229  		return
   230  	}
   231  	text = inst.String()
   232  	switch syntax {
   233  	case "gnu":
   234  		text = GNUSyntax(inst)
   235  	case "plan9": // [sic]
   236  		text = GoSyntax(inst, 0, nil)
   237  	default:
   238  		text = "error: unknown syntax " + syntax
   239  	}
   240  	return
   241  }
   242  
   243  // decodeCoverage returns a floating point number denoting the
   244  // decoder coverage.
   245  func decodeCoverage() float64 {
   246  	n := 0
   247  	for _, t := range decoderCover {
   248  		if t {
   249  			n++
   250  		}
   251  	}
   252  	return 100 * float64(1+n) / float64(1+len(decoderCover))
   253  }
   254  
   255  // Helpers for writing disassembler output parsers.
   256  
   257  // isHex reports whether b is a hexadecimal character (0-9a-fA-F).
   258  func isHex(b byte) bool {
   259  	return ('0' <= b && b <= '9') || ('a' <= b && b <= 'f') || ('A' <= b && b <= 'F')
   260  }
   261  
   262  // parseHex parses the hexadecimal byte dump in hex,
   263  // appending the parsed bytes to raw and returning the updated slice.
   264  // The returned bool reports whether any invalid hex was found.
   265  // Spaces and tabs between bytes are okay but any other non-hex is not.
   266  func parseHex(hex []byte, raw []byte) ([]byte, bool) {
   267  	hex = bytes.TrimSpace(hex)
   268  	for j := 0; j < len(hex); {
   269  		for hex[j] == ' ' || hex[j] == '\t' {
   270  			j++
   271  		}
   272  		if j >= len(hex) {
   273  			break
   274  		}
   275  		if j+2 > len(hex) || !isHex(hex[j]) || !isHex(hex[j+1]) {
   276  			return nil, false
   277  		}
   278  		raw = append(raw, unhex(hex[j])<<4|unhex(hex[j+1]))
   279  		j += 2
   280  	}
   281  	return raw, true
   282  }
   283  
   284  func unhex(b byte) byte {
   285  	if '0' <= b && b <= '9' {
   286  		return b - '0'
   287  	} else if 'A' <= b && b <= 'F' {
   288  		return b - 'A' + 10
   289  	} else if 'a' <= b && b <= 'f' {
   290  		return b - 'a' + 10
   291  	}
   292  	return 0
   293  }
   294  
   295  // index is like bytes.Index(s, []byte(t)) but avoids the allocation.
   296  func index(s []byte, t string) int {
   297  	i := 0
   298  	for {
   299  		j := bytes.IndexByte(s[i:], t[0])
   300  		if j < 0 {
   301  			return -1
   302  		}
   303  		i = i + j
   304  		if i+len(t) > len(s) {
   305  			return -1
   306  		}
   307  		for k := 1; k < len(t); k++ {
   308  			if s[i+k] != t[k] {
   309  				goto nomatch
   310  			}
   311  		}
   312  		return i
   313  	nomatch:
   314  		i++
   315  	}
   316  }
   317  
   318  // fixSpace rewrites runs of spaces, tabs, and newline characters into single spaces in s.
   319  // If s must be rewritten, it is rewritten in place.
   320  func fixSpace(s []byte) []byte {
   321  	s = bytes.TrimSpace(s)
   322  	for i := 0; i < len(s); i++ {
   323  		if s[i] == '\t' || s[i] == '\n' || i > 0 && s[i] == ' ' && s[i-1] == ' ' {
   324  			goto Fix
   325  		}
   326  	}
   327  	return s
   328  
   329  Fix:
   330  	b := s
   331  	w := 0
   332  	for i := 0; i < len(s); i++ {
   333  		c := s[i]
   334  		if c == '\t' || c == '\n' {
   335  			c = ' '
   336  		}
   337  		if c == ' ' && w > 0 && b[w-1] == ' ' {
   338  			continue
   339  		}
   340  		b[w] = c
   341  		w++
   342  	}
   343  	if w > 0 && b[w-1] == ' ' {
   344  		w--
   345  	}
   346  	return b[:w]
   347  }
   348  
   349  // Generators.
   350  //
   351  // The test cases are described as functions that invoke a callback repeatedly,
   352  // with a new input sequence each time. These helpers make writing those
   353  // a little easier.
   354  
   355  // hexCases generates the cases written in hexadecimal in the encoded string.
   356  // Spaces in 'encoded' separate entire test cases, not individual bytes.
   357  func hexCases(t *testing.T, encoded string) func(func([]byte)) {
   358  	return func(try func([]byte)) {
   359  		for _, x := range strings.Fields(encoded) {
   360  			src, err := hex.DecodeString(x)
   361  			if err != nil {
   362  				t.Errorf("parsing %q: %v", x, err)
   363  			}
   364  			try(src)
   365  		}
   366  	}
   367  }
   368  
   369  // testdataCases generates the test cases recorded in testdata/cases.txt.
   370  // It only uses the inputs; it ignores the answers recorded in that file.
   371  func testdataCases(t *testing.T, syntax string) func(func([]byte)) {
   372  	var codes [][]byte
   373  	input := filepath.Join("testdata", syntax+"cases.txt")
   374  	data, err := ioutil.ReadFile(input)
   375  	if err != nil {
   376  		t.Fatal(err)
   377  	}
   378  	for _, line := range strings.Split(string(data), "\n") {
   379  		line = strings.TrimSpace(line)
   380  		if line == "" || strings.HasPrefix(line, "#") {
   381  			continue
   382  		}
   383  		f := strings.Fields(line)[0]
   384  		i := strings.Index(f, "|")
   385  		if i < 0 {
   386  			t.Errorf("parsing %q: missing | separator", f)
   387  			continue
   388  		}
   389  		if i%2 != 0 {
   390  			t.Errorf("parsing %q: misaligned | separator", f)
   391  		}
   392  		code, err := hex.DecodeString(f[:i] + f[i+1:])
   393  		if err != nil {
   394  			t.Errorf("parsing %q: %v", f, err)
   395  			continue
   396  		}
   397  		codes = append(codes, code)
   398  	}
   399  
   400  	return func(try func([]byte)) {
   401  		for _, code := range codes {
   402  			try(code)
   403  		}
   404  	}
   405  }