golang.org/x/arch@v0.17.0/internal/simdgen/xed.go (about)

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"regexp"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"golang.org/x/arch/internal/unify"
    15  	"golang.org/x/arch/x86/xeddata"
    16  	"gopkg.in/yaml.v3"
    17  )
    18  
    19  // TODO: Doc. Returns Values with Def domains.
    20  func loadXED(xedPath string) []*unify.Value {
    21  	// TODO: Obviously a bunch more to do here.
    22  
    23  	db, err := xeddata.NewDatabase(xedPath)
    24  	if err != nil {
    25  		log.Fatalf("open database: %v", err)
    26  	}
    27  
    28  	var defs []*unify.Value
    29  	err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
    30  		inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
    31  
    32  		switch {
    33  		case inst.RealOpcode == "N":
    34  			return // Skip unstable instructions
    35  		case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")):
    36  			// We're only intested in SSE and AVX instuctions.
    37  			return // Skip non-AVX or SSE instructions
    38  		}
    39  
    40  		if *flagDebugXED {
    41  			fmt.Printf("%s:\n%+v\n", inst.Pos, inst)
    42  		}
    43  
    44  		ins, outs := decodeOperands(db, strings.Fields(inst.Operands))
    45  		// TODO: "feature"
    46  		fields := []string{"goarch", "asm", "in", "out"}
    47  		values := []*unify.Value{
    48  			unify.NewValue(unify.NewStringExact("amd64")),
    49  			unify.NewValue(unify.NewStringExact(inst.Opcode())),
    50  			unify.NewValue(ins),
    51  			unify.NewValue(outs),
    52  		}
    53  		pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line}
    54  		defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos))
    55  		if *flagDebugXED {
    56  			y, _ := yaml.Marshal(defs[len(defs)-1])
    57  			fmt.Printf("==>\n%s\n", y)
    58  		}
    59  	})
    60  	if err != nil {
    61  		log.Fatalf("walk insts: %v", err)
    62  	}
    63  	return defs
    64  }
    65  
    66  func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) {
    67  	var inVals, outVals []*unify.Value
    68  	for asmPos, o := range operands {
    69  		op, err := xeddata.NewOperand(db, o)
    70  		if err != nil {
    71  			log.Fatalf("parsing operand %q: %v", o, err)
    72  		}
    73  		if *flagDebugXED {
    74  			fmt.Printf("  %+v\n", op)
    75  		}
    76  
    77  		// TODO: We should have a fixed set of fields once this gets more cleaned up.
    78  		var fields []string
    79  		var values []*unify.Value
    80  		add := func(f string, v *unify.Value) {
    81  			fields = append(fields, f)
    82  			values = append(values, v)
    83  		}
    84  
    85  		add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos))))
    86  
    87  		var r, w bool
    88  		switch op.Action {
    89  		case "r":
    90  			r = true
    91  		case "w":
    92  			w = true
    93  		case "rw":
    94  			r, w = true, true
    95  		default:
    96  			continue
    97  		}
    98  
    99  		lhs := op.NameLHS()
   100  		if strings.HasPrefix(lhs, "MEM") {
   101  			add("mem", unify.NewValue(unify.NewStringExact("true")))
   102  			add("w", unify.NewValue(unify.NewStringExact("TODO")))
   103  			add("base", unify.NewValue(unify.NewStringExact("TODO")))
   104  		} else if strings.HasPrefix(lhs, "REG") {
   105  			if op.Width == "mskw" {
   106  				add("mask", unify.NewValue(unify.NewStringExact("true")))
   107  				add("w", unify.NewValue(unify.NewStringExact("TODO")))
   108  				add("base", unify.NewValue(unify.NewStringExact("TODO")))
   109  			} else {
   110  				width, ok := decodeReg(op)
   111  				if !ok {
   112  					return
   113  				}
   114  				baseRe, bits, ok := decodeBits(op)
   115  				if !ok {
   116  					return
   117  				}
   118  				baseDomain, err := unify.NewStringRegex(baseRe)
   119  				if err != nil {
   120  					panic("parsing baseRe: " + err.Error())
   121  				}
   122  				add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits))))
   123  				add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width))))
   124  				add("base", unify.NewValue(baseDomain))
   125  			}
   126  		} else {
   127  			// TODO: Immediates
   128  			add("UNKNOWN", unify.NewValue(unify.NewStringExact(o)))
   129  		}
   130  		// dq => 128 bits (XMM)
   131  		// qq => 256 bits (YMM)
   132  		// mskw => K
   133  		// z[iuf?](8|16|32|...) => 512 bits (ZMM)
   134  		//
   135  		// Are these always XMM/YMM/ZMM or can other irregular things
   136  		// with large widths use these same codes?
   137  		//
   138  		// The only zi* is zi32. I don't understand the difference between
   139  		// zi32 and zu32 or why there are a bunch of zu* but only one zi.
   140  		//
   141  		// The xtype tells you the element type. i8, i16, i32, i64, etc.
   142  		//
   143  		// Things like AVX2 VPAND have an xtype of u256.
   144  		// I think we have to map that to all widths.
   145  		// There's no u512 (presumably those are all masked, so elem width matters).
   146  		// These are all Category: LOGICAL. Maybe we use that info?
   147  
   148  		if r {
   149  			inVal := unify.NewValue(unify.NewDef(fields, values))
   150  			inVals = append(inVals, inVal)
   151  		}
   152  		if w {
   153  			outVal := unify.NewValue(unify.NewDef(fields, values))
   154  			outVals = append(outVals, outVal)
   155  		}
   156  	}
   157  
   158  	return unify.NewTuple(inVals...), unify.NewTuple(outVals...)
   159  }
   160  
   161  func decodeReg(op *xeddata.Operand) (w int, ok bool) {
   162  	if !strings.HasPrefix(op.NameLHS(), "REG") {
   163  		return 0, false
   164  	}
   165  	// TODO: We shouldn't be relying on the macro naming conventions. We should
   166  	// use all-dec-patterns.txt, but xeddata doesn't support that table right now.
   167  	rhs := op.NameRHS()
   168  	if !strings.HasSuffix(rhs, "()") {
   169  		return 0, false
   170  	}
   171  	switch {
   172  	case strings.HasPrefix(rhs, "XMM_"):
   173  		return 128, true
   174  	case strings.HasPrefix(rhs, "YMM_"):
   175  		return 256, true
   176  	case strings.HasPrefix(rhs, "ZMM_"):
   177  		return 512, true
   178  	}
   179  	return 0, false
   180  }
   181  
   182  var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`)
   183  
   184  func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) {
   185  	// Handle some weird ones.
   186  	switch op.Xtype {
   187  	// 8-bit float formats as defined by Open Compute Project "OCP 8-bit
   188  	// Floating Point Specification (OFP8)".
   189  	case "bf8", // E5M2 float
   190  		"hf8": // E4M3 float
   191  		return "", 0, false // TODO
   192  	case "bf16": // bfloat16 float
   193  		return "", 0, false // TODO
   194  	case "2f16":
   195  		// Complex consisting of 2 float16s. Doesn't exist in Go, but we can say
   196  		// what it would be.
   197  		return "complex", 32, true
   198  	case "2i8", "2I8":
   199  		// These just use the lower INT8 in each 16 bit field.
   200  		// As far as I can tell, "2I8" is a typo.
   201  		return "int", 8, true
   202  	}
   203  
   204  	// The rest follow a simple pattern.
   205  	m := xtypeRe.FindStringSubmatch(op.Xtype)
   206  	if m == nil {
   207  		// TODO: Report unrecognized xtype
   208  		return "", 0, false
   209  	}
   210  	bits, _ = strconv.Atoi(m[2])
   211  	switch m[1] {
   212  	case "i", "u":
   213  		// XED is rather inconsistent about what's signed, unsigned, or doesn't
   214  		// matter, so merge them together and let the Go definitions narrow as
   215  		// appropriate. Maybe there's a better way to do this.
   216  		baseRe = "int|uint"
   217  	case "f":
   218  		baseRe = "float"
   219  	}
   220  	return baseRe, bits, true
   221  }