golang.org/x/arch@v0.17.0/internal/simdgen/xed.go (about) 1 // Copyright 2025 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "fmt" 9 "log" 10 "regexp" 11 "strconv" 12 "strings" 13 14 "golang.org/x/arch/internal/unify" 15 "golang.org/x/arch/x86/xeddata" 16 "gopkg.in/yaml.v3" 17 ) 18 19 // TODO: Doc. Returns Values with Def domains. 20 func loadXED(xedPath string) []*unify.Value { 21 // TODO: Obviously a bunch more to do here. 22 23 db, err := xeddata.NewDatabase(xedPath) 24 if err != nil { 25 log.Fatalf("open database: %v", err) 26 } 27 28 var defs []*unify.Value 29 err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) { 30 inst.Pattern = xeddata.ExpandStates(db, inst.Pattern) 31 32 switch { 33 case inst.RealOpcode == "N": 34 return // Skip unstable instructions 35 case !(strings.HasPrefix(inst.Extension, "SSE") || strings.HasPrefix(inst.Extension, "AVX")): 36 // We're only intested in SSE and AVX instuctions. 37 return // Skip non-AVX or SSE instructions 38 } 39 40 if *flagDebugXED { 41 fmt.Printf("%s:\n%+v\n", inst.Pos, inst) 42 } 43 44 ins, outs := decodeOperands(db, strings.Fields(inst.Operands)) 45 // TODO: "feature" 46 fields := []string{"goarch", "asm", "in", "out"} 47 values := []*unify.Value{ 48 unify.NewValue(unify.NewStringExact("amd64")), 49 unify.NewValue(unify.NewStringExact(inst.Opcode())), 50 unify.NewValue(ins), 51 unify.NewValue(outs), 52 } 53 pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line} 54 defs = append(defs, unify.NewValuePos(unify.NewDef(fields, values), pos)) 55 if *flagDebugXED { 56 y, _ := yaml.Marshal(defs[len(defs)-1]) 57 fmt.Printf("==>\n%s\n", y) 58 } 59 }) 60 if err != nil { 61 log.Fatalf("walk insts: %v", err) 62 } 63 return defs 64 } 65 66 func decodeOperands(db *xeddata.Database, operands []string) (ins, outs unify.Tuple) { 67 var inVals, outVals []*unify.Value 68 for asmPos, o := range operands { 69 op, err := xeddata.NewOperand(db, o) 70 if err != nil { 71 log.Fatalf("parsing operand %q: %v", o, err) 72 } 73 if *flagDebugXED { 74 fmt.Printf(" %+v\n", op) 75 } 76 77 // TODO: We should have a fixed set of fields once this gets more cleaned up. 78 var fields []string 79 var values []*unify.Value 80 add := func(f string, v *unify.Value) { 81 fields = append(fields, f) 82 values = append(values, v) 83 } 84 85 add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos)))) 86 87 var r, w bool 88 switch op.Action { 89 case "r": 90 r = true 91 case "w": 92 w = true 93 case "rw": 94 r, w = true, true 95 default: 96 continue 97 } 98 99 lhs := op.NameLHS() 100 if strings.HasPrefix(lhs, "MEM") { 101 add("mem", unify.NewValue(unify.NewStringExact("true"))) 102 add("w", unify.NewValue(unify.NewStringExact("TODO"))) 103 add("base", unify.NewValue(unify.NewStringExact("TODO"))) 104 } else if strings.HasPrefix(lhs, "REG") { 105 if op.Width == "mskw" { 106 add("mask", unify.NewValue(unify.NewStringExact("true"))) 107 add("w", unify.NewValue(unify.NewStringExact("TODO"))) 108 add("base", unify.NewValue(unify.NewStringExact("TODO"))) 109 } else { 110 width, ok := decodeReg(op) 111 if !ok { 112 return 113 } 114 baseRe, bits, ok := decodeBits(op) 115 if !ok { 116 return 117 } 118 baseDomain, err := unify.NewStringRegex(baseRe) 119 if err != nil { 120 panic("parsing baseRe: " + err.Error()) 121 } 122 add("bits", unify.NewValue(unify.NewStringExact(fmt.Sprint(bits)))) 123 add("w", unify.NewValue(unify.NewStringExact(fmt.Sprint(width)))) 124 add("base", unify.NewValue(baseDomain)) 125 } 126 } else { 127 // TODO: Immediates 128 add("UNKNOWN", unify.NewValue(unify.NewStringExact(o))) 129 } 130 // dq => 128 bits (XMM) 131 // qq => 256 bits (YMM) 132 // mskw => K 133 // z[iuf?](8|16|32|...) => 512 bits (ZMM) 134 // 135 // Are these always XMM/YMM/ZMM or can other irregular things 136 // with large widths use these same codes? 137 // 138 // The only zi* is zi32. I don't understand the difference between 139 // zi32 and zu32 or why there are a bunch of zu* but only one zi. 140 // 141 // The xtype tells you the element type. i8, i16, i32, i64, etc. 142 // 143 // Things like AVX2 VPAND have an xtype of u256. 144 // I think we have to map that to all widths. 145 // There's no u512 (presumably those are all masked, so elem width matters). 146 // These are all Category: LOGICAL. Maybe we use that info? 147 148 if r { 149 inVal := unify.NewValue(unify.NewDef(fields, values)) 150 inVals = append(inVals, inVal) 151 } 152 if w { 153 outVal := unify.NewValue(unify.NewDef(fields, values)) 154 outVals = append(outVals, outVal) 155 } 156 } 157 158 return unify.NewTuple(inVals...), unify.NewTuple(outVals...) 159 } 160 161 func decodeReg(op *xeddata.Operand) (w int, ok bool) { 162 if !strings.HasPrefix(op.NameLHS(), "REG") { 163 return 0, false 164 } 165 // TODO: We shouldn't be relying on the macro naming conventions. We should 166 // use all-dec-patterns.txt, but xeddata doesn't support that table right now. 167 rhs := op.NameRHS() 168 if !strings.HasSuffix(rhs, "()") { 169 return 0, false 170 } 171 switch { 172 case strings.HasPrefix(rhs, "XMM_"): 173 return 128, true 174 case strings.HasPrefix(rhs, "YMM_"): 175 return 256, true 176 case strings.HasPrefix(rhs, "ZMM_"): 177 return 512, true 178 } 179 return 0, false 180 } 181 182 var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`) 183 184 func decodeBits(op *xeddata.Operand) (baseRe string, bits int, ok bool) { 185 // Handle some weird ones. 186 switch op.Xtype { 187 // 8-bit float formats as defined by Open Compute Project "OCP 8-bit 188 // Floating Point Specification (OFP8)". 189 case "bf8", // E5M2 float 190 "hf8": // E4M3 float 191 return "", 0, false // TODO 192 case "bf16": // bfloat16 float 193 return "", 0, false // TODO 194 case "2f16": 195 // Complex consisting of 2 float16s. Doesn't exist in Go, but we can say 196 // what it would be. 197 return "complex", 32, true 198 case "2i8", "2I8": 199 // These just use the lower INT8 in each 16 bit field. 200 // As far as I can tell, "2I8" is a typo. 201 return "int", 8, true 202 } 203 204 // The rest follow a simple pattern. 205 m := xtypeRe.FindStringSubmatch(op.Xtype) 206 if m == nil { 207 // TODO: Report unrecognized xtype 208 return "", 0, false 209 } 210 bits, _ = strconv.Atoi(m[2]) 211 switch m[1] { 212 case "i", "u": 213 // XED is rather inconsistent about what's signed, unsigned, or doesn't 214 // matter, so merge them together and let the Go definitions narrow as 215 // appropriate. Maybe there's a better way to do this. 216 baseRe = "int|uint" 217 case "f": 218 baseRe = "float" 219 } 220 return baseRe, bits, true 221 }