golang.org/x/arch@v0.17.0/loong64/loong64spec/spec.go (about) 1 // Copyright 2024 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // loong64spec reads the "LoongArch-Vol1-EN.pdf" [1] to collect instruction 6 // encoding details and output to tables.go. 7 // 8 // usage: go run spec.go LoongArch-Vol1-EN.pdf 9 // 10 // [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf 11 12 package main 13 14 import ( 15 "bytes" 16 "fmt" 17 "log" 18 "math" 19 "os" 20 "regexp" 21 "sort" 22 "strconv" 23 "strings" 24 25 "rsc.io/pdf" 26 ) 27 28 func mergeMap(m1 map[string]string, m2 map[string]string) { 29 for k := range m2 { 30 m1[k] = m2[k] 31 } 32 } 33 34 func main() { 35 log.SetFlags(0) 36 log.SetPrefix("loong64spec: ") 37 38 if len(os.Args) != 2 { 39 fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n") 40 os.Exit(2) 41 } 42 f, err := pdf.Open(os.Args[1]) 43 if err != nil { 44 log.Fatal(err) 45 } 46 var prologue bytes.Buffer 47 prologue.Write([]byte("// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n")) 48 49 var op_f bytes.Buffer 50 op_f.Write([]byte("const (\n\t_ Op = iota\n")) 51 52 var opstr_f bytes.Buffer 53 opstr_f.Write([]byte("var opstr = [...]string{\n")) 54 55 var instFormats_f bytes.Buffer 56 instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n")) 57 58 // Scan document looking for instructions. 59 n := f.NumPage() 60 var ops []string 61 opstrs := map[string]string{} 62 instFormatComments := map[string]string{} 63 instFormats := map[string]string{} 64 var fp int 65 for pageNum := 1; pageNum <= n; pageNum++ { 66 p := f.Page(pageNum) 67 if fp == 0 { 68 if !isFirstPage(p) { 69 continue 70 } 71 fp = pageNum 72 } 73 cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum) 74 ops = append(ops, cPageOps...) 75 mergeMap(opstrs, cPageOpstrs) 76 mergeMap(instFormatComments, cPageInstFormatComments) 77 mergeMap(instFormats, cPageInstFormats) 78 } 79 80 sort.Strings(ops) 81 82 for _, op := range ops { 83 // 1. write op 84 op_f.Write([]byte(fmt.Sprintf("\t%s\n", op))) 85 // 2. write opstr 86 opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op]))) 87 // 3. write instFormat 88 instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op]))) 89 } 90 91 op_f.Write([]byte(")\n\n")) 92 opstr_f.Write([]byte("}\n\n")) 93 instFormats_f.Write([]byte("}\n")) 94 95 fileTables, err := os.Create("tables.go") 96 defer fileTables.Close() 97 98 fileTables.Write(prologue.Bytes()) 99 fileTables.Write(op_f.Bytes()) 100 fileTables.Write(opstr_f.Bytes()) 101 fileTables.Write(instFormats_f.Bytes()) 102 103 fileTables.Close() 104 } 105 106 func isFirstPage(page pdf.Page) bool { 107 content := page.Content() 108 appendixb := "AppendixB" 109 ct := "" 110 for _, t := range content.Text { 111 ct += t.S 112 if ct == "AppendixB" { 113 return true 114 } 115 if strings.HasPrefix(appendixb, ct) { 116 continue 117 } else { 118 return false 119 } 120 } 121 return false 122 } 123 124 func getArg(name string) (length int, argName string) { 125 switch { 126 case strings.Contains("arg_fd", name): 127 return 5, "arg_fd" 128 case strings.Contains("arg_fj", name): 129 return 5, "arg_fj" 130 case strings.Contains("arg_fk", name): 131 return 5, "arg_fk" 132 case strings.Contains("arg_fa", name): 133 return 5, "arg_fa" 134 case strings.Contains("arg_rd", name): 135 return 5, "arg_rd" 136 case strings.Contains("arg_rj", name) || name == "rj!=0,1": 137 return 5, "arg_rj" 138 case strings.Contains("arg_rk", name): 139 return 5, "arg_rk" 140 case name == "csr": 141 return 14, "arg_csr_23_10" 142 case strings.Contains("arg_cd", name): 143 return 5, "arg_cd" 144 case strings.Contains("arg_cj", name): 145 return 5, "arg_cj" 146 case strings.Contains("arg_ca", name): 147 return 5, "arg_ca" 148 case strings.Contains(name, "sa"): 149 length, _ := strconv.Atoi(strings.Split(name, "sa")[1]) 150 if length == 2 { 151 argName = "arg_sa2_16_15" 152 } else { 153 argName = "arg_sa3_17_15" 154 } 155 return length, argName 156 case strings.Contains("arg_seq_17_10", name): 157 return 8, "arg_seq_17_10" 158 case strings.Contains("arg_op_4_0", name): 159 return 5, "arg_op_4_0" 160 case strings.Contains(name, "ui"): 161 length, _ := strconv.Atoi(strings.Split(name, "ui")[1]) 162 if length == 5 { 163 argName = "arg_ui5_14_10" 164 } else if length == 6 { 165 argName = "arg_ui6_15_10" 166 } else { 167 argName = "arg_ui12_21_10" 168 } 169 return length, argName 170 case strings.Contains("arg_lsbw", name): 171 return 5, "arg_lsbw" 172 case strings.Contains("arg_msbw", name): 173 return 5, "arg_msbw" 174 case strings.Contains("arg_lsbd", name): 175 return 6, "arg_lsbd" 176 case strings.Contains("arg_msbd", name): 177 return 6, "arg_msbd" 178 case strings.Contains(name, "si"): 179 length, _ := strconv.Atoi(strings.Split(name, "si")[1]) 180 if length == 12 { 181 argName = "arg_si12_21_10" 182 } else if length == 14 { 183 argName = "arg_si14_23_10" 184 } else if length == 16 { 185 argName = "arg_si16_25_10" 186 } else { 187 argName = "arg_si20_24_5" 188 } 189 return length, argName 190 case strings.Contains(name, "offs"): 191 splitName := strings.Split(name, ":") 192 left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1]) 193 right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0]) 194 return left - right + 1, "offs" 195 default: 196 return 0, "" 197 } 198 } 199 200 func binstrToHex(str string) string { 201 rst := 0 202 hex := "0x" 203 charArray := []byte(str) 204 for i := 0; i < 32; { 205 rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48) 206 switch rst { 207 case 10: 208 hex = hex + "a" 209 case 11: 210 hex = hex + "b" 211 case 12: 212 hex = hex + "c" 213 case 13: 214 hex = hex + "d" 215 case 14: 216 hex = hex + "e" 217 case 15: 218 hex = hex + "f" 219 default: 220 hex += strconv.Itoa(rst) 221 } 222 223 i = i + 4 224 } 225 return hex 226 } 227 228 /* 229 Here we deal with the instruction FCMP.cond.S/D, which has the following format: 230 231 | 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 | 232 |---------|---------|---------|-------|---|---|-------| 233 | op | cond | fk | fj | 0 | 0 | cd | 234 235 The `cond` field has these possible values: 236 237 "CAF": "00", 238 "CUN": "08", 239 "CEQ": "04", 240 "CUEQ": "0c", 241 "CLT": "02", 242 "CULT": "0a", 243 "CLE": "06", 244 "CULE": "0e", 245 "CNE": "10", 246 "COR": "14", 247 "CUNE": "18", 248 "SAF": "01", 249 "SUN": "09", 250 "SEQ": "05", 251 "SUEQ": "0d", 252 "SLT": "03", 253 "SULT": "0b", 254 "SLE": "07", 255 "SULE": "0f", 256 "SNE": "11", 257 "SOR": "15", 258 "SUNE": "19", 259 260 These values are the hexadecimal numbers of bits 19 to 15, the same as 261 described in the instruction set manual. 262 263 The following code defines a map, the values in it represent the hexadecimal 264 encoding of the cond field in the entire instruction. In this case, the upper 265 4 bits and the lowest 1 bit are encoded separately, so the encoding is 266 different from the encoding described above. 267 */ 268 func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) { 269 conds := map[string]string{ 270 "CAF": "00", 271 "CUN": "40", 272 "CEQ": "20", 273 "CUEQ": "60", 274 "CLT": "10", 275 "CULT": "50", 276 "CLE": "30", 277 "CULE": "70", 278 "CNE": "80", 279 "COR": "a0", 280 "CUNE": "c0", 281 "SAF": "08", 282 "SUN": "48", 283 "SEQ": "28", 284 "SUEQ": "68", 285 "SLT": "18", 286 "SULT": "58", 287 "SLE": "38", 288 "SULE": "78", 289 "SNE": "88", 290 "SOR": "a8", 291 "SUNE": "c8", 292 } 293 fcmpConditions = make(map[string]map[string]string) 294 for k, v := range conds { 295 op := fmt.Sprintf("FCMP_%s_%s", k, ds) 296 opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds) 297 instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds) 298 var instFormat string 299 if ds == "D" { 300 instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) 301 } else { 302 instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds) 303 } 304 305 fcmpConditions[op] = make(map[string]string) 306 fcmpConditions[op]["op"] = op 307 fcmpConditions[op]["opstr"] = opstr 308 fcmpConditions[op]["instFormatComment"] = instFormatComment 309 fcmpConditions[op]["instFormat"] = instFormat 310 } 311 return 312 } 313 314 func findWords(chars []pdf.Text) (words []pdf.Text) { 315 for i := 0; i < len(chars); { 316 xRange := []float64{chars[i].X, chars[i].X} 317 j := i + 1 318 319 // Find all chars on one line. 320 for j < len(chars) && chars[j].Y == chars[i].Y { 321 xRange[1] = chars[j].X 322 j++ 323 } 324 325 // we need to note that the word may change line(Y) but belong to one cell. So, after loop over all continued 326 // chars whose Y are same, check if the next char's X belong to the range of xRange, if true, means it should 327 // be contact to current word, because the next word's X should bigger than current one. 328 for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] { 329 j++ 330 } 331 332 var end float64 333 // Split line into words (really, phrases). 334 for k := i; k < j; { 335 ck := &chars[k] 336 s := ck.S 337 end = ck.X + ck.W 338 charSpace := ck.FontSize / 6 339 wordSpace := ck.FontSize * 2 / 3 340 l := k + 1 341 for l < j { 342 // Grow word. 343 cl := &chars[l] 344 345 if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { 346 s += cl.S 347 end = cl.X + cl.W 348 l++ 349 continue 350 } 351 // Add space to phrase before next word. 352 if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { 353 s += " " + cl.S 354 end = cl.X + cl.W 355 l++ 356 continue 357 } 358 break 359 } 360 f := ck.Font 361 words = append(words, pdf.Text{ 362 Font: f, 363 FontSize: ck.FontSize, 364 X: ck.X, 365 Y: ck.Y, 366 W: end - ck.X, 367 S: s, 368 }) 369 k = l 370 } 371 i = j 372 } 373 374 return words 375 } 376 377 func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) { 378 opstrs = make(map[string]string) 379 instFormatComments = make(map[string]string) 380 instFormats = make(map[string]string) 381 382 content := p.Content() 383 384 var text []pdf.Text 385 for _, t := range content.Text { 386 text = append(text, t) 387 } 388 389 // table name(70), table header(64), page num(3) 390 if isFP { 391 text = text[134 : len(text)-3] 392 } else { 393 text = text[64 : len(text)-3] 394 } 395 396 text = findWords(text) 397 398 for i := 0; i < len(text); { 399 var fcmpConditions map[string]map[string]string 400 if strings.HasPrefix(text[i].S, "FCMP") { 401 fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2]) 402 403 for fc, inst := range fcmpConditions { 404 ops = append(ops, inst["op"]) 405 opstrs[fc] = inst["opstr"] 406 instFormatComments[fc] = inst["instFormatComment"] 407 instFormats[fc] = inst["instFormat"] 408 } 409 t := i + 1 410 for ; text[t].Y == text[i].Y; t++ { 411 continue 412 } 413 i = t 414 continue 415 } 416 417 op := strings.Replace(text[i].S, ".", "_", -1) 418 opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S) 419 instFormatComment := "" 420 binValue := "" 421 binMask := "" 422 instArgs := "" 423 offs := false 424 var offArgs []string 425 426 j := i + 1 427 for ; j < len(text) && text[j].Y == text[i].Y; j++ { 428 429 // Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped. 430 if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false { 431 instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1)) 432 continue 433 } 434 if text[j].S == "0" || text[j].S == "1" { 435 binValue += text[j].S 436 binMask += "1" 437 } else { 438 argLen, argName := getArg(text[j].S) 439 440 // Get argument's length failed, compute it by other arguments. 441 if argLen == 0 { 442 left := 31 - len(binValue) 443 right := 0 444 l := j + 1 445 if l < len(text) && text[l].Y == text[j].Y { 446 for ; text[l].Y == text[j].Y; l++ { 447 if text[l].S == "0" || text[l].S == "1" { 448 right += 1 449 } else { 450 tArgLen, _ := getArg(text[l].S) 451 if tArgLen == 0 { 452 fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n") 453 } 454 right += tArgLen 455 } 456 } 457 } 458 argLen = left - right + 1 459 argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10) 460 } 461 462 for k := 0; k < argLen; k++ { 463 binValue += "0" 464 binMask += "0" 465 } 466 467 if argName != "offs" { 468 if instArgs != "" { 469 instArgs = ", " + instArgs 470 } 471 instArgs = argName + instArgs 472 } else { 473 offs = true 474 offArgs = append(offArgs, text[j].S) 475 } 476 } 477 } 478 479 // The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ 480 if offs && offArgs != nil { 481 var left int 482 var right int 483 if len(offArgs) == 1 { 484 left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1]) 485 right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) 486 } else if len(offArgs) == 2 { 487 left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1]) 488 right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0]) 489 } 490 491 if instArgs == "" { 492 instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right) 493 } else { 494 instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right) 495 } 496 } 497 498 ops = append(ops, op) 499 opstrs[op] = opstr 500 if instFormatComment == "" { 501 instFormatComment = "// " + text[i].S 502 } else if strings.HasPrefix(op, "AM") { 503 instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S) 504 } 505 instFormatComments[op] = instFormatComment 506 // The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.* 507 if instArgs != "" { 508 args := strings.Split(instFormatComment, " ")[2:] 509 tInstArgs := strings.Split(instArgs, ", ") 510 newOrderedInstArgs := []string{} 511 for _, a := range args { 512 a = strings.Split(a, ",")[0] 513 for _, aa := range tInstArgs { 514 if strings.Contains(aa, a) { 515 newOrderedInstArgs = append(newOrderedInstArgs, aa) 516 break 517 } else if a == "rd" && aa == "arg_fd" { 518 newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd") 519 break 520 } 521 } 522 } 523 instArgs = strings.Join(newOrderedInstArgs, ", ") 524 } 525 if strings.HasPrefix(op, "AM") { 526 instArgs = "arg_rd, arg_rk, arg_rj" 527 } 528 instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs) 529 instFormats[op] = instFormat 530 531 i = j // next instruction 532 } 533 534 return 535 }