golang.org/x/arch@v0.17.0/loong64/loong64spec/spec.go (about)

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
// loong64spec reads "LoongArch-Vol1-EN.pdf" [1], collects the instruction
// encoding details from it, and writes them to tables.go.
     7  //
     8  // usage: go run spec.go LoongArch-Vol1-EN.pdf
     9  //
    10  // [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
    11  
    12  package main
    13  
    14  import (
    15  	"bytes"
    16  	"fmt"
    17  	"log"
    18  	"math"
    19  	"os"
    20  	"regexp"
    21  	"sort"
    22  	"strconv"
    23  	"strings"
    24  
    25  	"rsc.io/pdf"
    26  )
    27  
    28  func mergeMap(m1 map[string]string, m2 map[string]string) {
    29  	for k := range m2 {
    30  		m1[k] = m2[k]
    31  	}
    32  }
    33  
    34  func main() {
    35  	log.SetFlags(0)
    36  	log.SetPrefix("loong64spec: ")
    37  
    38  	if len(os.Args) != 2 {
    39  		fmt.Fprintf(os.Stderr, "usage: loong64spec LoongArch-Vol1-EN.pdf\n")
    40  		os.Exit(2)
    41  	}
    42  	f, err := pdf.Open(os.Args[1])
    43  	if err != nil {
    44  		log.Fatal(err)
    45  	}
    46  	var prologue bytes.Buffer
    47  	prologue.Write([]byte("// Code generated by loong64spec LoongArch-Vol1-EN.pdf, DO NOT EDIT.\n\n// Copyright 2024 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage loong64asm\n\n"))
    48  
    49  	var op_f bytes.Buffer
    50  	op_f.Write([]byte("const (\n\t_ Op = iota\n"))
    51  
    52  	var opstr_f bytes.Buffer
    53  	opstr_f.Write([]byte("var opstr = [...]string{\n"))
    54  
    55  	var instFormats_f bytes.Buffer
    56  	instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n"))
    57  
    58  	// Scan document looking for instructions.
    59  	n := f.NumPage()
    60  	var ops []string
    61  	opstrs := map[string]string{}
    62  	instFormatComments := map[string]string{}
    63  	instFormats := map[string]string{}
    64  	var fp int
    65  	for pageNum := 1; pageNum <= n; pageNum++ {
    66  		p := f.Page(pageNum)
    67  		if fp == 0 {
    68  			if !isFirstPage(p) {
    69  				continue
    70  			}
    71  			fp = pageNum
    72  		}
    73  		cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
    74  		ops = append(ops, cPageOps...)
    75  		mergeMap(opstrs, cPageOpstrs)
    76  		mergeMap(instFormatComments, cPageInstFormatComments)
    77  		mergeMap(instFormats, cPageInstFormats)
    78  	}
    79  
    80  	sort.Strings(ops)
    81  
    82  	for _, op := range ops {
    83  		// 1. write op
    84  		op_f.Write([]byte(fmt.Sprintf("\t%s\n", op)))
    85  		// 2. write opstr
    86  		opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op])))
    87  		// 3. write instFormat
    88  		instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op])))
    89  	}
    90  
    91  	op_f.Write([]byte(")\n\n"))
    92  	opstr_f.Write([]byte("}\n\n"))
    93  	instFormats_f.Write([]byte("}\n"))
    94  
    95  	fileTables, err := os.Create("tables.go")
    96  	defer fileTables.Close()
    97  
    98  	fileTables.Write(prologue.Bytes())
    99  	fileTables.Write(op_f.Bytes())
   100  	fileTables.Write(opstr_f.Bytes())
   101  	fileTables.Write(instFormats_f.Bytes())
   102  
   103  	fileTables.Close()
   104  }
   105  
   106  func isFirstPage(page pdf.Page) bool {
   107  	content := page.Content()
   108  	appendixb := "AppendixB"
   109  	ct := ""
   110  	for _, t := range content.Text {
   111  		ct += t.S
   112  		if ct == "AppendixB" {
   113  			return true
   114  		}
   115  		if strings.HasPrefix(appendixb, ct) {
   116  			continue
   117  		} else {
   118  			return false
   119  		}
   120  	}
   121  	return false
   122  }
   123  
   124  func getArg(name string) (length int, argName string) {
   125  	switch {
   126  	case strings.Contains("arg_fd", name):
   127  		return 5, "arg_fd"
   128  	case strings.Contains("arg_fj", name):
   129  		return 5, "arg_fj"
   130  	case strings.Contains("arg_fk", name):
   131  		return 5, "arg_fk"
   132  	case strings.Contains("arg_fa", name):
   133  		return 5, "arg_fa"
   134  	case strings.Contains("arg_rd", name):
   135  		return 5, "arg_rd"
   136  	case strings.Contains("arg_rj", name) || name == "rj!=0,1":
   137  		return 5, "arg_rj"
   138  	case strings.Contains("arg_rk", name):
   139  		return 5, "arg_rk"
   140  	case name == "csr":
   141  		return 14, "arg_csr_23_10"
   142  	case strings.Contains("arg_cd", name):
   143  		return 5, "arg_cd"
   144  	case strings.Contains("arg_cj", name):
   145  		return 5, "arg_cj"
   146  	case strings.Contains("arg_ca", name):
   147  		return 5, "arg_ca"
   148  	case strings.Contains(name, "sa"):
   149  		length, _ := strconv.Atoi(strings.Split(name, "sa")[1])
   150  		if length == 2 {
   151  			argName = "arg_sa2_16_15"
   152  		} else {
   153  			argName = "arg_sa3_17_15"
   154  		}
   155  		return length, argName
   156  	case strings.Contains("arg_seq_17_10", name):
   157  		return 8, "arg_seq_17_10"
   158  	case strings.Contains("arg_op_4_0", name):
   159  		return 5, "arg_op_4_0"
   160  	case strings.Contains(name, "ui"):
   161  		length, _ := strconv.Atoi(strings.Split(name, "ui")[1])
   162  		if length == 5 {
   163  			argName = "arg_ui5_14_10"
   164  		} else if length == 6 {
   165  			argName = "arg_ui6_15_10"
   166  		} else {
   167  			argName = "arg_ui12_21_10"
   168  		}
   169  		return length, argName
   170  	case strings.Contains("arg_lsbw", name):
   171  		return 5, "arg_lsbw"
   172  	case strings.Contains("arg_msbw", name):
   173  		return 5, "arg_msbw"
   174  	case strings.Contains("arg_lsbd", name):
   175  		return 6, "arg_lsbd"
   176  	case strings.Contains("arg_msbd", name):
   177  		return 6, "arg_msbd"
   178  	case strings.Contains(name, "si"):
   179  		length, _ := strconv.Atoi(strings.Split(name, "si")[1])
   180  		if length == 12 {
   181  			argName = "arg_si12_21_10"
   182  		} else if length == 14 {
   183  			argName = "arg_si14_23_10"
   184  		} else if length == 16 {
   185  			argName = "arg_si16_25_10"
   186  		} else {
   187  			argName = "arg_si20_24_5"
   188  		}
   189  		return length, argName
   190  	case strings.Contains(name, "offs"):
   191  		splitName := strings.Split(name, ":")
   192  		left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1])
   193  		right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0])
   194  		return left - right + 1, "offs"
   195  	default:
   196  		return 0, ""
   197  	}
   198  }
   199  
   200  func binstrToHex(str string) string {
   201  	rst := 0
   202  	hex := "0x"
   203  	charArray := []byte(str)
   204  	for i := 0; i < 32; {
   205  		rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48)
   206  		switch rst {
   207  		case 10:
   208  			hex = hex + "a"
   209  		case 11:
   210  			hex = hex + "b"
   211  		case 12:
   212  			hex = hex + "c"
   213  		case 13:
   214  			hex = hex + "d"
   215  		case 14:
   216  			hex = hex + "e"
   217  		case 15:
   218  			hex = hex + "f"
   219  		default:
   220  			hex += strconv.Itoa(rst)
   221  		}
   222  
   223  		i = i + 4
   224  	}
   225  	return hex
   226  }
   227  
   228  /*
   229  Here we deal with the instruction FCMP.cond.S/D, which has the following format:
   230  
   231  	| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
   232  	|---------|---------|---------|-------|---|---|-------|
   233  	|   op    |  cond   |    fk   |   fj  | 0 | 0 |  cd   |
   234  
   235  The `cond` field has these possible values:
   236  
   237  	"CAF": "00",
   238  	"CUN": "08",
   239  	"CEQ": "04",
   240  	"CUEQ": "0c",
   241  	"CLT": "02",
   242  	"CULT": "0a",
   243  	"CLE": "06",
   244  	"CULE": "0e",
   245  	"CNE": "10",
   246  	"COR": "14",
   247  	"CUNE": "18",
   248  	"SAF": "01",
   249  	"SUN": "09",
   250  	"SEQ": "05",
   251  	"SUEQ": "0d",
   252  	"SLT": "03",
   253  	"SULT": "0b",
   254  	"SLE": "07",
   255  	"SULE": "0f",
   256  	"SNE": "11",
   257  	"SOR": "15",
   258  	"SUNE": "19",
   259  
   260  These values are the hexadecimal numbers of bits 19 to 15, the same as
   261  described in the instruction set manual.
   262  
   263  The following code defines a map, the values in it represent the hexadecimal
   264  encoding of the cond field in the entire instruction. In this case, the upper
   265  4 bits and the lowest 1 bit are encoded separately, so the encoding is
   266  different from the encoding described above.
   267  */
   268  func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
   269  	conds := map[string]string{
   270  		"CAF":  "00",
   271  		"CUN":  "40",
   272  		"CEQ":  "20",
   273  		"CUEQ": "60",
   274  		"CLT":  "10",
   275  		"CULT": "50",
   276  		"CLE":  "30",
   277  		"CULE": "70",
   278  		"CNE":  "80",
   279  		"COR":  "a0",
   280  		"CUNE": "c0",
   281  		"SAF":  "08",
   282  		"SUN":  "48",
   283  		"SEQ":  "28",
   284  		"SUEQ": "68",
   285  		"SLT":  "18",
   286  		"SULT": "58",
   287  		"SLE":  "38",
   288  		"SULE": "78",
   289  		"SNE":  "88",
   290  		"SOR":  "a8",
   291  		"SUNE": "c8",
   292  	}
   293  	fcmpConditions = make(map[string]map[string]string)
   294  	for k, v := range conds {
   295  		op := fmt.Sprintf("FCMP_%s_%s", k, ds)
   296  		opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds)
   297  		instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds)
   298  		var instFormat string
   299  		if ds == "D" {
   300  			instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
   301  		} else {
   302  			instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
   303  		}
   304  
   305  		fcmpConditions[op] = make(map[string]string)
   306  		fcmpConditions[op]["op"] = op
   307  		fcmpConditions[op]["opstr"] = opstr
   308  		fcmpConditions[op]["instFormatComment"] = instFormatComment
   309  		fcmpConditions[op]["instFormat"] = instFormat
   310  	}
   311  	return
   312  }
   313  
   314  func findWords(chars []pdf.Text) (words []pdf.Text) {
   315  	for i := 0; i < len(chars); {
   316  		xRange := []float64{chars[i].X, chars[i].X}
   317  		j := i + 1
   318  
   319  		// Find all chars on one line.
   320  		for j < len(chars) && chars[j].Y == chars[i].Y {
   321  			xRange[1] = chars[j].X
   322  			j++
   323  		}
   324  
   325  		// we need to note that the word may change line(Y) but belong to one cell. So, after loop over all continued
   326  		// chars whose Y are same, check if the next char's X belong to the range of xRange, if true, means it should
   327  		// be contact to current word, because the next word's X should bigger than current one.
   328  		for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] {
   329  			j++
   330  		}
   331  
   332  		var end float64
   333  		// Split line into words (really, phrases).
   334  		for k := i; k < j; {
   335  			ck := &chars[k]
   336  			s := ck.S
   337  			end = ck.X + ck.W
   338  			charSpace := ck.FontSize / 6
   339  			wordSpace := ck.FontSize * 2 / 3
   340  			l := k + 1
   341  			for l < j {
   342  				// Grow word.
   343  				cl := &chars[l]
   344  
   345  				if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
   346  					s += cl.S
   347  					end = cl.X + cl.W
   348  					l++
   349  					continue
   350  				}
   351  				// Add space to phrase before next word.
   352  				if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
   353  					s += " " + cl.S
   354  					end = cl.X + cl.W
   355  					l++
   356  					continue
   357  				}
   358  				break
   359  			}
   360  			f := ck.Font
   361  			words = append(words, pdf.Text{
   362  				Font:     f,
   363  				FontSize: ck.FontSize,
   364  				X:        ck.X,
   365  				Y:        ck.Y,
   366  				W:        end - ck.X,
   367  				S:        s,
   368  			})
   369  			k = l
   370  		}
   371  		i = j
   372  	}
   373  
   374  	return words
   375  }
   376  
   377  func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
   378  	opstrs = make(map[string]string)
   379  	instFormatComments = make(map[string]string)
   380  	instFormats = make(map[string]string)
   381  
   382  	content := p.Content()
   383  
   384  	var text []pdf.Text
   385  	for _, t := range content.Text {
   386  		text = append(text, t)
   387  	}
   388  
   389  	// table name(70), table header(64), page num(3)
   390  	if isFP {
   391  		text = text[134 : len(text)-3]
   392  	} else {
   393  		text = text[64 : len(text)-3]
   394  	}
   395  
   396  	text = findWords(text)
   397  
   398  	for i := 0; i < len(text); {
   399  		var fcmpConditions map[string]map[string]string
   400  		if strings.HasPrefix(text[i].S, "FCMP") {
   401  			fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2])
   402  
   403  			for fc, inst := range fcmpConditions {
   404  				ops = append(ops, inst["op"])
   405  				opstrs[fc] = inst["opstr"]
   406  				instFormatComments[fc] = inst["instFormatComment"]
   407  				instFormats[fc] = inst["instFormat"]
   408  			}
   409  			t := i + 1
   410  			for ; text[t].Y == text[i].Y; t++ {
   411  				continue
   412  			}
   413  			i = t
   414  			continue
   415  		}
   416  
   417  		op := strings.Replace(text[i].S, ".", "_", -1)
   418  		opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S)
   419  		instFormatComment := ""
   420  		binValue := ""
   421  		binMask := ""
   422  		instArgs := ""
   423  		offs := false
   424  		var offArgs []string
   425  
   426  		j := i + 1
   427  		for ; j < len(text) && text[j].Y == text[i].Y; j++ {
   428  
   429  			// Some instruction has no arguments, so the next word(text[j].S) is not the arguments string but 0/1 bit, it shouldn't be skipped.
   430  			if res, _ := regexp.MatchString("^\\d+$", text[j].S); j == i+1 && res == false {
   431  				instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1))
   432  				continue
   433  			}
   434  			if text[j].S == "0" || text[j].S == "1" {
   435  				binValue += text[j].S
   436  				binMask += "1"
   437  			} else {
   438  				argLen, argName := getArg(text[j].S)
   439  
   440  				// Get argument's length failed, compute it by other arguments.
   441  				if argLen == 0 {
   442  					left := 31 - len(binValue)
   443  					right := 0
   444  					l := j + 1
   445  					if l < len(text) && text[l].Y == text[j].Y {
   446  						for ; text[l].Y == text[j].Y; l++ {
   447  							if text[l].S == "0" || text[l].S == "1" {
   448  								right += 1
   449  							} else {
   450  								tArgLen, _ := getArg(text[l].S)
   451  								if tArgLen == 0 {
   452  									fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
   453  								}
   454  								right += tArgLen
   455  							}
   456  						}
   457  					}
   458  					argLen = left - right + 1
   459  					argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10)
   460  				}
   461  
   462  				for k := 0; k < argLen; k++ {
   463  					binValue += "0"
   464  					binMask += "0"
   465  				}
   466  
   467  				if argName != "offs" {
   468  					if instArgs != "" {
   469  						instArgs = ", " + instArgs
   470  					}
   471  					instArgs = argName + instArgs
   472  				} else {
   473  					offs = true
   474  					offArgs = append(offArgs, text[j].S)
   475  				}
   476  			}
   477  		}
   478  
   479  		// The real offset is a combination of two offsets in the binary code of the instruction, for example: BEQZ
   480  		if offs && offArgs != nil {
   481  			var left int
   482  			var right int
   483  			if len(offArgs) == 1 {
   484  				left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1])
   485  				right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
   486  			} else if len(offArgs) == 2 {
   487  				left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1])
   488  				right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
   489  			}
   490  
   491  			if instArgs == "" {
   492  				instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right)
   493  			} else {
   494  				instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right)
   495  			}
   496  		}
   497  
   498  		ops = append(ops, op)
   499  		opstrs[op] = opstr
   500  		if instFormatComment == "" {
   501  			instFormatComment = "// " + text[i].S
   502  		} else if strings.HasPrefix(op, "AM") {
   503  			instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S)
   504  		}
   505  		instFormatComments[op] = instFormatComment
   506  		// The parameter order of some instructions is inconsistent in encoding and syntax, such as BSTRINS.*
   507  		if instArgs != "" {
   508  			args := strings.Split(instFormatComment, " ")[2:]
   509  			tInstArgs := strings.Split(instArgs, ", ")
   510  			newOrderedInstArgs := []string{}
   511  			for _, a := range args {
   512  				a = strings.Split(a, ",")[0]
   513  				for _, aa := range tInstArgs {
   514  					if strings.Contains(aa, a) {
   515  						newOrderedInstArgs = append(newOrderedInstArgs, aa)
   516  						break
   517  					} else if a == "rd" && aa == "arg_fd" {
   518  						newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd")
   519  						break
   520  					}
   521  				}
   522  			}
   523  			instArgs = strings.Join(newOrderedInstArgs, ", ")
   524  		}
   525  		if strings.HasPrefix(op, "AM") {
   526  			instArgs = "arg_rd, arg_rk, arg_rj"
   527  		}
   528  		instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)
   529  		instFormats[op] = instFormat
   530  
   531  		i = j // next instruction
   532  	}
   533  
   534  	return
   535  }