golang.org/x/arch@v0.17.0/ppc64/ppc64spec/spec.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Power64spec reads the “Power ISA V2.07” Manual 6 // to collect instruction encoding details and writes those details to standard output 7 // in CSV format. 8 // 9 // Usage: 10 // 11 // ppc64spec PowerISA_V2.07_PUBLIC.pdf >ppc64.csv 12 // 13 // Each CSV line contains four fields: 14 // 15 // instruction 16 // The instruction heading, such as "AAD imm8". 17 // mnemonic 18 // The instruction mnemonics, separated by | symbols. 19 // encoding 20 // The instruction encoding, a sequence of name@startbit| describing each bit field in turn. 21 // tags 22 // For now, empty. 23 // 24 // For more on the exact meaning of these fields, see the Power manual. 25 package main 26 27 import ( 28 "bufio" 29 "fmt" 30 "log" 31 "math" 32 "os" 33 "regexp" 34 "sort" 35 "strconv" 36 "strings" 37 38 "rsc.io/pdf" 39 ) 40 41 type Inst struct { 42 Name string 43 Text string 44 Enc string 45 } 46 47 const debugPage = 0 48 49 var stdout *bufio.Writer 50 51 func main() { 52 log.SetFlags(0) 53 log.SetPrefix("ppc64spec: ") 54 55 if len(os.Args) != 2 { 56 fmt.Fprintf(os.Stderr, "usage: ppc64spec file.pdf\n") 57 os.Exit(2) 58 } 59 60 f, err := pdf.Open(os.Args[1]) 61 if err != nil { 62 log.Fatal(err) 63 } 64 65 // Find instruction set reference in outline, to build instruction list. 66 instList := instHeadings(f.Outline()) 67 if len(instList) < 200 { 68 log.Fatalf("only found %d instructions in table of contents", len(instList)) 69 } 70 71 var all = []Inst{ 72 // Split across multiple columns and pages! 73 {"Count Leading Zeros Word X-form", "cntlzw RA, RS (Rc=0)\ncntlzw. RA, RS (Rc=1)", "31@0|RS@6|RA@11|///@16|26@21|Rc@31|"}, 74 } 75 76 for j, headline := range instList { 77 for _, inst := range all { 78 if headline == inst.Name { 79 instList[j] = "" 80 break 81 } 82 } 83 } 84 85 // Scan document looking for instructions. 86 // Must find exactly the ones in the outline. 87 n := f.NumPage() 88 for pageNum := 1; pageNum <= n; pageNum++ { 89 if debugPage > 0 && pageNum != debugPage { 90 continue 91 } 92 p := f.Page(pageNum) 93 table := parsePage(pageNum, p) 94 if len(table) == 0 { 95 continue 96 } 97 InstLoop: 98 for _, inst := range table { 99 for j, headline := range instList { 100 if inst.Name == headline { 101 instList[j] = "" 102 continue InstLoop 103 } 104 } 105 fmt.Fprintf(os.Stderr, "page %d: unexpected instruction %q\n", pageNum, inst.Name) 106 } 107 all = append(all, table...) 108 } 109 110 if debugPage == 0 { 111 for _, headline := range instList { 112 if headline != "" { 113 switch headline { 114 default: 115 fmt.Fprintf(os.Stderr, "missing instruction %q\n", headline) 116 case "CHKA": // ThumbEE 117 case "CPS": // system instruction 118 case "CPY": // synonym for MOV 119 case "ENTERX": // ThumbEE 120 case "F* (former VFP instruction mnemonics)": // synonyms 121 case "HB, HBL, HBLP, HBP": // ThumbEE 122 case "LEAVEX": // ThumbEE 123 case "MOV (shifted register)": // pseudo instruction for ASR, LSL, LSR, ROR, and RRX 124 case "NEG": // synonym for RSB 125 case "RFE": // system instruction 126 case "SMC (previously SMI)": // system instruction 127 case "SRS": // system instruction 128 case "SUBS PC, LR and related instructions": // system instruction 129 case "VAND (immediate)": // pseudo instruction 130 case "VCLE (register)": // pseudo instruction 131 case "VCLT (register)": // pseudo instruction 132 case "VORN (immediate)": // pseudo instruction 133 } 134 } 135 } 136 } 137 138 stdout = bufio.NewWriter(os.Stdout) 139 for _, inst := range all { 140 fmt.Fprintf(stdout, "%q,%q,%q,%q\n", inst.Name, strings.Replace(inst.Text, "\n", "|", -1), inst.Enc, "") 141 } 142 stdout.Flush() 143 144 } 145 146 func instHeadings(outline pdf.Outline) []string { 147 return appendInstHeadings(outline, nil) 148 } 149 150 var instRE = regexp.MustCompile(` ([A-Z0-9]+-form|Byte|Word|Doubleword|Halfword)($| \[)`) 151 var sectionRE = regexp.MustCompile(`^[0-9A-Z]+\.[0-9]`) 152 153 func appendInstHeadings(outline pdf.Outline, list []string) []string { 154 if strings.Contains(outline.Title, "Variable Length Encoding (VLE) Encoding") { 155 for _, child := range outline.Child { 156 vle = appendInstHeadings(child, vle) 157 } 158 return list 159 } 160 if instRE.MatchString(outline.Title) && !sectionRE.MatchString(outline.Title) { 161 list = append(list, outline.Title) 162 } 163 if outline.Title == "Transaction Abort Word Conditional" { 164 list = append(list, outline.Title+" X-form") 165 } 166 for _, child := range outline.Child { 167 list = appendInstHeadings(child, list) 168 } 169 return list 170 } 171 172 const inch = 72.0 173 174 func parsePage(num int, p pdf.Page) []Inst { 175 content := p.Content() 176 177 var text []pdf.Text 178 for _, t := range content.Text { 179 text = append(text, t) 180 } 181 182 text = findWords(text) 183 184 if debugPage > 0 { 185 for _, t := range text { 186 fmt.Println(t) 187 } 188 for _, r := range content.Rect { 189 fmt.Println(r) 190 } 191 } 192 193 // Look for instruction encodings. 194 // Some begin with a Helvetica-BoldOblique size 11 headline like "AND X-Form", 195 // is followed by Helvetica 9 mnemonic, and then a bit box with 196 // Helvetica 9 fields and Helvetica 7 bit offsets. 197 // Others use Arial,BoldItalic 11 for the headline, 198 // Arial 8 for the mnemonic, and Arial 4.2 for the bit offsets. 199 200 var insts []Inst 201 for { 202 // Heading 203 for len(text) > 0 && !match(text[0], "Helvetica-BoldOblique", 11, "") && !match(text[0], "Arial,BoldItalic", 11, "") && !match(text[0], "Arial,BoldItalic", 10, "") { 204 text = text[1:] 205 } 206 if len(text) == 0 { 207 break 208 } 209 heading := text[0].S 210 text = text[1:] 211 for len(text) > 0 && (match(text[0], "Helvetica-BoldOblique", 11, "") || match(text[0], "Arial,BoldItalic", 11, "") || match(text[0], "Arial,BoldItalic", 10, "")) { 212 heading += " " + text[0].S 213 text = text[1:] 214 } 215 heading = strings.Replace(heading, "]", "] ", -1) 216 heading = strings.Replace(heading, " ", " ", -1) 217 heading = strings.Replace(heading, "rEVX-form", "r EVX-form", -1) 218 heading = strings.Replace(heading, "eX-form", "e X-form", -1) 219 heading = strings.Replace(heading, "mSD4-form", "m SD4-form", -1) 220 heading = strings.Replace(heading, "eSCI8-form", "e SCI8-form", -1) 221 heading = strings.TrimSpace(heading) 222 if isVLE(heading) { 223 continue 224 } 225 226 // Mnemonic 227 if len(text) == 0 || (!match(text[0], "Helvetica", 9, "") && !match(text[0], "Helvetica-BoldOblique", 9, "") && !match(text[0], "Arial", 9, "") && !match(text[0], "Arial", 10, "")) { 228 continue 229 } 230 mnemonic := "" 231 y := text[0].Y 232 x0 := text[0].X 233 for len(text) > 0 && (match(text[0], "Helvetica", 9, "") || match(text[0], "Helvetica-BoldOblique", 9, "") || match(text[0], "Arial", 9, "") || match(text[0], "Courier", 8, "") || match(text[0], "LucidaConsole", 7.17, "") || text[0].Y == y) { 234 if text[0].Y != y { 235 if math.Abs(text[0].X-x0) > 4 { 236 break 237 } 238 mnemonic += "\n" 239 y = text[0].Y 240 } else if mnemonic != "" { 241 mnemonic += " " 242 } 243 mnemonic += text[0].S 244 text = text[1:] 245 } 246 247 // Encoding 248 bits, i := readBitBox(heading, content, text, num) 249 if i == 0 { 250 continue 251 } 252 253 insts = append(insts, Inst{heading, mnemonic, bits}) 254 } 255 return insts 256 } 257 258 var vle = []string{ 259 "System Call C-form,ESC-form", 260 } 261 262 func isVLE(s string) bool { 263 for _, v := range vle { 264 if s == v { 265 return true 266 } 267 } 268 return false 269 } 270 271 func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) { 272 // fields 273 i := 0 274 if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) { 275 fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline) 276 if len(text) > 0 { 277 fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0]) 278 } 279 return "", 0 280 } 281 sz := text[i].FontSize 282 y2 := text[i].Y 283 x2 := 0.0 284 for i < len(text) && text[i].Y == y2 { 285 if x2 < text[i].X+text[i].W { 286 x2 = text[i].X + text[i].W 287 } 288 i++ 289 } 290 y2 += sz / 2 291 292 // bit numbers 293 if i >= len(text) || text[i].S != "0" { 294 if headline == "Transaction Abort Doubleword Conditional X-form" { 295 // Split across the next page. 296 return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i 297 } 298 if headline == "Add Scaled Immediate SCI8-form" { 299 // Very strange fonts. 300 return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i 301 } 302 fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline) 303 if i < len(text) { 304 fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i]) 305 } 306 return "", 0 307 } 308 sz = text[i].FontSize 309 y1 := text[i].Y 310 x1 := text[i].X 311 for i < len(text) && text[i].Y == y1 { 312 if x2 < text[i].X+text[i].W { 313 x2 = text[i].X + text[i].W 314 } 315 i++ 316 } 317 318 if debugPage > 0 { 319 fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i]) 320 } 321 322 // Find lines (thin rectangles) separating bit fields. 323 var bottom, top pdf.Rect 324 const ( 325 yMargin = 0.25 * 72 326 xMargin = 1 * 72 327 ) 328 for _, r := range content.Rect { 329 // Only consider lines in the same column. 330 if (x1 < 306) != (r.Max.X < 306) { 331 continue 332 } 333 if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin { 334 if y1-yMargin < r.Min.Y && r.Min.Y < y1 { 335 bottom = r 336 } 337 if y2 < r.Min.Y && r.Min.Y < y2+8 { 338 top = r 339 } 340 } 341 } 342 343 if bottom.Min.X == 0 { 344 // maybe bit numbers are outside box; see doze, nap, sleep, rvwinkle. 345 for _, r := range content.Rect { 346 // Only consider lines in the same column. 347 if (x1 < 306) != (r.Max.X < 306) { 348 continue 349 } 350 if r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin { 351 if y1+sz/2 < r.Min.Y && r.Min.Y < y2 { 352 bottom = r 353 } 354 } 355 } 356 } 357 358 if debugPage > 0 { 359 fmt.Println("top", top, "bottom", bottom) 360 } 361 362 const ε = 0.1 * 72 363 var bars []pdf.Rect 364 for _, r := range content.Rect { 365 if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) { 366 bars = append(bars, r) 367 } 368 } 369 sort.Sort(RectHorizontal(bars)) 370 371 out := "" 372 for i := 0; i < len(bars)-1; i++ { 373 var sub []pdf.Text 374 x1, x2 := bars[i].Min.X, bars[i+1].Min.X 375 for _, t := range content.Text { 376 tx := t.X + t.W/2 377 ty := t.Y + t.FontSize/4 378 if x1 < tx && tx < x2 && y1 < ty && ty < y2 { 379 sub = append(sub, t) 380 } 381 } 382 var str []string 383 for _, t := range findWords(sub) { 384 str = append(str, t.S) 385 } 386 s := strings.Join(str, "@") 387 out += s + "|" 388 } 389 390 if out == "" { 391 fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline) 392 } 393 return out, i 394 } 395 396 type RectHorizontal []pdf.Rect 397 398 func (x RectHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 399 func (x RectHorizontal) Less(i, j int) bool { return x[i].Min.X < x[j].Min.X } 400 func (x RectHorizontal) Len() int { return len(x) } 401 402 func checkNoEncodings(num int, text []pdf.Text) { 403 for _, t := range text { 404 if match(t, "Helvetica-Bold", 9, "Encoding") { 405 fmt.Fprintf(os.Stderr, "page %d: unexpected encoding: %s\n", num, t.S) 406 } 407 } 408 } 409 410 func match(t pdf.Text, font string, size float64, substr string) bool { 411 return t.Font == font && (size == 0 || math.Abs(t.FontSize-size) < 0.1) && strings.Contains(t.S, substr) 412 } 413 414 func findWords(chars []pdf.Text) (words []pdf.Text) { 415 // Sort by Y coordinate and normalize. 416 const nudge = 1.5 417 sort.Sort(pdf.TextVertical(chars)) 418 old := -100000.0 419 for i, c := range chars { 420 if c.Y != old && math.Abs(old-c.Y) < nudge { 421 chars[i].Y = old 422 } else { 423 old = c.Y 424 } 425 } 426 427 // Sort by Y coordinate, breaking ties with X. 428 // This will bring letters in a single word together. 429 sort.Sort(pdf.TextVertical(chars)) 430 431 // Loop over chars. 432 for i := 0; i < len(chars); { 433 // Find all chars on line. 434 j := i + 1 435 for j < len(chars) && chars[j].Y == chars[i].Y { 436 j++ 437 } 438 var end float64 439 // Split line into words (really, phrases). 440 for k := i; k < j; { 441 ck := &chars[k] 442 s := ck.S 443 end = ck.X + ck.W 444 charSpace := ck.FontSize / 6 445 wordSpace := ck.FontSize * 2 / 3 446 l := k + 1 447 for l < j { 448 // Grow word. 449 cl := &chars[l] 450 if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace { 451 s += cl.S 452 end = cl.X + cl.W 453 l++ 454 continue 455 } 456 // Add space to phrase before next word. 457 if sameFont(cl.Font, ck.Font) && math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace { 458 s += " " + cl.S 459 end = cl.X + cl.W 460 l++ 461 continue 462 } 463 break 464 } 465 f := ck.Font 466 f = strings.TrimSuffix(f, ",Italic") 467 f = strings.TrimSuffix(f, "-Italic") 468 words = append(words, pdf.Text{ 469 Font: f, 470 FontSize: ck.FontSize, 471 X: ck.X, 472 Y: ck.Y, 473 W: end - ck.X, 474 S: s, 475 }) 476 k = l 477 } 478 i = j 479 } 480 481 // Split into two columns. 482 var col1, col2 []pdf.Text 483 for _, w := range words { 484 if w.X > 306 { 485 col2 = append(col2, w) 486 } else { 487 col1 = append(col1, w) 488 } 489 } 490 return append(col1, col2...) 491 } 492 493 func sameFont(f1, f2 string) bool { 494 f1 = strings.TrimSuffix(f1, ",Italic") 495 f1 = strings.TrimSuffix(f1, "-Italic") 496 f2 = strings.TrimSuffix(f1, ",Italic") 497 f2 = strings.TrimSuffix(f1, "-Italic") 498 return strings.TrimSuffix(f1, ",Italic") == strings.TrimSuffix(f2, ",Italic") || f1 == "Symbol" || f2 == "Symbol" || f1 == "TimesNewRoman" || f2 == "TimesNewRoman" 499 } 500 501 var jsFix = strings.NewReplacer( 502 // `\u003c`, `<`, 503 // `\u003e`, `>`, 504 // `\u0026`, `&`, 505 // `\u0009`, `\t`, 506 ) 507 508 func printTable(name string, table []Inst) { 509 _ = strconv.Atoi 510 }