github.com/chenzhuoyu/iasm@v0.9.1/x86_64/assembler.go (about) 1 package x86_64 2 3 import ( 4 `bytes` 5 `errors` 6 `fmt` 7 `math` 8 `strconv` 9 `strings` 10 `unicode` 11 12 `github.com/chenzhuoyu/iasm/expr` 13 ) 14 15 type ( 16 _TokenKind int 17 _Punctuation int 18 ) 19 20 const ( 21 _T_end _TokenKind = iota + 1 22 _T_int 23 _T_name 24 _T_punc 25 _T_space 26 ) 27 28 const ( 29 _P_plus _Punctuation = iota + 1 30 _P_minus 31 _P_star 32 _P_slash 33 _P_percent 34 _P_amp 35 _P_bar 36 _P_caret 37 _P_shl 38 _P_shr 39 _P_tilde 40 _P_lbrk 41 _P_rbrk 42 _P_dot 43 _P_comma 44 _P_colon 45 _P_dollar 46 _P_hash 47 ) 48 49 var _PUNC_NAME = map[_Punctuation]string { 50 _P_plus : "+", 51 _P_minus : "-", 52 _P_star : "*", 53 _P_slash : "/", 54 _P_percent : "%", 55 _P_amp : "&", 56 _P_bar : "|", 57 _P_caret : "^", 58 _P_shl : "<<", 59 _P_shr : ">>", 60 _P_tilde : "~", 61 _P_lbrk : "(", 62 _P_rbrk : ")", 63 _P_dot : ".", 64 _P_comma : ",", 65 _P_colon : ":", 66 _P_dollar : "$", 67 _P_hash : "#", 68 } 69 70 func (self _Punctuation) String() string { 71 if v, ok := _PUNC_NAME[self]; ok { 72 return v 73 } else { 74 return fmt.Sprintf("_Punctuation(%d)", self) 75 } 76 } 77 78 type _Token struct { 79 pos int 80 end int 81 u64 uint64 82 str string 83 tag _TokenKind 84 } 85 86 func (self *_Token) punc() _Punctuation { 87 return _Punctuation(self.u64) 88 } 89 90 func (self *_Token) String() string { 91 switch self.tag { 92 case _T_end : return "<END>" 93 case _T_int : return fmt.Sprintf("<INT %d>", self.u64) 94 case _T_punc : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64)) 95 case _T_name : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str)) 96 case _T_space : return "<SPACE>" 97 default : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str)) 98 } 99 } 100 101 func tokenEnd(p int, end int) _Token { 102 return _Token { 103 pos: p, 104 end: end, 105 tag: _T_end, 106 } 107 } 108 109 func tokenInt(p int, val uint64) _Token { 110 return _Token { 111 pos: p, 112 u64: val, 113 tag: _T_int, 114 } 115 } 116 117 func tokenName(p int, name string) _Token { 118 return _Token { 119 pos: p, 120 str: name, 121 tag: _T_name, 122 } 123 } 124 125 func tokenPunc(p int, punc _Punctuation) _Token { 126 return _Token { 127 pos: p, 128 tag: _T_punc, 129 u64: uint64(punc), 130 } 131 } 132 133 func tokenSpace(p int, end int) _Token { 134 return _Token { 135 pos: p, 136 end: end, 137 tag: _T_space, 138 } 139 } 140 141 // SyntaxError represents an error in the assembly syntax. 142 type SyntaxError struct { 143 Pos int 144 Row int 145 Src []rune 146 Reason string 147 } 148 149 // Error implements the error interface. 150 func (self *SyntaxError) Error() string { 151 if self.Pos < 0 { 152 return fmt.Sprintf("%s at line %d", self.Reason, self.Row) 153 } else { 154 return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1) 155 } 156 } 157 158 type _Tokenizer struct { 159 pos int 160 row int 161 src []rune 162 } 163 164 func (self *_Tokenizer) ch() rune { 165 return self.src[self.pos] 166 } 167 168 func (self *_Tokenizer) eof() bool { 169 return self.pos >= len(self.src) 170 } 171 172 func (self *_Tokenizer) rch() (ret rune) { 173 ret, self.pos = self.src[self.pos], self.pos + 1 174 return 175 } 176 177 func (self *_Tokenizer) err(pos int, msg string) *SyntaxError { 178 return &SyntaxError { 179 Pos : pos, 180 Row : self.row, 181 Src : self.src, 182 Reason : msg, 183 } 184 } 185 186 type _TrimState int 187 188 const ( 189 _TS_normal _TrimState = iota 190 _TS_slcomm 191 _TS_hscomm 192 _TS_string 193 _TS_escape 194 _TS_accept 195 _TS_nolast 196 ) 197 198 func (self *_Tokenizer) init(src string) { 199 var i int 200 var ch rune 201 var st _TrimState 202 203 /* set the source */ 204 self.pos = 0 205 self.src = []rune(src) 206 207 /* remove commends, including "//" and "##" */ 208 loop: for i, ch = range self.src { 209 switch { 210 case st == _TS_normal && ch == '/' : st = _TS_slcomm 211 case st == _TS_normal && ch == '"' : st = _TS_string 212 case st == _TS_normal && ch == ';' : st = _TS_accept; break loop 213 case st == _TS_normal && ch == '#' : st = _TS_hscomm 214 case st == _TS_slcomm && ch == '/' : st = _TS_nolast; break loop 215 case st == _TS_slcomm : st = _TS_normal 216 case st == _TS_hscomm && ch == '#' : st = _TS_nolast; break loop 217 case st == _TS_hscomm : st = _TS_normal 218 case st == _TS_string && ch == '"' : st = _TS_normal 219 case st == _TS_string && ch == '\\' : st = _TS_escape 220 case st == _TS_escape : st = _TS_string 221 } 222 } 223 224 /* check for errors */ 225 switch st { 226 case _TS_accept: self.src = self.src[:i] 227 case _TS_nolast: self.src = self.src[:i - 1] 228 case _TS_string: panic(self.err(i, "string is not terminated")) 229 case _TS_escape: panic(self.err(i, "escape sequence is not terminated")) 230 } 231 } 232 233 func (self *_Tokenizer) skip(check func(v rune) bool) { 234 for !self.eof() && check(self.ch()) { 235 self.pos++ 236 } 237 } 238 239 func (self *_Tokenizer) find(pos int, check func(v rune) bool) string { 240 self.skip(check) 241 return string(self.src[pos:self.pos]) 242 } 243 244 func (self *_Tokenizer) chrv(p int) _Token { 245 var err error 246 var val uint64 247 248 /* starting and ending position */ 249 p0 := p + 1 250 p1 := p0 + 1 251 252 /* find the end of the literal */ 253 for p1 < len(self.src) && self.src[p1] != '\'' { 254 if p1++; self.src[p1 - 1] == '\\' { 255 p1++ 256 } 257 } 258 259 /* empty literal */ 260 if p1 == p0 { 261 panic(self.err(p1, "empty character constant")) 262 } 263 264 /* check for EOF */ 265 if p1 == len(self.src) { 266 panic(self.err(p1, "unexpected EOF when scanning literals")) 267 } 268 269 /* parse the literal */ 270 if val, err = literal64(string(self.src[p0:p1])); err != nil { 271 panic(self.err(p0, "cannot parse literal: " + err.Error())) 272 } 273 274 /* skip the closing '\'' */ 275 self.pos = p1 + 1 276 return tokenInt(p, val) 277 } 278 279 func (self *_Tokenizer) numv(p int) _Token { 280 if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil { 281 panic(self.err(p, "invalid immediate value: " + err.Error())) 282 } else { 283 return tokenInt(p, val) 284 } 285 } 286 287 func (self *_Tokenizer) defv(p int, cc rune) _Token { 288 if isdigit(cc) { 289 return self.numv(p) 290 } else if isident0(cc) { 291 return tokenName(p, self.find(p, isident)) 292 } else { 293 panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc))) 294 } 295 } 296 297 func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token { 298 if self.eof() { 299 panic(self.err(self.pos, "unexpected EOF when scanning operators")) 300 } else if c := self.rch(); c != cc { 301 panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c))) 302 } else { 303 return tokenPunc(p, pp) 304 } 305 } 306 307 func (self *_Tokenizer) read() _Token { 308 var p int 309 var c rune 310 var t _Token 311 312 /* check for EOF */ 313 if self.eof() { 314 return tokenEnd(self.pos, self.pos) 315 } 316 317 /* skip spaces as needed */ 318 if p = self.pos; unicode.IsSpace(self.src[p]) { 319 self.skip(unicode.IsSpace) 320 return tokenSpace(p, self.pos) 321 } 322 323 /* check for line comments */ 324 if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' { 325 self.pos = len(self.src) 326 return tokenEnd(p, self.pos) 327 } 328 329 /* read the next character */ 330 p = self.pos 331 c = self.rch() 332 333 /* parse the next character */ 334 switch c { 335 case '+' : t = tokenPunc(p, _P_plus) 336 case '-' : t = tokenPunc(p, _P_minus) 337 case '*' : t = tokenPunc(p, _P_star) 338 case '/' : t = tokenPunc(p, _P_slash) 339 case '%' : t = tokenPunc(p, _P_percent) 340 case '&' : t = tokenPunc(p, _P_amp) 341 case '|' : t = tokenPunc(p, _P_bar) 342 case '^' : t = tokenPunc(p, _P_caret) 343 case '<' : t = self.rep2(p, _P_shl, '<') 344 case '>' : t = self.rep2(p, _P_shr, '>') 345 case '~' : t = tokenPunc(p, _P_tilde) 346 case '(' : t = tokenPunc(p, _P_lbrk) 347 case ')' : t = tokenPunc(p, _P_rbrk) 348 case '.' : t = tokenPunc(p, _P_dot) 349 case ',' : t = tokenPunc(p, _P_comma) 350 case ':' : t = tokenPunc(p, _P_colon) 351 case '$' : t = tokenPunc(p, _P_dollar) 352 case '#' : t = tokenPunc(p, _P_hash) 353 case '\'' : t = self.chrv(p) 354 default : t = self.defv(p, c) 355 } 356 357 /* mark the end of token */ 358 t.end = self.pos 359 return t 360 } 361 362 func (self *_Tokenizer) next() (tk _Token) { 363 for { 364 if tk = self.read(); tk.tag != _T_space { 365 return 366 } 367 } 368 } 369 370 // LabelKind indicates the type of label reference. 371 type LabelKind int 372 373 // OperandKind indicates the type of the operand. 374 type OperandKind int 375 376 // InstructionPrefix indicates the prefix bytes prepended to the instruction. 377 type InstructionPrefix byte 378 379 const ( 380 // OpImm means the operand is an immediate value. 381 OpImm OperandKind = 1 << iota 382 383 // OpReg means the operand is a register. 384 OpReg 385 386 // OpMem means the operand is a memory address. 387 OpMem 388 389 // OpLabel means the operand is a label, specifically for 390 // branch instructions. 391 OpLabel 392 ) 393 394 const ( 395 // Declaration means the label is a declaration. 396 Declaration LabelKind = iota + 1 397 398 // BranchTarget means the label should be treated as a branch target. 399 BranchTarget 400 401 // RelativeAddress means the label should be treated as a reference to 402 // the code section (e.g. RIP-relative addressing). 403 RelativeAddress 404 ) 405 406 const ( 407 // PrefixLock causes the processor's LOCK# signal to be asserted during execution of 408 // the accompanying instruction (turns the instruction into an atomic instruction). 409 // In a multiprocessor environment, the LOCK# signal insures that the processor 410 // has exclusive use of any shared memory while the signal is asserted. 411 PrefixLock InstructionPrefix = iota 412 413 // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment). 414 PrefixSegmentCS 415 416 // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment), 417 // this is the default section for most instructions if not specified. 418 PrefixSegmentDS 419 420 // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment). 421 PrefixSegmentES 422 423 // PrefixSegmentFS overrides the memory operation of this instruction to FS. 424 PrefixSegmentFS 425 426 // PrefixSegmentGS overrides the memory operation of this instruction to GS. 427 PrefixSegmentGS 428 429 // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment). 430 PrefixSegmentSS 431 ) 432 433 // ParsedLabel represents a label in the source, either a jump target or 434 // an RIP-relative addressing. 435 type ParsedLabel struct { 436 Name string 437 Kind LabelKind 438 } 439 440 // ParsedOperand represents an operand of an instruction in the source. 441 type ParsedOperand struct { 442 Op OperandKind 443 Imm int64 444 Reg Register 445 Label ParsedLabel 446 Memory MemoryAddress 447 } 448 449 // ParsedInstruction represents an instruction in the source. 450 type ParsedInstruction struct { 451 Mnemonic string 452 Operands []ParsedOperand 453 Prefixes []InstructionPrefix 454 } 455 456 func (self *ParsedInstruction) imm(v int64) { 457 self.Operands = append(self.Operands, ParsedOperand { 458 Op : OpImm, 459 Imm : v, 460 }) 461 } 462 463 func (self *ParsedInstruction) reg(v Register) { 464 self.Operands = append(self.Operands, ParsedOperand { 465 Op : OpReg, 466 Reg : v, 467 }) 468 } 469 470 func (self *ParsedInstruction) mem(v MemoryAddress) { 471 self.Operands = append(self.Operands, ParsedOperand { 472 Op : OpMem, 473 Memory : v, 474 }) 475 } 476 477 func (self *ParsedInstruction) target(v string) { 478 self.Operands = append(self.Operands, ParsedOperand { 479 Op : OpLabel, 480 Label : ParsedLabel { 481 Name: v, 482 Kind: BranchTarget, 483 }, 484 }) 485 } 486 487 func (self *ParsedInstruction) reference(v string) { 488 self.Operands = append(self.Operands, ParsedOperand { 489 Op : OpLabel, 490 Label : ParsedLabel { 491 Name: v, 492 Kind: RelativeAddress, 493 }, 494 }) 495 } 496 497 // LineKind indicates the type of ParsedLine. 498 type LineKind int 499 500 const ( 501 // LineLabel means the ParsedLine is a label. 502 LineLabel LineKind = iota + 1 503 504 // LineInstr means the ParsedLine is an instruction. 505 LineInstr 506 507 // LineCommand means the ParsedLine is a ParsedCommand. 508 LineCommand 509 ) 510 511 // ParsedLine represents a parsed source line. 512 type ParsedLine struct { 513 Row int 514 Src []rune 515 Kind LineKind 516 Label ParsedLabel 517 Command ParsedCommand 518 Instruction ParsedInstruction 519 } 520 521 // ParsedCommand represents a parsed assembly directive command. 522 type ParsedCommand struct { 523 Cmd string 524 Args []ParsedCommandArg 525 } 526 527 // ParsedCommandArg represents an argument of a ParsedCommand. 528 type ParsedCommandArg struct { 529 Value string 530 IsString bool 531 } 532 533 // Parser parses the source, and generates a sequence of ParsedInstruction's. 534 type Parser struct { 535 lex _Tokenizer 536 exp expr.Parser 537 } 538 539 const ( 540 rip Register64 = 0xff 541 ) 542 543 var _RegBranch = map[string]bool { 544 "jmp" : true, 545 "jmpq" : true, 546 "call" : true, 547 "callq" : true, 548 } 549 550 var _SegPrefix = map[string]InstructionPrefix { 551 "cs": PrefixSegmentCS, 552 "ds": PrefixSegmentDS, 553 "es": PrefixSegmentES, 554 "fs": PrefixSegmentFS, 555 "gs": PrefixSegmentGS, 556 "ss": PrefixSegmentSS, 557 } 558 559 func (self *Parser) i32(tk _Token, v int64) int32 { 560 if v >= math.MinInt32 && v <= math.MaxUint32 { 561 return int32(v) 562 } else { 563 panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v))) 564 } 565 } 566 567 func (self *Parser) err(pos int, msg string) *SyntaxError { 568 return &SyntaxError { 569 Pos : pos, 570 Row : self.lex.row, 571 Src : self.lex.src, 572 Reason : msg, 573 } 574 } 575 576 func (self *Parser) negv() int64 { 577 tk := self.lex.read() 578 tt := tk.tag 579 580 /* must be an integer */ 581 if tt != _T_int { 582 panic(self.err(tk.pos, "integer expected after '-'")) 583 } else { 584 return -int64(tk.u64) 585 } 586 } 587 588 func (self *Parser) eval(p int) (r int64) { 589 var e error 590 var v *expr.Expr 591 592 /* searching start */ 593 n := 1 594 q := p + 1 595 596 /* find the end of expression */ 597 for n > 0 && q < len(self.lex.src) { 598 switch self.lex.src[q] { 599 case '(' : q++; n++ 600 case ')' : q++; n-- 601 default : q++ 602 } 603 } 604 605 /* check for EOF */ 606 if n != 0 { 607 panic(self.err(q, "unexpected EOF when parsing expressions")) 608 } 609 610 /* evaluate the expression */ 611 if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil { 612 panic(self.err(p, "cannot evaluate expression: " + e.Error())) 613 } 614 615 /* evaluate the expression */ 616 if r, e = v.Evaluate(); e != nil { 617 panic(self.err(p, "cannot evaluate expression: " + e.Error())) 618 } 619 620 /* skip the last ')' */ 621 v.Free() 622 self.lex.pos = q 623 return 624 } 625 626 func (self *Parser) relx(tk _Token) { 627 if tk.tag != _T_punc || tk.punc() != _P_lbrk { 628 panic(self.err(tk.pos, "'(' expected for RIP-relative addressing")) 629 } else if tk = self.lex.next(); self.regx(tk) != rip { 630 panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register")) 631 } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk { 632 panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) 633 } 634 } 635 636 func (self *Parser) immx(tk _Token) int64 { 637 if tk.tag != _T_punc || tk.punc() != _P_dollar { 638 panic(self.err(tk.pos, "'$' expected for registers")) 639 } else if tk = self.lex.read(); tk.tag == _T_int { 640 return int64(tk.u64) 641 } else if tk.tag == _T_punc && tk.punc() == _P_lbrk { 642 return self.eval(self.lex.pos) 643 } else if tk.tag == _T_punc && tk.punc() == _P_minus { 644 return self.negv() 645 } else { 646 panic(self.err(tk.pos, "immediate value expected")) 647 } 648 } 649 650 func (self *Parser) regx(tk _Token) Register { 651 if tk.tag != _T_punc || tk.punc() != _P_percent { 652 panic(self.err(tk.pos, "'%' expected for registers")) 653 } else if tk = self.lex.read(); tk.tag != _T_name { 654 panic(self.err(tk.pos, "register name expected")) 655 } else if tk.str == "rip" { 656 return rip 657 } else if reg, ok := Registers[tk.str]; ok { 658 return reg 659 } else { 660 panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str))) 661 } 662 } 663 664 func (self *Parser) regv(tk _Token) Register { 665 if reg := self.regx(tk); reg == rip { 666 panic(self.err(tk.pos, "%rip is not accessable as a dedicated register")) 667 } else { 668 return reg 669 } 670 } 671 672 func (self *Parser) disp(vv int32) MemoryAddress { 673 switch tk := self.lex.next(); tk.tag { 674 case _T_end : return MemoryAddress { Displacement: vv } 675 case _T_punc : return self.relm(tk, vv) 676 default : panic(self.err(tk.pos, "',' or '(' expected")) 677 } 678 } 679 680 func (self *Parser) relm(tv _Token, disp int32) MemoryAddress { 681 var tk _Token 682 var tt _TokenKind 683 684 /* check for absolute addressing */ 685 if tv.punc() == _P_comma { 686 self.lex.pos-- 687 return MemoryAddress { Displacement: disp } 688 } 689 690 /* must be '(' now */ 691 if tv.punc() != _P_lbrk { 692 panic(self.err(tv.pos, "',' or '(' expected")) 693 } 694 695 /* read the next token */ 696 tk = self.lex.next() 697 tt = tk.tag 698 699 /* must be a punctuation */ 700 if tt != _T_punc { 701 panic(self.err(tk.pos, "'%' or ',' expected")) 702 } 703 704 /* check for base */ 705 switch tk.punc() { 706 case _P_percent : return self.base(tk, disp) 707 case _P_comma : return self.index(nil, disp) 708 default : panic(self.err(tk.pos, "'%' or ',' expected")) 709 } 710 } 711 712 func (self *Parser) base(tk _Token, disp int32) MemoryAddress { 713 rr := self.regx(tk) 714 nk := self.lex.next() 715 716 /* check for register indirection or base-index addressing */ 717 if !isReg64(rr) { 718 panic(self.err(tk.pos, "not a valid base register")) 719 } else if nk.tag != _T_punc { 720 panic(self.err(nk.pos, "',' or ')' expected")) 721 } else if nk.punc() == _P_comma { 722 return self.index(rr, disp) 723 } else if nk.punc() == _P_rbrk { 724 return MemoryAddress { Base: rr, Displacement: disp } 725 } else { 726 panic(self.err(nk.pos, "',' or ')' expected")) 727 } 728 } 729 730 func (self *Parser) index(base Register, disp int32) MemoryAddress { 731 tk := self.lex.next() 732 rr := self.regx(tk) 733 nk := self.lex.next() 734 735 /* check for scaled indexing */ 736 if base == rip { 737 panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) 738 } else if !isIndexable(rr) { 739 panic(self.err(tk.pos, "not a valid index register")) 740 } else if nk.tag != _T_punc { 741 panic(self.err(nk.pos, "',' or ')' expected")) 742 } else if nk.punc() == _P_comma { 743 return self.scale(base, rr, disp) 744 } else if nk.punc() == _P_rbrk { 745 return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp } 746 } else { 747 panic(self.err(nk.pos, "',' or ')' expected")) 748 } 749 } 750 751 func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress { 752 tk := self.lex.next() 753 tt := tk.tag 754 tv := tk.u64 755 756 /* must be an integer */ 757 if tt != _T_int { 758 panic(self.err(tk.pos, "integer expected")) 759 } 760 761 /* scale can only be 1, 2, 4 or 8 */ 762 if tv == 0 || (_Scales & (1 << tv)) == 0 { 763 panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8")) 764 } 765 766 /* read next token */ 767 tk = self.lex.next() 768 tt = tk.tag 769 770 /* check for the closing ')' */ 771 if tt != _T_punc || tk.punc() != _P_rbrk { 772 panic(self.err(tk.pos, "')' expected")) 773 } 774 775 /* construct the memory address */ 776 return MemoryAddress { 777 Base : base, 778 Index : index, 779 Scale : uint8(tv), 780 Displacement : disp, 781 } 782 } 783 784 func (self *Parser) cmds() *ParsedLine { 785 cmd := "" 786 pos := self.lex.pos 787 buf := []ParsedCommandArg(nil) 788 789 /* find the end of command */ 790 for p := pos; pos < len(self.lex.src); pos++ { 791 if unicode.IsSpace(self.lex.src[pos]) { 792 cmd = string(self.lex.src[p:pos]) 793 break 794 } 795 } 796 797 /* parse the arguments */ 798 loop: for { 799 switch self.next(&pos) { 800 case 0 : break loop 801 case '#' : break loop 802 case '"' : pos = self.strings(&buf, pos) 803 default : pos = self.expressions(&buf, pos) 804 } 805 } 806 807 /* construct the line */ 808 return &ParsedLine { 809 Row : self.lex.row, 810 Src : self.lex.src, 811 Kind : LineCommand, 812 Command : ParsedCommand { 813 Cmd : cmd, 814 Args : buf, 815 }, 816 } 817 } 818 819 func (self *Parser) feed(line string) *ParsedLine { 820 ff := true 821 rr := false 822 lk := false 823 824 /* reset the lexer */ 825 self.lex.row++ 826 self.lex.init(line) 827 828 /* parse the first token */ 829 tk := self.lex.next() 830 tt := tk.tag 831 832 /* it is a directive if it starts with a dot */ 833 if tk.tag == _T_punc && tk.punc() == _P_dot { 834 return self.cmds() 835 } 836 837 /* otherwise it could be labels or instructions */ 838 if tt != _T_name { 839 panic(self.err(tk.pos, "identifier expected")) 840 } 841 842 /* peek the next token */ 843 lex := self.lex 844 tkx := lex.next() 845 846 /* check for labels */ 847 if tkx.tag == _T_punc && tkx.punc() == _P_colon { 848 tkx = lex.next() 849 ttx := tkx.tag 850 851 /* the line must end here */ 852 if ttx != _T_end { 853 panic(self.err(tkx.pos, "garbage after label definition")) 854 } 855 856 /* construct the label */ 857 return &ParsedLine { 858 Row : self.lex.row, 859 Src : self.lex.src, 860 Kind : LineLabel, 861 Label : ParsedLabel { 862 Kind: Declaration, 863 Name: tk.str, 864 }, 865 } 866 } 867 868 /* special case for the "lock" prefix */ 869 if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" { 870 lk = true 871 tk = self.lex.next() 872 873 /* must be an instruction */ 874 if tk.tag != _T_name { 875 panic(self.err(tk.pos, "identifier expected")) 876 } 877 } 878 879 /* set the line kind and mnemonic */ 880 ret := &ParsedLine { 881 Row : self.lex.row, 882 Src : self.lex.src, 883 Kind : LineInstr, 884 Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) }, 885 } 886 887 /* check for LOCK prefix */ 888 if lk { 889 ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock) 890 } 891 892 /* parse all the operands */ 893 for { 894 tk = self.lex.next() 895 tt = tk.tag 896 897 /* check for end of line */ 898 if tt == _T_end { 899 break 900 } 901 902 /* expect a comma if not the first operand */ 903 if !ff { 904 if tt == _T_punc && tk.punc() == _P_comma { 905 tk = self.lex.next() 906 } else { 907 panic(self.err(tk.pos, "',' expected")) 908 } 909 } 910 911 /* not the first operand anymore */ 912 ff = false 913 tt = tk.tag 914 915 /* encountered an integer, must be a SIB memory address */ 916 if tt == _T_int { 917 ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) 918 continue 919 } 920 921 /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */ 922 if tt == _T_name { 923 ts := tk.str 924 tp := self.lex.pos 925 926 /* if the next token is EOF or a comma, it's a jumpt target */ 927 if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) { 928 self.lex.pos = tp 929 ret.Instruction.target(ts) 930 continue 931 } 932 933 /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */ 934 if tk.tag != _T_punc || tk.punc() != _P_colon { 935 self.relx(tk) 936 ret.Instruction.reference(ts) 937 continue 938 } 939 940 /* lookup segment prefixes */ 941 if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok { 942 panic(self.err(tk.pos, "invalid segment name")) 943 } else { 944 ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p) 945 } 946 947 /* read the next token */ 948 tk = self.lex.next() 949 tt = tk.tag 950 951 /* encountered an integer, must be a SIB memory address */ 952 if tt == _T_int { 953 ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) 954 continue 955 } 956 } 957 958 /* certain instructions may have a "*" before operands */ 959 if tt == _T_punc && tk.punc() == _P_star { 960 tk = self.lex.next() 961 tt = tk.tag 962 rr = true 963 } 964 965 /* ... otherwise it must be a punctuation */ 966 if tt != _T_punc { 967 panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) 968 } 969 970 /* check the operator */ 971 switch tk.punc() { 972 case _P_lbrk : break 973 case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue 974 case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue 975 case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue 976 default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) 977 } 978 979 /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)` 980 * read one more token to confirm */ 981 tk = self.lex.next() 982 tt = tk.tag 983 984 /* the next token is '%', it's a memory address, 985 * or ',' if it's a memory address without base, 986 * otherwise it must be in `(expr)(SIB)` form */ 987 if tk.tag == _T_punc && tk.punc() == _P_percent { 988 ret.Instruction.mem(self.base(tk, 0)) 989 } else if tk.tag == _T_punc && tk.punc() == _P_comma { 990 ret.Instruction.mem(self.index(nil, 0)) 991 } else { 992 ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos)))) 993 } 994 } 995 996 /* check "jmp" and "call" instructions */ 997 if !_RegBranch[ret.Instruction.Mnemonic] { 998 return ret 999 } else if len(ret.Instruction.Operands) != 1 { 1000 panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic))) 1001 } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel { 1002 panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic))) 1003 } else { 1004 return ret 1005 } 1006 } 1007 1008 func (self *Parser) next(p *int) rune { 1009 for { 1010 if *p >= len(self.lex.src) { 1011 return 0 1012 } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) { 1013 return cc 1014 } else { 1015 *p++ 1016 } 1017 } 1018 } 1019 1020 func (self *Parser) delim(p int) int { 1021 if cc := self.next(&p); cc == 0 { 1022 return p 1023 } else if cc == ',' { 1024 return p + 1 1025 } else { 1026 panic(self.err(p, "',' expected")) 1027 } 1028 } 1029 1030 func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int { 1031 var i int 1032 var e error 1033 var v string 1034 1035 /* find the end of string */ 1036 for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ { 1037 if self.lex.src[i] == '\\' { 1038 i++ 1039 } 1040 } 1041 1042 /* check for EOF */ 1043 if i == len(self.lex.src) { 1044 panic(self.err(i, "unexpected EOF when scanning strings")) 1045 } 1046 1047 /* unquote the string */ 1048 if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil { 1049 panic(self.err(p, "invalid string: " + e.Error())) 1050 } 1051 1052 /* add the argument to buffer */ 1053 *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true }) 1054 return self.delim(i + 1) 1055 } 1056 1057 func (self *Parser) directives(line string) { 1058 self.lex.row++ 1059 self.lex.init(line) 1060 1061 /* parse the first token */ 1062 tk := self.lex.next() 1063 tt := tk.tag 1064 1065 /* check for EOF */ 1066 if tt == _T_end { 1067 return 1068 } 1069 1070 /* must be a directive */ 1071 if tt != _T_punc || tk.punc() != _P_hash { 1072 panic(self.err(tk.pos, "'#' expected")) 1073 } 1074 1075 /* parse the line number */ 1076 tk = self.lex.next() 1077 tt = tk.tag 1078 1079 /* must be a line number, if it is, set the row number, and ignore the rest of the line */ 1080 if tt != _T_int { 1081 panic(self.err(tk.pos, "line number expected")) 1082 } else { 1083 self.lex.row = int(tk.u64) - 1 1084 } 1085 } 1086 1087 func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int { 1088 var i int 1089 var n int 1090 var s int 1091 1092 /* scan until the first standalone ',' or EOF */ 1093 loop: for i = p; i < len(self.lex.src); i++ { 1094 switch self.lex.src[i] { 1095 case ',' : if s == 0 { if n == 0 { break loop } } 1096 case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } } 1097 case '[', '{', '<' : if s == 0 { n++ } 1098 case '\\' : if s != 0 { i++ } 1099 case '\'' : if s != 2 { s ^= 1 } 1100 case '"' : if s != 1 { s ^= 2 } 1101 } 1102 } 1103 1104 /* check for EOF in strings */ 1105 if s != 0 { 1106 panic(self.err(i, "unexpected EOF when scanning strings")) 1107 } 1108 1109 /* check for bracket matching */ 1110 if n != 0 { 1111 panic(self.err(i, "unbalanced '{' or '[' or '<'")) 1112 } 1113 1114 /* add the argument to buffer */ 1115 *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) }) 1116 return self.delim(i) 1117 } 1118 1119 // Feed feeds the parser with one more line, and the parser 1120 // parses it into a ParsedLine. 1121 // 1122 // NOTE: Feed does not handle empty lines or multiple lines, 1123 // it panics when this happens. Use Parse to parse multiple 1124 // lines of assembly source. 1125 // 1126 func (self *Parser) Feed(src string) (ret *ParsedLine, err error) { 1127 var ok bool 1128 var ss string 1129 var vv interface{} 1130 1131 /* check for multiple lines */ 1132 if strings.ContainsRune(src, '\n') { 1133 return nil, errors.New("passing multiple lines to Feed()") 1134 } 1135 1136 /* check for blank lines */ 1137 if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") { 1138 return nil, errors.New("blank line or line with only comments or line-marks") 1139 } 1140 1141 /* setup error handler */ 1142 defer func() { 1143 if vv = recover(); vv != nil { 1144 if err, ok = vv.(*SyntaxError); !ok { 1145 panic(vv) 1146 } 1147 } 1148 }() 1149 1150 /* call the actual parser */ 1151 ret = self.feed(src) 1152 return 1153 } 1154 1155 // Parse parses the entire assembly source (possibly multiple lines) into 1156 // a sequence of *ParsedLine. 1157 func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) { 1158 var ok bool 1159 var ss string 1160 var vv interface{} 1161 1162 /* setup error handler */ 1163 defer func() { 1164 if vv = recover(); vv != nil { 1165 if err, ok = vv.(*SyntaxError); !ok { 1166 panic(vv) 1167 } 1168 } 1169 }() 1170 1171 /* feed every line */ 1172 for _, line := range strings.Split(src, "\n") { 1173 if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") { 1174 self.lex.row++ 1175 } else if ss[0] == '#' { 1176 self.directives(line) 1177 } else { 1178 ret = append(ret, self.feed(line)) 1179 } 1180 } 1181 1182 /* all done */ 1183 err = nil 1184 return 1185 } 1186 1187 // Directive handles the directive. 1188 func (self *Parser) Directive(line string) (err error) { 1189 var ok bool 1190 var ss string 1191 var vv interface{} 1192 1193 /* check for directives */ 1194 if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' { 1195 return errors.New("not a directive") 1196 } 1197 1198 /* setup error handler */ 1199 defer func() { 1200 if vv = recover(); vv != nil { 1201 if err, ok = vv.(*SyntaxError); !ok { 1202 panic(vv) 1203 } 1204 } 1205 }() 1206 1207 /* call the directive parser */ 1208 self.directives(line) 1209 return 1210 } 1211 1212 type _TermRepo struct { 1213 terms map[string]expr.Term 1214 } 1215 1216 func (self *_TermRepo) Get(name string) (expr.Term, error) { 1217 if ret, ok := self.terms[name]; ok { 1218 return ret, nil 1219 } else { 1220 return nil, errors.New("undefined name: " + name) 1221 } 1222 } 1223 1224 func (self *_TermRepo) label(name string) (*Label, error) { 1225 var ok bool 1226 var lb *Label 1227 var tr expr.Term 1228 1229 /* check for existing terms */ 1230 if tr, ok = self.terms[name]; ok { 1231 if lb, ok = tr.(*Label); ok { 1232 return lb, nil 1233 } else { 1234 return nil, errors.New("name is not a label: " + name) 1235 } 1236 } 1237 1238 /* create a new one as needed */ 1239 lb = new(Label) 1240 lb.Name = name 1241 1242 /* create the map if needed */ 1243 if self.terms == nil { 1244 self.terms = make(map[string]expr.Term, 1) 1245 } 1246 1247 /* register the label */ 1248 self.terms[name] = lb 1249 return lb, nil 1250 } 1251 1252 func (self *_TermRepo) define(name string, term expr.Term) { 1253 var ok bool 1254 var tr expr.Term 1255 1256 /* create the map if needed */ 1257 if self.terms == nil { 1258 self.terms = make(map[string]expr.Term, 1) 1259 } 1260 1261 /* check for existing terms */ 1262 if tr, ok = self.terms[name]; !ok { 1263 self.terms[name] = term 1264 } else if _, ok = tr.(*Label); !ok { 1265 self.terms[name] = term 1266 } else { 1267 panic("conflicting term types: " + name) 1268 } 1269 } 1270 1271 // _Command describes an assembler command. 1272 // 1273 // The _Command.args describes both the arity and argument type with characters, 1274 // the length is the number of arguments, the character itself represents the 1275 // argument type. 1276 // 1277 // Possible values are: 1278 // 1279 // s This argument should be a string 1280 // e This argument should be an expression 1281 // ? The next argument is optional, and must be the last argument. 1282 // 1283 type _Command struct { 1284 args string 1285 handler func(*Assembler, *Program, []ParsedCommandArg) error 1286 } 1287 1288 // Options controls the behavior of Assembler. 1289 type Options struct { 1290 // InstructionAliasing specifies whether to enable instruction aliasing. 1291 // Set to true enables instruction aliasing, and the Assembler will try harder to find instructions. 1292 InstructionAliasing bool 1293 1294 // IgnoreUnknownDirectives specifies whether to report errors when encountered unknown directives. 1295 // Set to true ignores all unknwon directives silently, useful for parsing generated assembly. 1296 IgnoreUnknownDirectives bool 1297 } 1298 1299 // Assembler assembles the entire assembly program and generates the corresponding 1300 // machine code representations. 1301 type Assembler struct { 1302 cc int 1303 ps Parser 1304 pc uintptr 1305 buf []byte 1306 main string 1307 opts Options 1308 repo _TermRepo 1309 expr expr.Parser 1310 line *ParsedLine 1311 } 1312 1313 var asmCommands = map[string]_Command { 1314 "org" : { "e" , (*Assembler).assembleCommandOrg }, 1315 "set" : { "ee" , (*Assembler).assembleCommandSet }, 1316 "byte" : { "e" , (*Assembler).assembleCommandByte }, 1317 "word" : { "e" , (*Assembler).assembleCommandWord }, 1318 "long" : { "e" , (*Assembler).assembleCommandLong }, 1319 "quad" : { "e" , (*Assembler).assembleCommandQuad }, 1320 "fill" : { "e?e" , (*Assembler).assembleCommandFill }, 1321 "space" : { "e?e" , (*Assembler).assembleCommandFill }, 1322 "align" : { "e?e" , (*Assembler).assembleCommandAlign }, 1323 "entry" : { "e" , (*Assembler).assembleCommandEntry }, 1324 "ascii" : { "s" , (*Assembler).assembleCommandAscii }, 1325 "asciz" : { "s" , (*Assembler).assembleCommandAsciz }, 1326 "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align }, 1327 } 1328 1329 func (self *Assembler) err(msg string) *SyntaxError { 1330 return &SyntaxError { 1331 Pos : -1, 1332 Row : self.line.Row, 1333 Src : self.line.Src, 1334 Reason : msg, 1335 } 1336 } 1337 1338 func (self *Assembler) eval(expr string) (int64, error) { 1339 if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil { 1340 return 0, err 1341 } else { 1342 return exp.Evaluate() 1343 } 1344 } 1345 1346 func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error { 1347 if i >= len(v.Args) { 1348 return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n)) 1349 } else if isString && !v.Args[i].IsString { 1350 return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd))) 1351 } else if !isString && v.Args[i].IsString { 1352 return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd))) 1353 } else { 1354 return nil 1355 } 1356 } 1357 1358 func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error { 1359 if v, err := self.repo.label(lb.Name); err != nil { 1360 return err 1361 } else { 1362 p.Link(v) 1363 return nil 1364 } 1365 } 1366 1367 func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) { 1368 var ok bool 1369 var pfx []byte 1370 var ops []interface{} 1371 var enc _InstructionEncoder 1372 1373 /* convert to lower-case */ 1374 opts := self.opts 1375 name := strings.ToLower(line.Mnemonic) 1376 1377 /* fix register-addressing branches if needed */ 1378 if opts.InstructionAliasing && len(line.Operands) == 1 { 1379 switch { 1380 case name == "retq" : name = "ret" 1381 case name == "movabsq" : name = "movq" 1382 case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq" 1383 case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp" 1384 case name == "call" && line.Operands[0].Op != OpLabel : name = "callq" 1385 case name == "callq" && line.Operands[0].Op == OpLabel : name = "call" 1386 } 1387 } 1388 1389 /* lookup from the alias table if needed */ 1390 if opts.InstructionAliasing { 1391 enc, ok = _InstructionAliases[name] 1392 } 1393 1394 /* lookup from the instruction table */ 1395 if !ok { 1396 enc, ok = Instructions[name] 1397 } 1398 1399 /* remove size suffix if possible */ 1400 if !ok && opts.InstructionAliasing { 1401 switch i := len(name) - 1; name[i] { 1402 case 'b', 'w', 'l', 'q': { 1403 enc, ok = Instructions[name[:i]] 1404 } 1405 } 1406 } 1407 1408 /* check for instruction name */ 1409 if !ok { 1410 return self.err("no such instruction: " + strconv.Quote(name)) 1411 } 1412 1413 /* allocate memory for prefix if any */ 1414 if len(line.Prefixes) != 0 { 1415 pfx = make([]byte, len(line.Prefixes)) 1416 } 1417 1418 /* convert the prefixes */ 1419 for i, v := range line.Prefixes { 1420 switch v { 1421 case PrefixLock : pfx[i] = _P_lock 1422 case PrefixSegmentCS : pfx[i] = _P_cs 1423 case PrefixSegmentDS : pfx[i] = _P_ds 1424 case PrefixSegmentES : pfx[i] = _P_es 1425 case PrefixSegmentFS : pfx[i] = _P_fs 1426 case PrefixSegmentGS : pfx[i] = _P_gs 1427 case PrefixSegmentSS : pfx[i] = _P_ss 1428 default : panic("unreachable: invalid segment prefix") 1429 } 1430 } 1431 1432 /* convert the operands */ 1433 for _, op := range line.Operands { 1434 switch op.Op { 1435 case OpImm : ops = append(ops, op.Imm) 1436 case OpReg : ops = append(ops, op.Reg) 1437 case OpMem : self.assembleInstrMem(&ops, op.Memory) 1438 case OpLabel : self.assembleInstrLabel(&ops, op.Label) 1439 default : panic("parser yields an invalid operand kind") 1440 } 1441 } 1442 1443 /* catch any exceptions in the encoder */ 1444 defer func() { 1445 if v := recover(); v != nil { 1446 err = self.err(fmt.Sprint(v)) 1447 } 1448 }() 1449 1450 /* encode the instruction */ 1451 enc(p, ops...).prefix = pfx 1452 return nil 1453 } 1454 1455 func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) { 1456 mem := new(MemoryOperand) 1457 *ops = append(*ops, mem) 1458 1459 /* check for RIP-relative addressing */ 1460 if addr.Base != rip { 1461 mem.Addr.Type = Memory 1462 mem.Addr.Memory = addr 1463 } else { 1464 mem.Addr.Type = Offset 1465 mem.Addr.Offset = RelativeOffset(addr.Displacement) 1466 } 1467 } 1468 1469 func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) { 1470 vk := label.Kind 1471 tr, err := self.repo.label(label.Name) 1472 1473 /* check for errors */ 1474 if err != nil { 1475 panic(err) 1476 } 1477 1478 /* check for branch target */ 1479 if vk == BranchTarget { 1480 *ops = append(*ops, tr) 1481 return 1482 } 1483 1484 /* add to ops */ 1485 *ops = append(*ops, &MemoryOperand { 1486 Addr: Addressable { 1487 Type : Reference, 1488 Reference : tr, 1489 }, 1490 }) 1491 } 1492 1493 func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error { 1494 var iv int 1495 var cc rune 1496 var ok bool 1497 var va bool 1498 var fn _Command 1499 1500 /* find the command */ 1501 if fn, ok = asmCommands[line.Cmd]; !ok { 1502 if self.opts.IgnoreUnknownDirectives { 1503 return nil 1504 } else { 1505 return self.err("no such command: " + strconv.Quote(line.Cmd)) 1506 } 1507 } 1508 1509 /* expected & real argument count */ 1510 argx := len(fn.args) 1511 argc := len(line.Args) 1512 1513 /* check the arguments */ 1514 loop: for iv, cc = range fn.args { 1515 switch cc { 1516 case '?' : va = true; break loop 1517 case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err } 1518 case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err } 1519 default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1520 } 1521 } 1522 1523 /* simple case: non-variadic command */ 1524 if !va { 1525 if argc == argx { 1526 return fn.handler(self, p, line.Args) 1527 } else { 1528 return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx)) 1529 } 1530 } 1531 1532 /* check for the descriptor */ 1533 if iv != argx - 2 { 1534 panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1535 } 1536 1537 /* variadic command and the final optional argument is set */ 1538 if argc == argx - 1 { 1539 switch fn.args[argx - 1] { 1540 case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err } 1541 case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err } 1542 default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1543 } 1544 } 1545 1546 /* check argument count */ 1547 if argc == argx - 1 || argc == argx - 2 { 1548 return fn.handler(self, p, line.Args) 1549 } else { 1550 return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1)) 1551 } 1552 } 1553 1554 func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error { 1555 var err error 1556 var val *expr.Expr 1557 1558 /* parse the expression */ 1559 if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil { 1560 return err 1561 } 1562 1563 /* add to the program */ 1564 addfn(p, val) 1565 return nil 1566 } 1567 1568 func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error { 1569 var err error 1570 var val int64 1571 1572 /* evaluate the expression */ 1573 if val, err = self.eval(argv[0].Value); err != nil { 1574 return err 1575 } 1576 1577 /* check for origin */ 1578 if val < 0 { 1579 return self.err(fmt.Sprintf("negative origin: %d", val)) 1580 } 1581 1582 /* ".org" must be the first command if any */ 1583 if self.cc != 1 { 1584 return self.err(".org must be the first command if present") 1585 } 1586 1587 /* set the initial program counter */ 1588 self.pc = uintptr(val) 1589 return nil 1590 } 1591 1592 func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error { 1593 var err error 1594 var val *expr.Expr 1595 1596 /* parse the expression */ 1597 if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil { 1598 return err 1599 } 1600 1601 /* define the new identifier */ 1602 self.repo.define(argv[0].Value, val) 1603 return nil 1604 } 1605 1606 func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error { 1607 return self.assembleCommandInt(p, argv, (*Program).Byte) 1608 } 1609 1610 func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error { 1611 return self.assembleCommandInt(p, argv, (*Program).Word) 1612 } 1613 1614 func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error { 1615 return self.assembleCommandInt(p, argv, (*Program).Long) 1616 } 1617 1618 func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error { 1619 return self.assembleCommandInt(p, argv, (*Program).Quad) 1620 } 1621 1622 func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error { 1623 var fv byte 1624 var nb int64 1625 var ex error 1626 1627 /* evaluate the size */ 1628 if nb, ex = self.eval(argv[0].Value); ex != nil { 1629 return ex 1630 } 1631 1632 /* check for filling size */ 1633 if nb < 0 { 1634 return self.err(fmt.Sprintf("negative filling size: %d", nb)) 1635 } 1636 1637 /* check for optional filling value */ 1638 if len(argv) == 2 { 1639 if val, err := self.eval(argv[1].Value); err != nil { 1640 return err 1641 } else if val < math.MinInt8 || val > math.MaxUint8 { 1642 return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val)) 1643 } else { 1644 fv = byte(val) 1645 } 1646 } 1647 1648 /* fill with specified byte */ 1649 p.Data(bytes.Repeat([]byte { fv }, int(nb))) 1650 return nil 1651 } 1652 1653 func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error { 1654 var nb int64 1655 var ex error 1656 var fv *expr.Expr 1657 1658 /* evaluate the size */ 1659 if nb, ex = self.eval(argv[0].Value); ex != nil { 1660 return ex 1661 } 1662 1663 /* check for alignment value */ 1664 if nb <= 0 { 1665 return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) 1666 } 1667 1668 /* alignment must be a power of 2 */ 1669 if (nb & (nb - 1)) != 0 { 1670 return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb)) 1671 } 1672 1673 /* check for optional filling value */ 1674 if len(argv) == 2 { 1675 if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { 1676 fv = v 1677 } else { 1678 return err 1679 } 1680 } 1681 1682 /* fill with specified byte, default to 0 if not specified */ 1683 p.Align(uint64(nb), fv) 1684 return nil 1685 } 1686 1687 func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error { 1688 name := argv[0].Value 1689 rbuf := []rune(name) 1690 1691 /* check all the characters */ 1692 for i, cc := range rbuf { 1693 if !isident0(cc) && (i == 0 || !isident(cc)) { 1694 return self.err("entry point must be a label name") 1695 } 1696 } 1697 1698 /* set the main entry point */ 1699 self.main = name 1700 return nil 1701 } 1702 1703 func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error { 1704 p.Data([]byte(argv[0].Value)) 1705 return nil 1706 } 1707 1708 func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error { 1709 p.Data(append([]byte(argv[0].Value), 0)) 1710 return nil 1711 } 1712 1713 func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error { 1714 var nb int64 1715 var ex error 1716 var fv *expr.Expr 1717 1718 /* evaluate the size */ 1719 if nb, ex = self.eval(argv[0].Value); ex != nil { 1720 return ex 1721 } 1722 1723 /* check for alignment value */ 1724 if nb <= 0 { 1725 return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) 1726 } 1727 1728 /* check for optional filling value */ 1729 if len(argv) == 2 { 1730 if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { 1731 fv = v 1732 } else { 1733 return err 1734 } 1735 } 1736 1737 /* fill with specified byte, default to 0 if not specified */ 1738 p.Align(1 << nb, fv) 1739 return nil 1740 } 1741 1742 // Base returns the origin. 1743 func (self *Assembler) Base() uintptr { 1744 return self.pc 1745 } 1746 1747 // Code returns the assembled machine code. 1748 func (self *Assembler) Code() []byte { 1749 return self.buf 1750 } 1751 1752 // Entry returns the address of the specified entry point, or the origin if not specified. 1753 func (self *Assembler) Entry() uintptr { 1754 if self.main == "" { 1755 return self.pc 1756 } else if tr, err := self.repo.Get(self.main); err != nil { 1757 panic(err) 1758 } else if val, err := tr.Evaluate(); err != nil { 1759 panic(err) 1760 } else { 1761 return uintptr(val) 1762 } 1763 } 1764 1765 // Options returns the internal options reference, changing it WILL affect this Assembler instance. 1766 func (self *Assembler) Options() *Options { 1767 return &self.opts 1768 } 1769 1770 // WithBase resets the origin to pc. 1771 func (self *Assembler) WithBase(pc uintptr) *Assembler { 1772 self.pc = pc 1773 return self 1774 } 1775 1776 // Assemble assembles the assembly source and save the machine code to internal buffer. 1777 func (self *Assembler) Assemble(src string) error { 1778 var err error 1779 var buf []*ParsedLine 1780 1781 /* parse the source */ 1782 if buf, err = self.ps.Parse(src); err != nil { 1783 return err 1784 } 1785 1786 /* create a new program */ 1787 p := DefaultArch.CreateProgram() 1788 defer p.Free() 1789 1790 /* process every line */ 1791 for _, self.line = range buf { 1792 switch self.cc++; self.line.Kind { 1793 case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err } 1794 case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err } 1795 case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err } 1796 default : panic("parser yields an invalid line kind") 1797 } 1798 } 1799 1800 /* assemble the program */ 1801 self.buf = p.Assemble(self.pc) 1802 return nil 1803 }