github.com/cloudwego/iasm@v0.2.0/x86_64/assembler.go (about) 1 // 2 // Copyright 2024 CloudWeGo Authors 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 package x86_64 18 19 import ( 20 `bytes` 21 `errors` 22 `fmt` 23 `math` 24 `strconv` 25 `strings` 26 `unicode` 27 28 `github.com/cloudwego/iasm/expr` 29 ) 30 31 type ( 32 _TokenKind int 33 _Punctuation int 34 ) 35 36 const ( 37 _T_end _TokenKind = iota + 1 38 _T_int 39 _T_name 40 _T_punc 41 _T_space 42 ) 43 44 const ( 45 _P_plus _Punctuation = iota + 1 46 _P_minus 47 _P_star 48 _P_slash 49 _P_percent 50 _P_amp 51 _P_bar 52 _P_caret 53 _P_shl 54 _P_shr 55 _P_tilde 56 _P_lbrk 57 _P_rbrk 58 _P_dot 59 _P_comma 60 _P_colon 61 _P_dollar 62 _P_hash 63 ) 64 65 var _PUNC_NAME = map[_Punctuation]string { 66 _P_plus : "+", 67 _P_minus : "-", 68 _P_star : "*", 69 _P_slash : "/", 70 _P_percent : "%", 71 _P_amp : "&", 72 _P_bar : "|", 73 _P_caret : "^", 74 _P_shl : "<<", 75 _P_shr : ">>", 76 _P_tilde : "~", 77 _P_lbrk : "(", 78 _P_rbrk : ")", 79 _P_dot : ".", 80 _P_comma : ",", 81 _P_colon : ":", 82 _P_dollar : "$", 83 _P_hash : "#", 84 } 85 86 func (self _Punctuation) String() string { 87 if v, ok := _PUNC_NAME[self]; ok { 88 return v 89 } else { 90 return fmt.Sprintf("_Punctuation(%d)", self) 91 } 92 } 93 94 type _Token struct { 95 pos int 96 end int 97 u64 uint64 98 str string 99 tag _TokenKind 100 } 101 102 func (self *_Token) punc() _Punctuation { 103 return _Punctuation(self.u64) 104 } 105 106 func (self *_Token) String() string { 107 switch self.tag { 108 case _T_end : return "<END>" 109 case _T_int : return fmt.Sprintf("<INT %d>", self.u64) 110 case _T_punc : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64)) 111 case _T_name : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str)) 112 case _T_space : return "<SPACE>" 113 default : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str)) 114 } 115 } 116 117 func tokenEnd(p int, end int) _Token { 118 return _Token { 119 pos: p, 120 end: end, 121 tag: _T_end, 122 } 123 } 124 125 func tokenInt(p int, val uint64) _Token { 126 return _Token { 127 pos: p, 128 u64: val, 129 tag: _T_int, 130 } 131 } 132 133 func tokenName(p int, name string) _Token { 134 return _Token { 135 pos: p, 136 str: name, 137 tag: _T_name, 138 } 139 } 140 141 func tokenPunc(p int, punc _Punctuation) _Token { 142 return _Token { 143 pos: p, 144 tag: _T_punc, 145 u64: uint64(punc), 146 } 147 } 148 149 func tokenSpace(p int, end int) _Token { 150 return _Token { 151 pos: p, 152 end: end, 153 tag: _T_space, 154 } 155 } 156 157 // SyntaxError represents an error in the assembly syntax. 158 type SyntaxError struct { 159 Pos int 160 Row int 161 Src []rune 162 Reason string 163 } 164 165 // Error implements the error interface. 166 func (self *SyntaxError) Error() string { 167 if self.Pos < 0 { 168 return fmt.Sprintf("%s at line %d", self.Reason, self.Row) 169 } else { 170 return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1) 171 } 172 } 173 174 type _Tokenizer struct { 175 pos int 176 row int 177 src []rune 178 } 179 180 func (self *_Tokenizer) ch() rune { 181 return self.src[self.pos] 182 } 183 184 func (self *_Tokenizer) eof() bool { 185 return self.pos >= len(self.src) 186 } 187 188 func (self *_Tokenizer) rch() (ret rune) { 189 ret, self.pos = self.src[self.pos], self.pos + 1 190 return 191 } 192 193 func (self *_Tokenizer) err(pos int, msg string) *SyntaxError { 194 return &SyntaxError { 195 Pos : pos, 196 Row : self.row, 197 Src : self.src, 198 Reason : msg, 199 } 200 } 201 202 type _TrimState int 203 204 const ( 205 _TS_normal _TrimState = iota 206 _TS_slcomm 207 _TS_hscomm 208 _TS_string 209 _TS_escape 210 _TS_accept 211 _TS_nolast 212 ) 213 214 func (self *_Tokenizer) init(src string) { 215 var i int 216 var ch rune 217 var st _TrimState 218 219 /* set the source */ 220 self.pos = 0 221 self.src = []rune(src) 222 223 /* remove commends, including "//" and "##" */ 224 loop: for i, ch = range self.src { 225 switch { 226 case st == _TS_normal && ch == '/' : st = _TS_slcomm 227 case st == _TS_normal && ch == '"' : st = _TS_string 228 case st == _TS_normal && ch == ';' : st = _TS_accept; break loop 229 case st == _TS_normal && ch == '#' : st = _TS_hscomm 230 case st == _TS_slcomm && ch == '/' : st = _TS_nolast; break loop 231 case st == _TS_slcomm : st = _TS_normal 232 case st == _TS_hscomm && ch == '#' : st = _TS_nolast; break loop 233 case st == _TS_hscomm : st = _TS_normal 234 case st == _TS_string && ch == '"' : st = _TS_normal 235 case st == _TS_string && ch == '\\' : st = _TS_escape 236 case st == _TS_escape : st = _TS_string 237 } 238 } 239 240 /* check for errors */ 241 switch st { 242 case _TS_accept: self.src = self.src[:i] 243 case _TS_nolast: self.src = self.src[:i - 1] 244 case _TS_string: panic(self.err(i, "string is not terminated")) 245 case _TS_escape: panic(self.err(i, "escape sequence is not terminated")) 246 } 247 } 248 249 func (self *_Tokenizer) skip(check func(v rune) bool) { 250 for !self.eof() && check(self.ch()) { 251 self.pos++ 252 } 253 } 254 255 func (self *_Tokenizer) find(pos int, check func(v rune) bool) string { 256 self.skip(check) 257 return string(self.src[pos:self.pos]) 258 } 259 260 func (self *_Tokenizer) chrv(p int) _Token { 261 var err error 262 var val uint64 263 264 /* starting and ending position */ 265 p0 := p + 1 266 p1 := p0 + 1 267 268 /* find the end of the literal */ 269 for p1 < len(self.src) && self.src[p1] != '\'' { 270 if p1++; self.src[p1 - 1] == '\\' { 271 p1++ 272 } 273 } 274 275 /* empty literal */ 276 if p1 == p0 { 277 panic(self.err(p1, "empty character constant")) 278 } 279 280 /* check for EOF */ 281 if p1 == len(self.src) { 282 panic(self.err(p1, "unexpected EOF when scanning literals")) 283 } 284 285 /* parse the literal */ 286 if val, err = literal64(string(self.src[p0:p1])); err != nil { 287 panic(self.err(p0, "cannot parse literal: " + err.Error())) 288 } 289 290 /* skip the closing '\'' */ 291 self.pos = p1 + 1 292 return tokenInt(p, val) 293 } 294 295 func (self *_Tokenizer) numv(p int) _Token { 296 if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil { 297 panic(self.err(p, "invalid immediate value: " + err.Error())) 298 } else { 299 return tokenInt(p, val) 300 } 301 } 302 303 func (self *_Tokenizer) defv(p int, cc rune) _Token { 304 if isdigit(cc) { 305 return self.numv(p) 306 } else if isident0(cc) { 307 return tokenName(p, self.find(p, isident)) 308 } else { 309 panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc))) 310 } 311 } 312 313 func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token { 314 if self.eof() { 315 panic(self.err(self.pos, "unexpected EOF when scanning operators")) 316 } else if c := self.rch(); c != cc { 317 panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c))) 318 } else { 319 return tokenPunc(p, pp) 320 } 321 } 322 323 func (self *_Tokenizer) read() _Token { 324 var p int 325 var c rune 326 var t _Token 327 328 /* check for EOF */ 329 if self.eof() { 330 return tokenEnd(self.pos, self.pos) 331 } 332 333 /* skip spaces as needed */ 334 if p = self.pos; unicode.IsSpace(self.src[p]) { 335 self.skip(unicode.IsSpace) 336 return tokenSpace(p, self.pos) 337 } 338 339 /* check for line comments */ 340 if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' { 341 self.pos = len(self.src) 342 return tokenEnd(p, self.pos) 343 } 344 345 /* read the next character */ 346 p = self.pos 347 c = self.rch() 348 349 /* parse the next character */ 350 switch c { 351 case '+' : t = tokenPunc(p, _P_plus) 352 case '-' : t = tokenPunc(p, _P_minus) 353 case '*' : t = tokenPunc(p, _P_star) 354 case '/' : t = tokenPunc(p, _P_slash) 355 case '%' : t = tokenPunc(p, _P_percent) 356 case '&' : t = tokenPunc(p, _P_amp) 357 case '|' : t = tokenPunc(p, _P_bar) 358 case '^' : t = tokenPunc(p, _P_caret) 359 case '<' : t = self.rep2(p, _P_shl, '<') 360 case '>' : t = self.rep2(p, _P_shr, '>') 361 case '~' : t = tokenPunc(p, _P_tilde) 362 case '(' : t = tokenPunc(p, _P_lbrk) 363 case ')' : t = tokenPunc(p, _P_rbrk) 364 case '.' : t = tokenPunc(p, _P_dot) 365 case ',' : t = tokenPunc(p, _P_comma) 366 case ':' : t = tokenPunc(p, _P_colon) 367 case '$' : t = tokenPunc(p, _P_dollar) 368 case '#' : t = tokenPunc(p, _P_hash) 369 case '\'' : t = self.chrv(p) 370 default : t = self.defv(p, c) 371 } 372 373 /* mark the end of token */ 374 t.end = self.pos 375 return t 376 } 377 378 func (self *_Tokenizer) next() (tk _Token) { 379 for { 380 if tk = self.read(); tk.tag != _T_space { 381 return 382 } 383 } 384 } 385 386 // LabelKind indicates the type of label reference. 387 type LabelKind int 388 389 // OperandKind indicates the type of the operand. 390 type OperandKind int 391 392 // InstructionPrefix indicates the prefix bytes prepended to the instruction. 393 type InstructionPrefix byte 394 395 const ( 396 // OpImm means the operand is an immediate value. 397 OpImm OperandKind = 1 << iota 398 399 // OpReg means the operand is a register. 400 OpReg 401 402 // OpMem means the operand is a memory address. 403 OpMem 404 405 // OpLabel means the operand is a label, specifically for 406 // branch instructions. 407 OpLabel 408 ) 409 410 const ( 411 // Declaration means the label is a declaration. 412 Declaration LabelKind = iota + 1 413 414 // BranchTarget means the label should be treated as a branch target. 415 BranchTarget 416 417 // RelativeAddress means the label should be treated as a reference to 418 // the code section (e.g. RIP-relative addressing). 419 RelativeAddress 420 ) 421 422 const ( 423 // PrefixLock causes the processor's LOCK# signal to be asserted during execution of 424 // the accompanying instruction (turns the instruction into an atomic instruction). 425 // In a multiprocessor environment, the LOCK# signal insures that the processor 426 // has exclusive use of any shared memory while the signal is asserted. 427 PrefixLock InstructionPrefix = iota 428 429 // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment). 430 PrefixSegmentCS 431 432 // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment), 433 // this is the default section for most instructions if not specified. 434 PrefixSegmentDS 435 436 // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment). 437 PrefixSegmentES 438 439 // PrefixSegmentFS overrides the memory operation of this instruction to FS. 440 PrefixSegmentFS 441 442 // PrefixSegmentGS overrides the memory operation of this instruction to GS. 443 PrefixSegmentGS 444 445 // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment). 446 PrefixSegmentSS 447 ) 448 449 // ParsedLabel represents a label in the source, either a jump target or 450 // an RIP-relative addressing. 451 type ParsedLabel struct { 452 Name string 453 Kind LabelKind 454 } 455 456 // ParsedOperand represents an operand of an instruction in the source. 457 type ParsedOperand struct { 458 Op OperandKind 459 Imm int64 460 Reg Register 461 Label ParsedLabel 462 Memory MemoryAddress 463 } 464 465 // ParsedInstruction represents an instruction in the source. 466 type ParsedInstruction struct { 467 Mnemonic string 468 Operands []ParsedOperand 469 Prefixes []InstructionPrefix 470 } 471 472 func (self *ParsedInstruction) imm(v int64) { 473 self.Operands = append(self.Operands, ParsedOperand { 474 Op : OpImm, 475 Imm : v, 476 }) 477 } 478 479 func (self *ParsedInstruction) reg(v Register) { 480 self.Operands = append(self.Operands, ParsedOperand { 481 Op : OpReg, 482 Reg : v, 483 }) 484 } 485 486 func (self *ParsedInstruction) mem(v MemoryAddress) { 487 self.Operands = append(self.Operands, ParsedOperand { 488 Op : OpMem, 489 Memory : v, 490 }) 491 } 492 493 func (self *ParsedInstruction) target(v string) { 494 self.Operands = append(self.Operands, ParsedOperand { 495 Op : OpLabel, 496 Label : ParsedLabel { 497 Name: v, 498 Kind: BranchTarget, 499 }, 500 }) 501 } 502 503 func (self *ParsedInstruction) reference(v string) { 504 self.Operands = append(self.Operands, ParsedOperand { 505 Op : OpLabel, 506 Label : ParsedLabel { 507 Name: v, 508 Kind: RelativeAddress, 509 }, 510 }) 511 } 512 513 // LineKind indicates the type of ParsedLine. 514 type LineKind int 515 516 const ( 517 // LineLabel means the ParsedLine is a label. 518 LineLabel LineKind = iota + 1 519 520 // LineInstr means the ParsedLine is an instruction. 521 LineInstr 522 523 // LineCommand means the ParsedLine is a ParsedCommand. 524 LineCommand 525 ) 526 527 // ParsedLine represents a parsed source line. 528 type ParsedLine struct { 529 Row int 530 Src []rune 531 Kind LineKind 532 Label ParsedLabel 533 Command ParsedCommand 534 Instruction ParsedInstruction 535 } 536 537 // ParsedCommand represents a parsed assembly directive command. 538 type ParsedCommand struct { 539 Cmd string 540 Args []ParsedCommandArg 541 } 542 543 // ParsedCommandArg represents an argument of a ParsedCommand. 544 type ParsedCommandArg struct { 545 Value string 546 IsString bool 547 } 548 549 // Parser parses the source, and generates a sequence of ParsedInstruction's. 550 type Parser struct { 551 lex _Tokenizer 552 exp expr.Parser 553 } 554 555 const ( 556 rip Register64 = 0xff 557 ) 558 559 var _RegBranch = map[string]bool { 560 "jmp" : true, 561 "jmpq" : true, 562 "call" : true, 563 "callq" : true, 564 } 565 566 var _SegPrefix = map[string]InstructionPrefix { 567 "cs": PrefixSegmentCS, 568 "ds": PrefixSegmentDS, 569 "es": PrefixSegmentES, 570 "fs": PrefixSegmentFS, 571 "gs": PrefixSegmentGS, 572 "ss": PrefixSegmentSS, 573 } 574 575 func (self *Parser) i32(tk _Token, v int64) int32 { 576 if v >= math.MinInt32 && v <= math.MaxUint32 { 577 return int32(v) 578 } else { 579 panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v))) 580 } 581 } 582 583 func (self *Parser) err(pos int, msg string) *SyntaxError { 584 return &SyntaxError { 585 Pos : pos, 586 Row : self.lex.row, 587 Src : self.lex.src, 588 Reason : msg, 589 } 590 } 591 592 func (self *Parser) negv() int64 { 593 tk := self.lex.read() 594 tt := tk.tag 595 596 /* must be an integer */ 597 if tt != _T_int { 598 panic(self.err(tk.pos, "integer expected after '-'")) 599 } else { 600 return -int64(tk.u64) 601 } 602 } 603 604 func (self *Parser) eval(p int) (r int64) { 605 var e error 606 var v *expr.Expr 607 608 /* searching start */ 609 n := 1 610 q := p + 1 611 612 /* find the end of expression */ 613 for n > 0 && q < len(self.lex.src) { 614 switch self.lex.src[q] { 615 case '(' : q++; n++ 616 case ')' : q++; n-- 617 default : q++ 618 } 619 } 620 621 /* check for EOF */ 622 if n != 0 { 623 panic(self.err(q, "unexpected EOF when parsing expressions")) 624 } 625 626 /* evaluate the expression */ 627 if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil { 628 panic(self.err(p, "cannot evaluate expression: " + e.Error())) 629 } 630 631 /* evaluate the expression */ 632 if r, e = v.Evaluate(); e != nil { 633 panic(self.err(p, "cannot evaluate expression: " + e.Error())) 634 } 635 636 /* skip the last ')' */ 637 v.Free() 638 self.lex.pos = q 639 return 640 } 641 642 func (self *Parser) relx(tk _Token) { 643 if tk.tag != _T_punc || tk.punc() != _P_lbrk { 644 panic(self.err(tk.pos, "'(' expected for RIP-relative addressing")) 645 } else if tk = self.lex.next(); self.regx(tk) != rip { 646 panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register")) 647 } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk { 648 panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) 649 } 650 } 651 652 func (self *Parser) immx(tk _Token) int64 { 653 if tk.tag != _T_punc || tk.punc() != _P_dollar { 654 panic(self.err(tk.pos, "'$' expected for registers")) 655 } else if tk = self.lex.read(); tk.tag == _T_int { 656 return int64(tk.u64) 657 } else if tk.tag == _T_punc && tk.punc() == _P_lbrk { 658 return self.eval(self.lex.pos) 659 } else if tk.tag == _T_punc && tk.punc() == _P_minus { 660 return self.negv() 661 } else { 662 panic(self.err(tk.pos, "immediate value expected")) 663 } 664 } 665 666 func (self *Parser) regx(tk _Token) Register { 667 if tk.tag != _T_punc || tk.punc() != _P_percent { 668 panic(self.err(tk.pos, "'%' expected for registers")) 669 } else if tk = self.lex.read(); tk.tag != _T_name { 670 panic(self.err(tk.pos, "register name expected")) 671 } else if tk.str == "rip" { 672 return rip 673 } else if reg, ok := Registers[tk.str]; ok { 674 return reg 675 } else { 676 panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str))) 677 } 678 } 679 680 func (self *Parser) regv(tk _Token) Register { 681 if reg := self.regx(tk); reg == rip { 682 panic(self.err(tk.pos, "%rip is not accessable as a dedicated register")) 683 } else { 684 return reg 685 } 686 } 687 688 func (self *Parser) disp(vv int32) MemoryAddress { 689 switch tk := self.lex.next(); tk.tag { 690 case _T_end : return MemoryAddress { Displacement: vv } 691 case _T_punc : return self.relm(tk, vv) 692 default : panic(self.err(tk.pos, "',' or '(' expected")) 693 } 694 } 695 696 func (self *Parser) relm(tv _Token, disp int32) MemoryAddress { 697 var tk _Token 698 var tt _TokenKind 699 700 /* check for absolute addressing */ 701 if tv.punc() == _P_comma { 702 self.lex.pos-- 703 return MemoryAddress { Displacement: disp } 704 } 705 706 /* must be '(' now */ 707 if tv.punc() != _P_lbrk { 708 panic(self.err(tv.pos, "',' or '(' expected")) 709 } 710 711 /* read the next token */ 712 tk = self.lex.next() 713 tt = tk.tag 714 715 /* must be a punctuation */ 716 if tt != _T_punc { 717 panic(self.err(tk.pos, "'%' or ',' expected")) 718 } 719 720 /* check for base */ 721 switch tk.punc() { 722 case _P_percent : return self.base(tk, disp) 723 case _P_comma : return self.index(nil, disp) 724 default : panic(self.err(tk.pos, "'%' or ',' expected")) 725 } 726 } 727 728 func (self *Parser) base(tk _Token, disp int32) MemoryAddress { 729 rr := self.regx(tk) 730 nk := self.lex.next() 731 732 /* check for register indirection or base-index addressing */ 733 if !isReg64(rr) { 734 panic(self.err(tk.pos, "not a valid base register")) 735 } else if nk.tag != _T_punc { 736 panic(self.err(nk.pos, "',' or ')' expected")) 737 } else if nk.punc() == _P_comma { 738 return self.index(rr, disp) 739 } else if nk.punc() == _P_rbrk { 740 return MemoryAddress { Base: rr, Displacement: disp } 741 } else { 742 panic(self.err(nk.pos, "',' or ')' expected")) 743 } 744 } 745 746 func (self *Parser) index(base Register, disp int32) MemoryAddress { 747 tk := self.lex.next() 748 rr := self.regx(tk) 749 nk := self.lex.next() 750 751 /* check for scaled indexing */ 752 if base == rip { 753 panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling")) 754 } else if !isIndexable(rr) { 755 panic(self.err(tk.pos, "not a valid index register")) 756 } else if nk.tag != _T_punc { 757 panic(self.err(nk.pos, "',' or ')' expected")) 758 } else if nk.punc() == _P_comma { 759 return self.scale(base, rr, disp) 760 } else if nk.punc() == _P_rbrk { 761 return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp } 762 } else { 763 panic(self.err(nk.pos, "',' or ')' expected")) 764 } 765 } 766 767 func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress { 768 tk := self.lex.next() 769 tt := tk.tag 770 tv := tk.u64 771 772 /* must be an integer */ 773 if tt != _T_int { 774 panic(self.err(tk.pos, "integer expected")) 775 } 776 777 /* scale can only be 1, 2, 4 or 8 */ 778 if tv == 0 || (_Scales & (1 << tv)) == 0 { 779 panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8")) 780 } 781 782 /* read next token */ 783 tk = self.lex.next() 784 tt = tk.tag 785 786 /* check for the closing ')' */ 787 if tt != _T_punc || tk.punc() != _P_rbrk { 788 panic(self.err(tk.pos, "')' expected")) 789 } 790 791 /* construct the memory address */ 792 return MemoryAddress { 793 Base : base, 794 Index : index, 795 Scale : uint8(tv), 796 Displacement : disp, 797 } 798 } 799 800 func (self *Parser) cmds() *ParsedLine { 801 cmd := "" 802 pos := self.lex.pos 803 buf := []ParsedCommandArg(nil) 804 805 /* find the end of command */ 806 for p := pos; pos < len(self.lex.src); pos++ { 807 if unicode.IsSpace(self.lex.src[pos]) { 808 cmd = string(self.lex.src[p:pos]) 809 break 810 } 811 } 812 813 /* parse the arguments */ 814 loop: for { 815 switch self.next(&pos) { 816 case 0 : break loop 817 case '#' : break loop 818 case '"' : pos = self.strings(&buf, pos) 819 default : pos = self.expressions(&buf, pos) 820 } 821 } 822 823 /* construct the line */ 824 return &ParsedLine { 825 Row : self.lex.row, 826 Src : self.lex.src, 827 Kind : LineCommand, 828 Command : ParsedCommand { 829 Cmd : cmd, 830 Args : buf, 831 }, 832 } 833 } 834 835 func (self *Parser) feed(line string) *ParsedLine { 836 ff := true 837 rr := false 838 lk := false 839 840 /* reset the lexer */ 841 self.lex.row++ 842 self.lex.init(line) 843 844 /* parse the first token */ 845 tk := self.lex.next() 846 tt := tk.tag 847 848 /* it is a directive if it starts with a dot */ 849 if tk.tag == _T_punc && tk.punc() == _P_dot { 850 return self.cmds() 851 } 852 853 /* otherwise it could be labels or instructions */ 854 if tt != _T_name { 855 panic(self.err(tk.pos, "identifier expected")) 856 } 857 858 /* peek the next token */ 859 lex := self.lex 860 tkx := lex.next() 861 862 /* check for labels */ 863 if tkx.tag == _T_punc && tkx.punc() == _P_colon { 864 tkx = lex.next() 865 ttx := tkx.tag 866 867 /* the line must end here */ 868 if ttx != _T_end { 869 panic(self.err(tkx.pos, "garbage after label definition")) 870 } 871 872 /* construct the label */ 873 return &ParsedLine { 874 Row : self.lex.row, 875 Src : self.lex.src, 876 Kind : LineLabel, 877 Label : ParsedLabel { 878 Kind: Declaration, 879 Name: tk.str, 880 }, 881 } 882 } 883 884 /* special case for the "lock" prefix */ 885 if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" { 886 lk = true 887 tk = self.lex.next() 888 889 /* must be an instruction */ 890 if tk.tag != _T_name { 891 panic(self.err(tk.pos, "identifier expected")) 892 } 893 } 894 895 /* set the line kind and mnemonic */ 896 ret := &ParsedLine { 897 Row : self.lex.row, 898 Src : self.lex.src, 899 Kind : LineInstr, 900 Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) }, 901 } 902 903 /* check for LOCK prefix */ 904 if lk { 905 ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock) 906 } 907 908 /* parse all the operands */ 909 for { 910 tk = self.lex.next() 911 tt = tk.tag 912 913 /* check for end of line */ 914 if tt == _T_end { 915 break 916 } 917 918 /* expect a comma if not the first operand */ 919 if !ff { 920 if tt == _T_punc && tk.punc() == _P_comma { 921 tk = self.lex.next() 922 } else { 923 panic(self.err(tk.pos, "',' expected")) 924 } 925 } 926 927 /* not the first operand anymore */ 928 ff = false 929 tt = tk.tag 930 931 /* encountered an integer, must be a SIB memory address */ 932 if tt == _T_int { 933 ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) 934 continue 935 } 936 937 /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */ 938 if tt == _T_name { 939 ts := tk.str 940 tp := self.lex.pos 941 942 /* if the next token is EOF or a comma, it's a jumpt target */ 943 if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) { 944 self.lex.pos = tp 945 ret.Instruction.target(ts) 946 continue 947 } 948 949 /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */ 950 if tk.tag != _T_punc || tk.punc() != _P_colon { 951 self.relx(tk) 952 ret.Instruction.reference(ts) 953 continue 954 } 955 956 /* lookup segment prefixes */ 957 if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok { 958 panic(self.err(tk.pos, "invalid segment name")) 959 } else { 960 ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p) 961 } 962 963 /* read the next token */ 964 tk = self.lex.next() 965 tt = tk.tag 966 967 /* encountered an integer, must be a SIB memory address */ 968 if tt == _T_int { 969 ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64)))) 970 continue 971 } 972 } 973 974 /* certain instructions may have a "*" before operands */ 975 if tt == _T_punc && tk.punc() == _P_star { 976 tk = self.lex.next() 977 tt = tk.tag 978 rr = true 979 } 980 981 /* ... otherwise it must be a punctuation */ 982 if tt != _T_punc { 983 panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) 984 } 985 986 /* check the operator */ 987 switch tk.punc() { 988 case _P_lbrk : break 989 case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue 990 case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue 991 case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue 992 default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected")) 993 } 994 995 /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)` 996 * read one more token to confirm */ 997 tk = self.lex.next() 998 tt = tk.tag 999 1000 /* the next token is '%', it's a memory address, 1001 * or ',' if it's a memory address without base, 1002 * otherwise it must be in `(expr)(SIB)` form */ 1003 if tk.tag == _T_punc && tk.punc() == _P_percent { 1004 ret.Instruction.mem(self.base(tk, 0)) 1005 } else if tk.tag == _T_punc && tk.punc() == _P_comma { 1006 ret.Instruction.mem(self.index(nil, 0)) 1007 } else { 1008 ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos)))) 1009 } 1010 } 1011 1012 /* check "jmp" and "call" instructions */ 1013 if !_RegBranch[ret.Instruction.Mnemonic] { 1014 return ret 1015 } else if len(ret.Instruction.Operands) != 1 { 1016 panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic))) 1017 } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel { 1018 panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic))) 1019 } else { 1020 return ret 1021 } 1022 } 1023 1024 func (self *Parser) next(p *int) rune { 1025 for { 1026 if *p >= len(self.lex.src) { 1027 return 0 1028 } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) { 1029 return cc 1030 } else { 1031 *p++ 1032 } 1033 } 1034 } 1035 1036 func (self *Parser) delim(p int) int { 1037 if cc := self.next(&p); cc == 0 { 1038 return p 1039 } else if cc == ',' { 1040 return p + 1 1041 } else { 1042 panic(self.err(p, "',' expected")) 1043 } 1044 } 1045 1046 func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int { 1047 var i int 1048 var e error 1049 var v string 1050 1051 /* find the end of string */ 1052 for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ { 1053 if self.lex.src[i] == '\\' { 1054 i++ 1055 } 1056 } 1057 1058 /* check for EOF */ 1059 if i == len(self.lex.src) { 1060 panic(self.err(i, "unexpected EOF when scanning strings")) 1061 } 1062 1063 /* unquote the string */ 1064 if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil { 1065 panic(self.err(p, "invalid string: " + e.Error())) 1066 } 1067 1068 /* add the argument to buffer */ 1069 *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true }) 1070 return self.delim(i + 1) 1071 } 1072 1073 func (self *Parser) directives(line string) { 1074 self.lex.row++ 1075 self.lex.init(line) 1076 1077 /* parse the first token */ 1078 tk := self.lex.next() 1079 tt := tk.tag 1080 1081 /* check for EOF */ 1082 if tt == _T_end { 1083 return 1084 } 1085 1086 /* must be a directive */ 1087 if tt != _T_punc || tk.punc() != _P_hash { 1088 panic(self.err(tk.pos, "'#' expected")) 1089 } 1090 1091 /* parse the line number */ 1092 tk = self.lex.next() 1093 tt = tk.tag 1094 1095 /* must be a line number, if it is, set the row number, and ignore the rest of the line */ 1096 if tt != _T_int { 1097 panic(self.err(tk.pos, "line number expected")) 1098 } else { 1099 self.lex.row = int(tk.u64) - 1 1100 } 1101 } 1102 1103 func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int { 1104 var i int 1105 var n int 1106 var s int 1107 1108 /* scan until the first standalone ',' or EOF */ 1109 loop: for i = p; i < len(self.lex.src); i++ { 1110 switch self.lex.src[i] { 1111 case ',' : if s == 0 { if n == 0 { break loop } } 1112 case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } } 1113 case '[', '{', '<' : if s == 0 { n++ } 1114 case '\\' : if s != 0 { i++ } 1115 case '\'' : if s != 2 { s ^= 1 } 1116 case '"' : if s != 1 { s ^= 2 } 1117 } 1118 } 1119 1120 /* check for EOF in strings */ 1121 if s != 0 { 1122 panic(self.err(i, "unexpected EOF when scanning strings")) 1123 } 1124 1125 /* check for bracket matching */ 1126 if n != 0 { 1127 panic(self.err(i, "unbalanced '{' or '[' or '<'")) 1128 } 1129 1130 /* add the argument to buffer */ 1131 *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) }) 1132 return self.delim(i) 1133 } 1134 1135 // Feed feeds the parser with one more line, and the parser 1136 // parses it into a ParsedLine. 1137 // 1138 // NOTE: Feed does not handle empty lines or multiple lines, 1139 // it panics when this happens. Use Parse to parse multiple 1140 // lines of assembly source. 1141 // 1142 func (self *Parser) Feed(src string) (ret *ParsedLine, err error) { 1143 var ok bool 1144 var ss string 1145 var vv interface{} 1146 1147 /* check for multiple lines */ 1148 if strings.ContainsRune(src, '\n') { 1149 return nil, errors.New("passing multiple lines to Feed()") 1150 } 1151 1152 /* check for blank lines */ 1153 if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") { 1154 return nil, errors.New("blank line or line with only comments or line-marks") 1155 } 1156 1157 /* setup error handler */ 1158 defer func() { 1159 if vv = recover(); vv != nil { 1160 if err, ok = vv.(*SyntaxError); !ok { 1161 panic(vv) 1162 } 1163 } 1164 }() 1165 1166 /* call the actual parser */ 1167 ret = self.feed(src) 1168 return 1169 } 1170 1171 // Parse parses the entire assembly source (possibly multiple lines) into 1172 // a sequence of *ParsedLine. 1173 func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) { 1174 var ok bool 1175 var ss string 1176 var vv interface{} 1177 1178 /* setup error handler */ 1179 defer func() { 1180 if vv = recover(); vv != nil { 1181 if err, ok = vv.(*SyntaxError); !ok { 1182 panic(vv) 1183 } 1184 } 1185 }() 1186 1187 /* feed every line */ 1188 for _, line := range strings.Split(src, "\n") { 1189 if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") { 1190 self.lex.row++ 1191 } else if ss[0] == '#' { 1192 self.directives(line) 1193 } else { 1194 ret = append(ret, self.feed(line)) 1195 } 1196 } 1197 1198 /* all done */ 1199 err = nil 1200 return 1201 } 1202 1203 // Directive handles the directive. 1204 func (self *Parser) Directive(line string) (err error) { 1205 var ok bool 1206 var ss string 1207 var vv interface{} 1208 1209 /* check for directives */ 1210 if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' { 1211 return errors.New("not a directive") 1212 } 1213 1214 /* setup error handler */ 1215 defer func() { 1216 if vv = recover(); vv != nil { 1217 if err, ok = vv.(*SyntaxError); !ok { 1218 panic(vv) 1219 } 1220 } 1221 }() 1222 1223 /* call the directive parser */ 1224 self.directives(line) 1225 return 1226 } 1227 1228 type _TermRepo struct { 1229 terms map[string]expr.Term 1230 } 1231 1232 func (self *_TermRepo) Get(name string) (expr.Term, error) { 1233 if ret, ok := self.terms[name]; ok { 1234 return ret, nil 1235 } else { 1236 return nil, errors.New("undefined name: " + name) 1237 } 1238 } 1239 1240 func (self *_TermRepo) label(name string) (*Label, error) { 1241 var ok bool 1242 var lb *Label 1243 var tr expr.Term 1244 1245 /* check for existing terms */ 1246 if tr, ok = self.terms[name]; ok { 1247 if lb, ok = tr.(*Label); ok { 1248 return lb, nil 1249 } else { 1250 return nil, errors.New("name is not a label: " + name) 1251 } 1252 } 1253 1254 /* create a new one as needed */ 1255 lb = new(Label) 1256 lb.Name = name 1257 1258 /* create the map if needed */ 1259 if self.terms == nil { 1260 self.terms = make(map[string]expr.Term, 1) 1261 } 1262 1263 /* register the label */ 1264 self.terms[name] = lb 1265 return lb, nil 1266 } 1267 1268 func (self *_TermRepo) define(name string, term expr.Term) { 1269 var ok bool 1270 var tr expr.Term 1271 1272 /* create the map if needed */ 1273 if self.terms == nil { 1274 self.terms = make(map[string]expr.Term, 1) 1275 } 1276 1277 /* check for existing terms */ 1278 if tr, ok = self.terms[name]; !ok { 1279 self.terms[name] = term 1280 } else if _, ok = tr.(*Label); !ok { 1281 self.terms[name] = term 1282 } else { 1283 panic("conflicting term types: " + name) 1284 } 1285 } 1286 1287 // _Command describes an assembler command. 1288 // 1289 // The _Command.args describes both the arity and argument type with characters, 1290 // the length is the number of arguments, the character itself represents the 1291 // argument type. 1292 // 1293 // Possible values are: 1294 // 1295 // s This argument should be a string 1296 // e This argument should be an expression 1297 // ? The next argument is optional, and must be the last argument. 1298 // 1299 type _Command struct { 1300 args string 1301 handler func(*Assembler, *Program, []ParsedCommandArg) error 1302 } 1303 1304 // Options controls the behavior of Assembler. 1305 type Options struct { 1306 // InstructionAliasing specifies whether to enable instruction aliasing. 1307 // Set to true enables instruction aliasing, and the Assembler will try harder to find instructions. 1308 InstructionAliasing bool 1309 1310 // IgnoreUnknownDirectives specifies whether to report errors when encountered unknown directives. 1311 // Set to true ignores all unknwon directives silently, useful for parsing generated assembly. 1312 IgnoreUnknownDirectives bool 1313 } 1314 1315 // Assembler assembles the entire assembly program and generates the corresponding 1316 // machine code representations. 1317 type Assembler struct { 1318 cc int 1319 ps Parser 1320 pc uintptr 1321 buf []byte 1322 main string 1323 opts Options 1324 repo _TermRepo 1325 expr expr.Parser 1326 line *ParsedLine 1327 } 1328 1329 var asmCommands = map[string]_Command { 1330 "org" : { "e" , (*Assembler).assembleCommandOrg }, 1331 "set" : { "ee" , (*Assembler).assembleCommandSet }, 1332 "byte" : { "e" , (*Assembler).assembleCommandByte }, 1333 "word" : { "e" , (*Assembler).assembleCommandWord }, 1334 "long" : { "e" , (*Assembler).assembleCommandLong }, 1335 "quad" : { "e" , (*Assembler).assembleCommandQuad }, 1336 "fill" : { "e?e" , (*Assembler).assembleCommandFill }, 1337 "space" : { "e?e" , (*Assembler).assembleCommandFill }, 1338 "align" : { "e?e" , (*Assembler).assembleCommandAlign }, 1339 "entry" : { "e" , (*Assembler).assembleCommandEntry }, 1340 "ascii" : { "s" , (*Assembler).assembleCommandAscii }, 1341 "asciz" : { "s" , (*Assembler).assembleCommandAsciz }, 1342 "p2align" : { "e?e" , (*Assembler).assembleCommandP2Align }, 1343 } 1344 1345 func (self *Assembler) err(msg string) *SyntaxError { 1346 return &SyntaxError { 1347 Pos : -1, 1348 Row : self.line.Row, 1349 Src : self.line.Src, 1350 Reason : msg, 1351 } 1352 } 1353 1354 func (self *Assembler) eval(expr string) (int64, error) { 1355 if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil { 1356 return 0, err 1357 } else { 1358 return exp.Evaluate() 1359 } 1360 } 1361 1362 func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error { 1363 if i >= len(v.Args) { 1364 return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n)) 1365 } else if isString && !v.Args[i].IsString { 1366 return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd))) 1367 } else if !isString && v.Args[i].IsString { 1368 return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd))) 1369 } else { 1370 return nil 1371 } 1372 } 1373 1374 func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error { 1375 if v, err := self.repo.label(lb.Name); err != nil { 1376 return err 1377 } else { 1378 p.Link(v) 1379 return nil 1380 } 1381 } 1382 1383 func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) { 1384 var ok bool 1385 var pfx []byte 1386 var ops []interface{} 1387 var enc _InstructionEncoder 1388 1389 /* convert to lower-case */ 1390 opts := self.opts 1391 name := strings.ToLower(line.Mnemonic) 1392 1393 /* fix register-addressing branches if needed */ 1394 if opts.InstructionAliasing && len(line.Operands) == 1 { 1395 switch { 1396 case name == "retq" : name = "ret" 1397 case name == "movabsq" : name = "movq" 1398 case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq" 1399 case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp" 1400 case name == "call" && line.Operands[0].Op != OpLabel : name = "callq" 1401 case name == "callq" && line.Operands[0].Op == OpLabel : name = "call" 1402 } 1403 } 1404 1405 /* lookup from the alias table if needed */ 1406 if opts.InstructionAliasing { 1407 enc, ok = _InstructionAliases[name] 1408 } 1409 1410 /* lookup from the instruction table */ 1411 if !ok { 1412 enc, ok = Instructions[name] 1413 } 1414 1415 /* remove size suffix if possible */ 1416 if !ok && opts.InstructionAliasing { 1417 switch i := len(name) - 1; name[i] { 1418 case 'b', 'w', 'l', 'q': { 1419 enc, ok = Instructions[name[:i]] 1420 } 1421 } 1422 } 1423 1424 /* check for instruction name */ 1425 if !ok { 1426 return self.err("no such instruction: " + strconv.Quote(name)) 1427 } 1428 1429 /* allocate memory for prefix if any */ 1430 if len(line.Prefixes) != 0 { 1431 pfx = make([]byte, len(line.Prefixes)) 1432 } 1433 1434 /* convert the prefixes */ 1435 for i, v := range line.Prefixes { 1436 switch v { 1437 case PrefixLock : pfx[i] = _P_lock 1438 case PrefixSegmentCS : pfx[i] = _P_cs 1439 case PrefixSegmentDS : pfx[i] = _P_ds 1440 case PrefixSegmentES : pfx[i] = _P_es 1441 case PrefixSegmentFS : pfx[i] = _P_fs 1442 case PrefixSegmentGS : pfx[i] = _P_gs 1443 case PrefixSegmentSS : pfx[i] = _P_ss 1444 default : panic("unreachable: invalid segment prefix") 1445 } 1446 } 1447 1448 /* convert the operands */ 1449 for _, op := range line.Operands { 1450 switch op.Op { 1451 case OpImm : ops = append(ops, op.Imm) 1452 case OpReg : ops = append(ops, op.Reg) 1453 case OpMem : self.assembleInstrMem(&ops, op.Memory) 1454 case OpLabel : self.assembleInstrLabel(&ops, op.Label) 1455 default : panic("parser yields an invalid operand kind") 1456 } 1457 } 1458 1459 /* catch any exceptions in the encoder */ 1460 defer func() { 1461 if v := recover(); v != nil { 1462 err = self.err(fmt.Sprint(v)) 1463 } 1464 }() 1465 1466 /* encode the instruction */ 1467 enc(p, ops...).prefix = pfx 1468 return nil 1469 } 1470 1471 func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) { 1472 mem := new(MemoryOperand) 1473 *ops = append(*ops, mem) 1474 1475 /* check for RIP-relative addressing */ 1476 if addr.Base != rip { 1477 mem.Addr.Type = Memory 1478 mem.Addr.Memory = addr 1479 } else { 1480 mem.Addr.Type = Offset 1481 mem.Addr.Offset = RelativeOffset(addr.Displacement) 1482 } 1483 } 1484 1485 func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) { 1486 vk := label.Kind 1487 tr, err := self.repo.label(label.Name) 1488 1489 /* check for errors */ 1490 if err != nil { 1491 panic(err) 1492 } 1493 1494 /* check for branch target */ 1495 if vk == BranchTarget { 1496 *ops = append(*ops, tr) 1497 return 1498 } 1499 1500 /* add to ops */ 1501 *ops = append(*ops, &MemoryOperand { 1502 Addr: Addressable { 1503 Type : Reference, 1504 Reference : tr, 1505 }, 1506 }) 1507 } 1508 1509 func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error { 1510 var iv int 1511 var cc rune 1512 var ok bool 1513 var va bool 1514 var fn _Command 1515 1516 /* find the command */ 1517 if fn, ok = asmCommands[line.Cmd]; !ok { 1518 if self.opts.IgnoreUnknownDirectives { 1519 return nil 1520 } else { 1521 return self.err("no such command: " + strconv.Quote(line.Cmd)) 1522 } 1523 } 1524 1525 /* expected & real argument count */ 1526 argx := len(fn.args) 1527 argc := len(line.Args) 1528 1529 /* check the arguments */ 1530 loop: for iv, cc = range fn.args { 1531 switch cc { 1532 case '?' : va = true; break loop 1533 case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err } 1534 case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err } 1535 default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1536 } 1537 } 1538 1539 /* simple case: non-variadic command */ 1540 if !va { 1541 if argc == argx { 1542 return fn.handler(self, p, line.Args) 1543 } else { 1544 return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx)) 1545 } 1546 } 1547 1548 /* check for the descriptor */ 1549 if iv != argx - 2 { 1550 panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1551 } 1552 1553 /* variadic command and the final optional argument is set */ 1554 if argc == argx - 1 { 1555 switch fn.args[argx - 1] { 1556 case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err } 1557 case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err } 1558 default : panic("invalid argument descriptor: " + strconv.Quote(fn.args)) 1559 } 1560 } 1561 1562 /* check argument count */ 1563 if argc == argx - 1 || argc == argx - 2 { 1564 return fn.handler(self, p, line.Args) 1565 } else { 1566 return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1)) 1567 } 1568 } 1569 1570 func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error { 1571 var err error 1572 var val *expr.Expr 1573 1574 /* parse the expression */ 1575 if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil { 1576 return err 1577 } 1578 1579 /* add to the program */ 1580 addfn(p, val) 1581 return nil 1582 } 1583 1584 func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error { 1585 var err error 1586 var val int64 1587 1588 /* evaluate the expression */ 1589 if val, err = self.eval(argv[0].Value); err != nil { 1590 return err 1591 } 1592 1593 /* check for origin */ 1594 if val < 0 { 1595 return self.err(fmt.Sprintf("negative origin: %d", val)) 1596 } 1597 1598 /* ".org" must be the first command if any */ 1599 if self.cc != 1 { 1600 return self.err(".org must be the first command if present") 1601 } 1602 1603 /* set the initial program counter */ 1604 self.pc = uintptr(val) 1605 return nil 1606 } 1607 1608 func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error { 1609 var err error 1610 var val *expr.Expr 1611 1612 /* parse the expression */ 1613 if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil { 1614 return err 1615 } 1616 1617 /* define the new identifier */ 1618 self.repo.define(argv[0].Value, val) 1619 return nil 1620 } 1621 1622 func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error { 1623 return self.assembleCommandInt(p, argv, (*Program).Byte) 1624 } 1625 1626 func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error { 1627 return self.assembleCommandInt(p, argv, (*Program).Word) 1628 } 1629 1630 func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error { 1631 return self.assembleCommandInt(p, argv, (*Program).Long) 1632 } 1633 1634 func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error { 1635 return self.assembleCommandInt(p, argv, (*Program).Quad) 1636 } 1637 1638 func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error { 1639 var fv byte 1640 var nb int64 1641 var ex error 1642 1643 /* evaluate the size */ 1644 if nb, ex = self.eval(argv[0].Value); ex != nil { 1645 return ex 1646 } 1647 1648 /* check for filling size */ 1649 if nb < 0 { 1650 return self.err(fmt.Sprintf("negative filling size: %d", nb)) 1651 } 1652 1653 /* check for optional filling value */ 1654 if len(argv) == 2 { 1655 if val, err := self.eval(argv[1].Value); err != nil { 1656 return err 1657 } else if val < math.MinInt8 || val > math.MaxUint8 { 1658 return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val)) 1659 } else { 1660 fv = byte(val) 1661 } 1662 } 1663 1664 /* fill with specified byte */ 1665 p.Data(bytes.Repeat([]byte { fv }, int(nb))) 1666 return nil 1667 } 1668 1669 func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error { 1670 var nb int64 1671 var ex error 1672 var fv *expr.Expr 1673 1674 /* evaluate the size */ 1675 if nb, ex = self.eval(argv[0].Value); ex != nil { 1676 return ex 1677 } 1678 1679 /* check for alignment value */ 1680 if nb <= 0 { 1681 return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) 1682 } 1683 1684 /* alignment must be a power of 2 */ 1685 if (nb & (nb - 1)) != 0 { 1686 return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb)) 1687 } 1688 1689 /* check for optional filling value */ 1690 if len(argv) == 2 { 1691 if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { 1692 fv = v 1693 } else { 1694 return err 1695 } 1696 } 1697 1698 /* fill with specified byte, default to 0 if not specified */ 1699 p.Align(uint64(nb), fv) 1700 return nil 1701 } 1702 1703 func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error { 1704 name := argv[0].Value 1705 rbuf := []rune(name) 1706 1707 /* check all the characters */ 1708 for i, cc := range rbuf { 1709 if !isident0(cc) && (i == 0 || !isident(cc)) { 1710 return self.err("entry point must be a label name") 1711 } 1712 } 1713 1714 /* set the main entry point */ 1715 self.main = name 1716 return nil 1717 } 1718 1719 func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error { 1720 p.Data([]byte(argv[0].Value)) 1721 return nil 1722 } 1723 1724 func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error { 1725 p.Data(append([]byte(argv[0].Value), 0)) 1726 return nil 1727 } 1728 1729 func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error { 1730 var nb int64 1731 var ex error 1732 var fv *expr.Expr 1733 1734 /* evaluate the size */ 1735 if nb, ex = self.eval(argv[0].Value); ex != nil { 1736 return ex 1737 } 1738 1739 /* check for alignment value */ 1740 if nb <= 0 { 1741 return self.err(fmt.Sprintf("zero or negative alignment: %d", nb)) 1742 } 1743 1744 /* check for optional filling value */ 1745 if len(argv) == 2 { 1746 if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil { 1747 fv = v 1748 } else { 1749 return err 1750 } 1751 } 1752 1753 /* fill with specified byte, default to 0 if not specified */ 1754 p.Align(1 << nb, fv) 1755 return nil 1756 } 1757 1758 // Base returns the origin. 1759 func (self *Assembler) Base() uintptr { 1760 return self.pc 1761 } 1762 1763 // Code returns the assembled machine code. 1764 func (self *Assembler) Code() []byte { 1765 return self.buf 1766 } 1767 1768 // Entry returns the address of the specified entry point, or the origin if not specified. 1769 func (self *Assembler) Entry() uintptr { 1770 if self.main == "" { 1771 return self.pc 1772 } else if tr, err := self.repo.Get(self.main); err != nil { 1773 panic(err) 1774 } else if val, err := tr.Evaluate(); err != nil { 1775 panic(err) 1776 } else { 1777 return uintptr(val) 1778 } 1779 } 1780 1781 // Options returns the internal options reference, changing it WILL affect this Assembler instance. 1782 func (self *Assembler) Options() *Options { 1783 return &self.opts 1784 } 1785 1786 // WithBase resets the origin to pc. 1787 func (self *Assembler) WithBase(pc uintptr) *Assembler { 1788 self.pc = pc 1789 return self 1790 } 1791 1792 // Assemble assembles the assembly source and save the machine code to internal buffer. 1793 func (self *Assembler) Assemble(src string) error { 1794 var err error 1795 var buf []*ParsedLine 1796 1797 /* parse the source */ 1798 if buf, err = self.ps.Parse(src); err != nil { 1799 return err 1800 } 1801 1802 /* create a new program */ 1803 p := DefaultArch.CreateProgram() 1804 defer p.Free() 1805 1806 /* process every line */ 1807 for _, self.line = range buf { 1808 switch self.cc++; self.line.Kind { 1809 case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err } 1810 case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err } 1811 case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err } 1812 default : panic("parser yields an invalid line kind") 1813 } 1814 } 1815 1816 /* assemble the program */ 1817 self.buf = p.Assemble(self.pc) 1818 return nil 1819 }