github.com/cloudwego/frugal@v0.1.15/internal/atm/rtx/clobber_amd64.go (about)

     1  /*
     2   * Copyright 2022 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package rtx
    18  
    19  import (
    20      `unsafe`
    21  
    22      `github.com/cloudwego/iasm/x86_64`
    23      `github.com/cloudwego/frugal/internal/rt`
    24      `github.com/oleiade/lane`
    25      `golang.org/x/arch/x86/x86asm`
    26  )
    27  
    28  var branchTable = map[x86asm.Op]bool {
    29      x86asm.JA    : true,
    30      x86asm.JAE   : true,
    31      x86asm.JB    : true,
    32      x86asm.JBE   : true,
    33      x86asm.JCXZ  : true,
    34      x86asm.JE    : true,
    35      x86asm.JECXZ : true,
    36      x86asm.JG    : true,
    37      x86asm.JGE   : true,
    38      x86asm.JL    : true,
    39      x86asm.JLE   : true,
    40      x86asm.JMP   : true,
    41      x86asm.JNE   : true,
    42      x86asm.JNO   : true,
    43      x86asm.JNP   : true,
    44      x86asm.JNS   : true,
    45      x86asm.JO    : true,
    46      x86asm.JP    : true,
    47      x86asm.JRCXZ : true,
    48      x86asm.JS    : true,
    49  }
    50  
    51  var registerTable = map[x86asm.Reg]x86_64.Register64 {
    52      x86asm.AL   : x86_64.RAX,
    53      x86asm.CL   : x86_64.RCX,
    54      x86asm.DL   : x86_64.RDX,
    55      x86asm.BL   : x86_64.RBX,
    56      x86asm.AH   : x86_64.RAX,
    57      x86asm.CH   : x86_64.RCX,
    58      x86asm.DH   : x86_64.RDX,
    59      x86asm.BH   : x86_64.RBX,
    60      x86asm.SPB  : x86_64.RSP,
    61      x86asm.BPB  : x86_64.RBP,
    62      x86asm.SIB  : x86_64.RSI,
    63      x86asm.DIB  : x86_64.RDI,
    64      x86asm.R8B  : x86_64.R8,
    65      x86asm.R9B  : x86_64.R9,
    66      x86asm.R10B : x86_64.R10,
    67      x86asm.R11B : x86_64.R11,
    68      x86asm.R12B : x86_64.R12,
    69      x86asm.R13B : x86_64.R13,
    70      x86asm.R14B : x86_64.R14,
    71      x86asm.R15B : x86_64.R15,
    72      x86asm.AX   : x86_64.RAX,
    73      x86asm.CX   : x86_64.RCX,
    74      x86asm.DX   : x86_64.RDX,
    75      x86asm.BX   : x86_64.RBX,
    76      x86asm.SP   : x86_64.RSP,
    77      x86asm.BP   : x86_64.RBP,
    78      x86asm.SI   : x86_64.RSI,
    79      x86asm.DI   : x86_64.RDI,
    80      x86asm.R8W  : x86_64.R8,
    81      x86asm.R9W  : x86_64.R9,
    82      x86asm.R10W : x86_64.R10,
    83      x86asm.R11W : x86_64.R11,
    84      x86asm.R12W : x86_64.R12,
    85      x86asm.R13W : x86_64.R13,
    86      x86asm.R14W : x86_64.R14,
    87      x86asm.R15W : x86_64.R15,
    88      x86asm.EAX  : x86_64.RAX,
    89      x86asm.ECX  : x86_64.RCX,
    90      x86asm.EDX  : x86_64.RDX,
    91      x86asm.EBX  : x86_64.RBX,
    92      x86asm.ESP  : x86_64.RSP,
    93      x86asm.EBP  : x86_64.RBP,
    94      x86asm.ESI  : x86_64.RSI,
    95      x86asm.EDI  : x86_64.RDI,
    96      x86asm.R8L  : x86_64.R8,
    97      x86asm.R9L  : x86_64.R9,
    98      x86asm.R10L : x86_64.R10,
    99      x86asm.R11L : x86_64.R11,
   100      x86asm.R12L : x86_64.R12,
   101      x86asm.R13L : x86_64.R13,
   102      x86asm.R14L : x86_64.R14,
   103      x86asm.R15L : x86_64.R15,
   104      x86asm.RAX  : x86_64.RAX,
   105      x86asm.RCX  : x86_64.RCX,
   106      x86asm.RDX  : x86_64.RDX,
   107      x86asm.RBX  : x86_64.RBX,
   108      x86asm.RSP  : x86_64.RSP,
   109      x86asm.RBP  : x86_64.RBP,
   110      x86asm.RSI  : x86_64.RSI,
   111      x86asm.RDI  : x86_64.RDI,
   112      x86asm.R8   : x86_64.R8,
   113      x86asm.R9   : x86_64.R9,
   114      x86asm.R10  : x86_64.R10,
   115      x86asm.R11  : x86_64.R11,
   116      x86asm.R12  : x86_64.R12,
   117      x86asm.R13  : x86_64.R13,
   118      x86asm.R14  : x86_64.R14,
   119      x86asm.R15  : x86_64.R15,
   120  }
   121  
   122  var freeRegisters = map[x86_64.Register64]bool {
   123      x86_64.RAX: true,
   124      x86_64.RSI: true,
   125      x86_64.RDI: true,
   126  }
   127  
   128  type _InstrBlock struct {
   129      ret    bool
   130      size   uintptr
   131      entry  unsafe.Pointer
   132      links [2]*_InstrBlock
   133  }
   134  
   135  func newInstrBlock(entry unsafe.Pointer) *_InstrBlock {
   136      return &_InstrBlock{entry: entry}
   137  }
   138  
   139  func (self *_InstrBlock) pc() unsafe.Pointer {
   140      return unsafe.Pointer(uintptr(self.entry) + self.size)
   141  }
   142  
   143  func (self *_InstrBlock) code() []byte {
   144      return rt.BytesFrom(self.pc(), 15, 15)
   145  }
   146  
   147  func (self *_InstrBlock) commit(size int) {
   148      self.size += uintptr(size)
   149  }
   150  
   151  func resolveClobberSet(fn interface{}) map[x86_64.Register64]bool {
   152      buf := lane.NewQueue()
   153      ret := make(map[x86_64.Register64]bool)
   154      bmp := make(map[unsafe.Pointer]*_InstrBlock)
   155  
   156      /* build the CFG with BFS */
   157      for buf.Enqueue(newInstrBlock(rt.FuncAddr(fn))); !buf.Empty(); {
   158          val := buf.Dequeue()
   159          cfg := val.(*_InstrBlock)
   160  
   161          /* parse every instruction in the block */
   162          for !cfg.ret {
   163              var err error
   164              var ins x86asm.Inst
   165  
   166              /* decode one instruction */
   167              if ins, err = x86asm.Decode(cfg.code(), 64); err != nil {
   168                  panic(err)
   169              } else {
   170                  cfg.commit(ins.Len)
   171              }
   172  
   173              /* calling to other functions, cannot analyze */
   174              if ins.Op == x86asm.CALL {
   175                  return nil
   176              }
   177  
   178              /* simple algorithm: every write to register is treated as clobbering */
   179              if ins.Op == x86asm.MOV {
   180                  if reg, ok := ins.Args[0].(x86asm.Reg); ok {
   181                      if rr, rok := registerTable[reg]; rok && !freeRegisters[rr] {
   182                          ret[rr] = true
   183                      }
   184                  }
   185              }
   186  
   187              /* check for returns */
   188              if ins.Op == x86asm.RET {
   189                  cfg.ret = true
   190                  break
   191              }
   192  
   193              /* check for branches */
   194              if !branchTable[ins.Op] {
   195                  continue
   196              }
   197  
   198              /* calculate branch address */
   199              links := [2]unsafe.Pointer {
   200                  cfg.pc(),
   201                  unsafe.Pointer(uintptr(cfg.pc()) + uintptr(ins.Args[0].(x86asm.Rel))),
   202              }
   203  
   204              /* link the next blocks */
   205              for i := 0; i < 2; i++ {
   206                  if cfg.links[i] = bmp[links[i]]; cfg.links[i] == nil {
   207                      cfg.links[i] = newInstrBlock(links[i])
   208                      bmp[links[i]] = cfg.links[i]
   209                  }
   210              }
   211  
   212              /* add the branches if not returned, if either one returns, mark the block returned */
   213              for i := 0; i < 2; i++ {
   214                  if cfg.links[i].ret {
   215                      cfg.ret = true
   216                  } else {
   217                      buf.Enqueue(cfg.links[i])
   218                  }
   219              }
   220          }
   221      }
   222  
   223      /* all done */
   224      return ret
   225  }