github.com/cloudwego/frugal@v0.1.15/internal/atm/ssa/pass_fusion_amd64.go

/*
 * Copyright 2022 ByteDance Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ssa

import (
    `github.com/cloudwego/frugal/internal/atm/abi`
    `github.com/cloudwego/frugal/internal/cpu`
    `github.com/cloudwego/frugal/internal/rt`
)

// Fusion fuses simple instructions into more complex ones, to reduce the instruction count.
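// It follows the same convention as the other passes in this package and is
// presumably invoked as new(Fusion).Apply(cfg), just like the TDCE and Reorder
// passes called at the end of Apply below.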
type Fusion struct{}

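// flagsafe reports whether the CPU flags produced by ins are still intact at
// the end of bb: it locates ins within bb, then requires every instruction
// after it to belong to a small whitelist known not to touch the flags
// register; anything else is conservatively assumed to clobber them.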
func (Fusion) flagsafe(bb *BasicBlock, ins IrNode) bool {
    i := -1
    p := IrNode(nil)

    /* find the instruction */
    for i, p = range bb.Ins {
        if p == ins {
            break
        }
    }

    /* not found: the instruction lives in another basic block,
     * so we cannot guarantee that the flags are preserved */
    if p != ins {
        return false
    }

    /* check the instructions after it: only a few instructions are known
     * to preserve flags; all other instructions are assumed to clobber them */
    for _, p = range bb.Ins[i + 1:] {
        switch p.(type) {
            case *IrAMD64_INT          : break
            case *IrAMD64_LEA          : break
            case *IrAMD64_BSWAP        : break
            case *IrAMD64_MOVSLQ       : break
            case *IrAMD64_MOV_abs      : break
            case *IrAMD64_MOV_ptr      : break
            case *IrAMD64_MOV_reg      : break
            case *IrAMD64_MOV_load     : break
            case *IrAMD64_MOV_store_r  : break
            case *IrAMD64_MOV_store_i  : break
            case *IrAMD64_MOV_load_be  : break
            case *IrAMD64_MOV_store_be : break
            case *IrAMD64_CALL_gcwb    : break
            default                    : return false
        }
    }

    /* everything checked fine */
    return true
}
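// Apply runs the fusion pass on cfg: it walks the blocks in reverse post-order,
// records the defining instruction of every register, and folds eligible
// definitions into the instructions and terminators that use them. TDCE and
// Reorder are re-run after each round, and rounds repeat until a full pass
// makes no more changes.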
func (self Fusion) Apply(cfg *CFG) {
    done := false
    defs := make(map[Reg]IrNode)

    /* leaq {mem}, %r1       ; op {disp}(%r1), %r2             --> op {disp}{mem}, %r2
     * leaq {off1}(%r0), %r2 ; op {off2}(%r1,%r2), %r3         --> op {off1+off2}(%r1,%r0), %r3
     * leaq {off1}(%r0), %r1 ; op {off2}(%r1,%r2,{scale}), %r3 --> op {off1+off2}(%r0,%r2,{scale}), %r3
     * addsub $imm, %r1, %r2 ; op {disp}(%r3,%r2), %r4         --> op {disp+imm}(%r3,%r1), %r4
     * movabsq $imm, %r1     ; op {disp}(%r0,%r1,{scale}), %r2 --> op {disp+imm*scale}(%r0), %r2 */
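    /* For instance (illustrative values), the first pattern folds
     *     leaq 8(%rax), %rbx ; movq 16(%rbx), %rcx
     * into
     *     movq 24(%rax), %rcx
     * Every rewrite below is guarded by isi32(), since AMD64 memory operands
     * can only encode a signed 32-bit displacement. */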
    fusemem := func(m *Mem) {
        if m.I == Rz {
            if ins, ok := defs[m.M].(*IrAMD64_LEA); ok {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.M = ins.M.M
                    m.I = ins.M.I
                    m.S = ins.M.S
                    m.D = int32(x)
                    done = false
                }
            }
        } else {
            if ins, ok := defs[m.M].(*IrAMD64_LEA); ok && ins.M.I == Rz {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.M = ins.M.M
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_LEA); ok && m.S == 1 && ins.M.I == Rz {
                if x := int64(m.D) + int64(ins.M.D); isi32(x) {
                    m.I = ins.M.M
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_BinOp_ri); ok && m.S == 1 && ins.Op.IsAdditive() {
                if x := int64(m.D) + int64(ins.Y) * ins.Op.ScaleFactor(); isi32(x) {
                    m.I = ins.X
                    m.D = int32(x)
                    done = false
                }
            } else if ins, ok := defs[m.I].(*IrAMD64_MOV_abs); ok {
                if x := int64(m.D) + ins.V * int64(m.S); isi32(x) {
                    m.I = Rz
                    m.D = int32(x)
                    done = false
                }
            }
        }
    }

    /* retry until no more modifications */
    for !done {
        done = true
        rt.MapClear(defs)

        /* pseudo-definition for zero registers */
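        /* (Rz always reads as zero and Pn as nil, so they get synthetic
         * definitions up front; since the CFG is in SSA form, every other
         * register may only be defined once, which the checks below enforce.) */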
        defs[Rz] = &IrAMD64_MOV_abs { R: Rz, V: 0 }
        defs[Pn] = &IrAMD64_MOV_ptr { R: Pn, P: nil }

        /* check every block */
        for _, bb := range cfg.PostOrder().Reversed() {
            var r *Reg
            var ok bool

            /* mark all the definitions in Phi nodes */
            for _, v := range bb.Phi {
                for _, r = range v.Definitions() {
                    if _, ok = defs[*r]; !ok {
                        defs[*r] = v
                    } else if r.Kind() != K_zero {
                        panic("register redefined: " + r.String())
                    }
                }
            }

            /* scan all the instructions */
            for i, v := range bb.Ins {
                var m IrAMD64_MemOp
                var d IrDefinitions

                /* fuse memory addresses in instructions */
                if m, ok = v.(IrAMD64_MemOp); ok {
                    fusemem(m.MemOp())
                }

                /* fuse instructions if possible */
                switch p := v.(type) {
                    default: {
                        break
                    }

                    /* movx {mem}, %r0; bswapx %r0, %r1 --> movbex {mem}, %r1 */
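                    /* e.g. (illustrative) movq (%rax), %rbx ; bswapq %rbx, %rcx
                     * becomes movbeq (%rax), %rcx when the CPU supports MOVBE;
                     * single-byte loads are excluded since there is nothing to swap */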
                    case *IrAMD64_BSWAP: {
                        if ins, ok := defs[p.V].(*IrAMD64_MOV_load); ok && ins.N != 1 && cpu.HasMOVBE {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_load_be { R: p.R, M: ins.M, N: ins.N }
                        }
                    }

                    /* movq {i32}, %r0; movx %r0, {mem} --> movx {i32}, {mem}
                     * movq {p32}, %r0; movx %r0, {mem} --> movx {p32}, {mem}
                     * bswapx %r0, %r1; movx %r1, {mem} --> movbex %r0, {mem} */
                    case *IrAMD64_MOV_store_r: {
                        if ins, ok := defs[p.R].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_i { V: int32(ins.V), M: p.M, N: p.N }
                        } else if ins, ok := defs[p.R].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_p { P: ins.P, M: p.M }
                        } else if ins, ok := defs[p.R].(*IrAMD64_BSWAP); ok && p.N != 1 && cpu.HasMOVBE {
                            done = false
                            bb.Ins[i] = &IrAMD64_MOV_store_be { R: ins.V, M: p.M, N: p.N }
                        }
                    }

                    /* movq {i32}, %r0; binop %r0, %r1 --> binop {i32}, %r1
                     * movq {mem}, %r0; binop %r0, %r1 --> binop {mem}, %r1 */
                    case *IrAMD64_BinOp_rr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_BinOp_ri { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok && ins.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_BinOp_rm { R: p.R, X: p.X, Y: ins.M, Op: p.Op }
                        }
                    }

                    /* movq {u8}, %r0; btsq %r0, %r1; setc %r2 --> btsq {u8}, %r1; setc %r2 */
                    case *IrAMD64_BTSQ_rr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isu8(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_BTSQ_ri { T: p.T, S: p.S, X: p.X, Y: uint8(ins.V) }
                        }
                    }

                    /* movq {i32}, %r0; cmpq %r0, %r1 --> cmpq {i32}, %r1
                     * movq {ptr}, %p0; cmpq %p0, %p1 --> cmpq {ptr}, %p1
                     * movx {mem}, %r0; cmpq %r0, %r1 --> cmpx {mem}, %r1
                     * movq {i32}, %r1; cmpq %r0, %r1 --> cmpq %r0, {i32}
                     * movq {ptr}, %p1; cmpq %p0, %p1 --> cmpq %p0, {ptr}
                     * movx {mem}, %r1; cmpq %r0, %r1 --> cmpx %r0, {mem} */
                    case *IrAMD64_CMPQ_rr: {
                        if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_ir { R: p.R, X: int32(ins.V), Y: p.Y, Op: p.Op }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_pr { R: p.R, X: ins.P, Y: p.Y, Op: p.Op }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_load); ok && ins.N != 16 {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mr { R: p.R, X: ins.M, Y: p.Y, Op: p.Op, N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_ri { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_rp { R: p.R, X: p.X, Y: ins.P, Op: p.Op }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok && ins.N != 16 {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_rm { R: p.R, X: p.X, Y: ins.M, Op: p.Op, N: ins.N }
                        }
                    }

                    /* movq {i32}, %r0; cmpx %r0, {mem} --> cmpx {i32}, {mem}
                     * movq {p32}, %p0; cmpq %p0, {mem} --> cmpq {p32}, {mem} */
                    case *IrAMD64_CMPQ_rm: {
                        if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_im { R: p.R, X: int32(ins.V), Y: p.Y, Op: p.Op, N: p.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_pm { R: p.R, X: ins.P, Y: p.Y, Op: p.Op }
                        }
                    }

                    /* movq {i32}, %r0; cmpx {mem}, %r0 --> cmpx {mem}, {i32}
                     * movq {p32}, %p0; cmpq {mem}, %p0 --> cmpq {mem}, {p32} */
                    case *IrAMD64_CMPQ_mr: {
                        if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mi { R: p.R, X: p.X, Y: int32(ins.V), Op: p.Op, N: p.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                            done = false
                            bb.Ins[i] = &IrAMD64_CMPQ_mp { R: p.R, X: p.X, Y: ins.P, Op: p.Op }
                        }
                    }
                }

                /* mark all the definitions in instructions */
                if d, ok = v.(IrDefinitions); ok {
                    for _, r = range d.Definitions() {
                        if _, ok = defs[*r]; !ok {
                            defs[*r] = v
                        } else if r.Kind() != K_zero {
                            panic("register redefined: " + r.String())
                        }
                    }
                }
            }

            /* fuse memory operation in terminators */
            if m, ok := bb.Term.(IrAMD64_MemOp); ok {
                fusemem(m.MemOp())
            }

            /* fuse terminators if possible */
            switch p := bb.Term.(type) {
                default: {
                    break
                }

                /* movq {i32}, %r0; cmpq %r0, %r1; jcc {label} --> cmpq {i32}, %r1; jcc {label}
                 * movq {ptr}, %p0; cmpq %p0, %p1; jcc {label} --> cmpq {ptr}, %p1; jcc {label}
                 * movx {mem}, %r0; cmpq %r0, %r1; jcc {label} --> cmpx {mem}, %r1; jcc {label}
                 * movq {i32}, %r1; cmpq %r0, %r1; jcc {label} --> cmpq %r0, {i32}; jcc {label}
                 * movq {ptr}, %p1; cmpq %p0, %p1; jcc {label} --> cmpq %p0, {ptr}; jcc {label}
                 * movx {mem}, %r1; cmpq %r0, %r1; jcc {label} --> cmpx %r0, {mem}; jcc {label} */
                case *IrAMD64_Jcc_rr: {
                    if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_ir { X: int32(ins.V), Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_pr { X: ins.P, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_load); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mr { X: ins.M, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op, N: ins.N }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_ri { X: p.X, Y: int32(ins.V), To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_rp { X: p.X, Y: ins.P, To: p.To, Ln: p.Ln, Op: p.Op }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_load); ok {
                        done = false
                        bb.Term = &IrAMD64_Jcc_rm { X: p.X, Y: ins.M, To: p.To, Ln: p.Ln, Op: p.Op, N: ins.N }
                    }
                }

                /* setcc %r0; cmpq %r0, $0; je {label} --> jncc {label} */
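                /* (The BTSQ fusions below work on the carry flag: BTSQ leaves the
                 * tested bit in CF, so comparing its SETC output with 0 and jumping
                 * on equality is just JNC, provided flagsafe() proves that nothing
                 * in between clobbers the flags. Resetting ins.T to Rz discards the
                 * now-unneeded SETC result.) */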
                case *IrAMD64_Jcc_ri: {
                    if p.Y == 0 && p.Op == IrAMD64_CmpEq {
                        if ins, ok := defs[p.X].(*IrAMD64_CMPQ_rr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated() }
                        } else if ins, ok := defs[p.X].(*IrAMD64_CMPQ_rm); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rm { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_CMPQ_mr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_mr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.X].(*IrAMD64_BTSQ_rr); ok && p.X == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        } else if ins, ok := defs[p.X].(*IrAMD64_BTSQ_ri); ok && p.X == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        }
                    }
                }

                /* setcc %r0; cmpq $0, %r0; je {label} --> jncc {label} */
                case *IrAMD64_Jcc_ir: {
                    if p.X == 0 && p.Op == IrAMD64_CmpEq {
                        if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_rr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated() }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_rm); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_rm { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_CMPQ_mr); ok {
                            done = false
                            bb.Term = &IrAMD64_Jcc_mr { X: ins.X, Y: ins.Y, To: p.To, Ln: p.Ln, Op: ins.Op.Negated(), N: ins.N }
                        } else if ins, ok := defs[p.Y].(*IrAMD64_BTSQ_rr); ok && p.Y == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        } else if ins, ok := defs[p.Y].(*IrAMD64_BTSQ_ri); ok && p.Y == ins.T && self.flagsafe(bb, ins) {
                            done = false
                            bb.Term, ins.T = &IrAMD64_JNC { To: p.To, Ln: p.Ln }, Rz
                        }
                    }
                }

                /* movq {i32}, %r0; cmpq %r0, {mem}; jcc {label} --> cmpq {i32}, {mem}; jcc {label}
                 * movq {p32}, %p0; cmpq %p0, {mem}; jcc {label} --> cmpq {p32}, {mem}; jcc {label} */
                case *IrAMD64_Jcc_rm: {
                    if ins, ok := defs[p.X].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_im { X: int32(ins.V), Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op, N: p.N }
                    } else if ins, ok := defs[p.X].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                        done = false
                        bb.Term = &IrAMD64_Jcc_pm { X: ins.P, Y: p.Y, To: p.To, Ln: p.Ln, Op: p.Op }
                    }
                }

                /* movq {i32}, %r0; cmpq {mem}, %r0; jcc {label} --> cmpq {mem}, {i32}; jcc {label}
                 * movq {p32}, %p0; cmpq {mem}, %p0; jcc {label} --> cmpq {mem}, {p32}; jcc {label} */
                case *IrAMD64_Jcc_mr: {
                    if ins, ok := defs[p.Y].(*IrAMD64_MOV_abs); ok && isi32(ins.V) {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mi { X: p.X, Y: int32(ins.V), To: p.To, Ln: p.Ln, Op: p.Op, N: p.N }
                    } else if ins, ok := defs[p.Y].(*IrAMD64_MOV_ptr); ok && isp32(ins.P) && p.N == abi.PtrSize {
                        done = false
                        bb.Term = &IrAMD64_Jcc_mp { X: p.X, Y: ins.P, To: p.To, Ln: p.Ln, Op: p.Op }
                    }
                }
            }
        }

        /* perform TDCE & reorder after each round */
        new(TDCE).Apply(cfg)
        new(Reorder).Apply(cfg)
    }
}