github.com/consensys/gnark-crypto@v0.14.0/field/generator/asm/amd64/asm_macros.go (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package amd64
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"text/template"
    21  
    22  	"github.com/consensys/bavard/amd64"
    23  )
    24  
    25  // LabelRegisters write comment with friendler name to registers
    26  func (f *FFAmd64) LabelRegisters(name string, r ...amd64.Register) {
    27  	switch len(r) {
    28  	case 0:
    29  		return
    30  	case 1:
    31  		f.Comment(fmt.Sprintf("%s -> %s", name, string(r[0])))
    32  	default:
    33  		for i := 0; i < len(r); i++ {
    34  			f.Comment(fmt.Sprintf("%s[%d] -> %s", name, i, string(r[i])))
    35  		}
    36  	}
    37  	// f.WriteLn("")
    38  }
    39  
    40  func (f *FFAmd64) ReduceElement(t, scratch []amd64.Register) {
    41  	if len(t) != len(scratch) {
    42  		panic("invalid call")
    43  	}
    44  
    45  	const tmplReduce = `// reduce element({{- range $i, $a := .A}}{{$a}}{{- if ne $.Last $i}},{{ end}}{{- end}}) using temp registers ({{- range $i, $b := .B}}{{$b}}{{- if ne $.Last $i}},{{ end}}{{- end}})
    46  	REDUCE({{- range $i, $a := .A}}{{$a}},{{- end}}
    47  		{{- range $i, $b := .B}}{{$b}}{{- if ne $.Last $i}},{{ end}}{{- end}})`
    48  
    49  	var buf bytes.Buffer
    50  	err := template.Must(template.New("").
    51  		Parse(tmplReduce)).Execute(&buf, struct {
    52  		A, B []amd64.Register
    53  		Last int
    54  	}{t, scratch, len(scratch) - 1})
    55  
    56  	if err != nil {
    57  		panic(err)
    58  	}
    59  
    60  	f.WriteLn(buf.String())
    61  	f.WriteLn("")
    62  }
    63  
// tmplDefines is the text/template source executed by GenerateDefines.
//
// It emits the DATA/GLOBL directives for the modulus words (q<>) and for
// qInv0<>, followed by the definition of the REDUCE macro. REDUCE copies each
// ra word into the matching rb word, subtracts q from the ra words (SUBQ on
// word 0, SBBQ on the following words), then uses CMOVQCS to move the saved
// rb words back into ra when the carry flag is set.
//
// The template reads .Q, .QInverse, .NbWords, .NbWordsIndexesFull,
// .NbWordsIndexesNoZero and .NbWordsLastIndex from its data value and calls
// the "mul" and "imm" template functions.
//
// TODO @gbotrel: figure out if interleaving MOVQ and SUBQ or CMOVQ and MOVQ instructions makes sense
const tmplDefines = `

// modulus q
{{- range $i, $w := .Q}}
DATA q<>+{{mul $i 8}}(SB)/8, {{imm $w}}
{{- end}}
GLOBL q<>(SB), (RODATA+NOPTR), ${{mul 8 $.NbWords}}

// qInv0 q'[0]
DATA qInv0<>(SB)/8, {{$qinv0 := index .QInverse 0}}{{imm $qinv0}}
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

#define REDUCE(	{{- range $i := .NbWordsIndexesFull}}ra{{$i}},{{- end}}
				{{- range $i := .NbWordsIndexesFull}}rb{{$i}}{{- if ne $.NbWordsLastIndex $i}},{{- end}}{{- end}}) \
	MOVQ ra0, rb0;  \
	SUBQ    q<>(SB), ra0; \
	{{- range $i := .NbWordsIndexesNoZero}}
	MOVQ ra{{$i}}, rb{{$i}};  \
	SBBQ  q<>+{{mul $i 8}}(SB), ra{{$i}}; \
	{{- end}}
	{{- range $i := .NbWordsIndexesFull}}
	CMOVQCS rb{{$i}}, ra{{$i}};  \
	{{- end}}


`
    91  
    92  func (f *FFAmd64) GenerateDefines() {
    93  	tmpl := template.Must(template.New("").
    94  		Funcs(helpers()).
    95  		Parse(tmplDefines))
    96  
    97  	// execute template
    98  	var buf bytes.Buffer
    99  	if err := tmpl.Execute(&buf, f); err != nil {
   100  		panic(err)
   101  	}
   102  
   103  	f.WriteLn(buf.String())
   104  }
   105  
   106  func (f *FFAmd64) Mov(i1, i2 interface{}, offsets ...int) {
   107  	var o1, o2 int
   108  	if len(offsets) >= 1 {
   109  		o1 = offsets[0]
   110  		if len(offsets) >= 2 {
   111  			o2 = offsets[1]
   112  		}
   113  	}
   114  	switch c1 := i1.(type) {
   115  	case []uint64:
   116  		switch c2 := i2.(type) {
   117  		default:
   118  			panic("unsupported")
   119  		case []amd64.Register:
   120  			for i := 0; i < f.NbWords; i++ {
   121  				f.MOVQ(c1[i+o1], c2[i+o2])
   122  			}
   123  		}
   124  	case amd64.Register:
   125  		switch c2 := i2.(type) {
   126  		case amd64.Register:
   127  			for i := 0; i < f.NbWords; i++ {
   128  				f.MOVQ(c1.At(i+o1), c2.At(i+o2))
   129  			}
   130  		case []amd64.Register:
   131  			for i := 0; i < f.NbWords; i++ {
   132  				f.MOVQ(c1.At(i+o1), c2[i+o2])
   133  			}
   134  		default:
   135  			panic("unsupported")
   136  		}
   137  	case []amd64.Register:
   138  		switch c2 := i2.(type) {
   139  		case amd64.Register:
   140  			for i := 0; i < f.NbWords; i++ {
   141  				f.MOVQ(c1[i+o1], c2.At(i+o2))
   142  			}
   143  		case []amd64.Register:
   144  			// f.copyElement(c1[o1:], c2[o2:])
   145  			for i := 0; i < f.NbWords; i++ {
   146  				f.MOVQ(c1[i+o1], c2[i+o2])
   147  			}
   148  		default:
   149  			panic("unsupported")
   150  		}
   151  	default:
   152  		panic("unsupported")
   153  	}
   154  
   155  }
   156  
   157  func (f *FFAmd64) Add(i1, i2 interface{}, offsets ...int) {
   158  	var o1, o2 int
   159  	if len(offsets) >= 1 {
   160  		o1 = offsets[0]
   161  		if len(offsets) >= 2 {
   162  			o2 = offsets[1]
   163  		}
   164  	}
   165  	switch c1 := i1.(type) {
   166  
   167  	case amd64.Register:
   168  		switch c2 := i2.(type) {
   169  		default:
   170  			panic("unsupported")
   171  		case []amd64.Register:
   172  			for i := 0; i < f.NbWords; i++ {
   173  				if i == 0 {
   174  					f.ADDQ(c1.At(i+o1), c2[i+o2])
   175  				} else {
   176  					f.ADCQ(c1.At(i+o1), c2[i+o2])
   177  				}
   178  			}
   179  		}
   180  	case []amd64.Register:
   181  		switch c2 := i2.(type) {
   182  		default:
   183  			panic("unsupported")
   184  		case []amd64.Register:
   185  			for i := 0; i < f.NbWords; i++ {
   186  				if i == 0 {
   187  					f.ADDQ(c1[i+o1], c2[i+o2])
   188  				} else {
   189  					f.ADCQ(c1[i+o1], c2[i+o2])
   190  				}
   191  			}
   192  		}
   193  	default:
   194  		panic("unsupported")
   195  	}
   196  }
   197  
   198  func (f *FFAmd64) Sub(i1, i2 interface{}, offsets ...int) {
   199  	var o1, o2 int
   200  	if len(offsets) >= 1 {
   201  		o1 = offsets[0]
   202  		if len(offsets) >= 2 {
   203  			o2 = offsets[1]
   204  		}
   205  	}
   206  	switch c1 := i1.(type) {
   207  
   208  	case amd64.Register:
   209  		switch c2 := i2.(type) {
   210  		default:
   211  			panic("unsupported")
   212  		case []amd64.Register:
   213  			for i := 0; i < f.NbWords; i++ {
   214  				if i == 0 {
   215  					f.SUBQ(c1.At(i+o1), c2[i+o2])
   216  				} else {
   217  					f.SBBQ(c1.At(i+o1), c2[i+o2])
   218  				}
   219  			}
   220  		}
   221  	case []amd64.Register:
   222  		switch c2 := i2.(type) {
   223  		default:
   224  			panic("unsupported")
   225  		case []amd64.Register:
   226  			for i := 0; i < f.NbWords; i++ {
   227  				if i == 0 {
   228  					f.SUBQ(c1[i+o1], c2[i+o2])
   229  				} else {
   230  					f.SBBQ(c1[i+o1], c2[i+o2])
   231  				}
   232  			}
   233  		}
   234  	default:
   235  		panic("unsupported")
   236  	}
   237  }
   238  
   239  // Template helpers (txt/template)
   240  func helpers() template.FuncMap {
   241  	// functions used in template
   242  	return template.FuncMap{
   243  		"mul": mul,
   244  		"imm": imm,
   245  		"sub": sub,
   246  	}
   247  }
   248  
   249  func sub(a, b int) int {
   250  	return a - b
   251  }
   252  
   253  func mul(a, b int) int {
   254  	return a * b
   255  }
   256  
   257  func imm(t uint64) string {
   258  	switch t {
   259  	case 0:
   260  		return "$0"
   261  	case 1:
   262  		return "$1"
   263  	default:
   264  		return fmt.Sprintf("$%#016x", t)
   265  	}
   266  }