github.com/consensys/gnark-crypto@v0.14.0/field/generator/internal/templates/element/mul_nocarry.go (about) 1 package element 2 3 // MulNoCarry see https://eprint.iacr.org/2022/1400.pdf annex for more info on the algorithm 4 // Note that these templates are optimized for arm64 target, since x86 benefits from assembly impl. 5 const MulNoCarry = ` 6 {{ define "mul_nocarry" }} 7 var {{range $i := .all.NbWordsIndexesFull}}t{{$i}}{{- if ne $i $.all.NbWordsLastIndex}},{{- end}}{{- end}} uint64 8 var {{range $i := .all.NbWordsIndexesFull}}u{{$i}}{{- if ne $i $.all.NbWordsLastIndex}},{{- end}}{{- end}} uint64 9 {{- range $i := .all.NbWordsIndexesFull}} 10 { 11 var c0, c1, c2 uint64 12 v := {{$.V1}}[{{$i}}] 13 {{- if eq $i 0}} 14 {{- range $j := $.all.NbWordsIndexesFull}} 15 u{{$j}}, t{{$j}} = bits.Mul64(v, {{$.V2}}[{{$j}}]) 16 {{- end}} 17 {{- else}} 18 {{- range $j := $.all.NbWordsIndexesFull}} 19 u{{$j}}, c1 = bits.Mul64(v, {{$.V2}}[{{$j}}]) 20 {{- if eq $j 0}} 21 t{{$j}}, c0 = bits.Add64(c1, t{{$j}}, 0) 22 {{- else }} 23 t{{$j}}, c0 = bits.Add64(c1, t{{$j}}, c0) 24 {{- end}} 25 {{- if eq $j $.all.NbWordsLastIndex}} 26 {{/* yes, we're tempted to write c2 = c0, but that slow the whole MUL by 20% */}} 27 c2, _ = bits.Add64(0, 0, c0) 28 {{- end}} 29 {{- end}} 30 {{- end}} 31 32 {{- range $j := $.all.NbWordsIndexesFull}} 33 {{- if eq $j 0}} 34 t{{add $j 1}}, c0 = bits.Add64(u{{$j}}, t{{add $j 1}}, 0) 35 {{- else if eq $j $.all.NbWordsLastIndex}} 36 {{- if eq $i 0}} 37 c2, _ = bits.Add64(u{{$j}}, 0, c0) 38 {{- else}} 39 c2, _ = bits.Add64(u{{$j}},c2, c0) 40 {{- end}} 41 {{- else }} 42 t{{add $j 1}}, c0 = bits.Add64(u{{$j}}, t{{add $j 1}}, c0) 43 {{- end}} 44 {{- end}} 45 46 {{- $k := $.all.NbWordsLastIndex}} 47 48 m := qInvNeg * t0 49 50 u0, c1 = bits.Mul64(m, q0) 51 {{- range $j := $.all.NbWordsIndexesFull}} 52 {{- if ne $j 0}} 53 {{- if eq $j 1}} 54 _, c0 = bits.Add64(t0, c1, 0) 55 {{- else}} 56 t{{sub $j 2}}, c0 = bits.Add64(t{{sub $j 1}}, c1, c0) 57 {{- end}} 58 u{{$j}}, c1 = bits.Mul64(m, q{{$j}}) 59 {{- end}} 60 {{- end}} 61 {{/* TODO @gbotrel it seems this can create a carry (c0) -- study the bounds */}} 62 t{{sub $.all.NbWordsLastIndex 1}}, c0 = bits.Add64(0, c1, c0) 63 u{{$k}}, _ = bits.Add64(u{{$k}}, 0, c0) 64 65 {{- range $j := $.all.NbWordsIndexesFull}} 66 {{- if eq $j 0}} 67 t{{$j}}, c0 = bits.Add64(u{{$j}}, t{{$j}}, 0) 68 {{- else if eq $j $.all.NbWordsLastIndex}} 69 c2, _ = bits.Add64(c2, 0, c0) 70 {{- else}} 71 t{{$j}}, c0 = bits.Add64(u{{$j}}, t{{$j}}, c0) 72 {{- end}} 73 {{- end}} 74 75 {{- $l := sub $.all.NbWordsLastIndex 1}} 76 t{{$l}}, c0 = bits.Add64(t{{$k}}, t{{$l}}, 0) 77 t{{$k}}, _ = bits.Add64(u{{$k}}, c2, c0) 78 79 } 80 {{- end}} 81 82 83 {{- range $i := $.all.NbWordsIndexesFull}} 84 z[{{$i}}] = t{{$i}} 85 {{- end}} 86 87 {{ end }} 88 89 90 91 {{ define "square_nocarry" }} 92 var {{range $i := .all.NbWordsIndexesFull}}t{{$i}}{{- if ne $i $.all.NbWordsLastIndex}},{{- end}}{{- end}} uint64 93 var {{range $i := $.all.NbWordsIndexesFull}}u{{$i}}{{- if ne $i $.all.NbWordsLastIndex}},{{- end}}{{- end}} uint64 94 var {{range $i := interval 0 (add $.all.NbWordsLastIndex 1)}}lo{{$i}}{{- if ne $i $.all.NbWordsLastIndex}},{{- end}}{{- end}} uint64 95 96 // note that if hi, _ = bits.Mul64() didn't generate 97 // UMULH and MUL, (but just UMULH) we could use same pattern 98 // as in mulRaw and reduce the stack space of this function (no need for lo..) 99 100 {{- range $i := .all.NbWordsIndexesFull}} 101 { 102 103 {{$jStart := add $i 1}} 104 {{$jEnd := add $.all.NbWordsLastIndex 1}} 105 106 var c0, c2 uint64 107 108 109 // for j=i+1 to N-1 110 // p,C,t[j] = 2*a[j]*a[i] + t[j] + (p,C) 111 // A = C 112 113 {{- if eq $i 0}} 114 u{{$i}}, lo1 = bits.Mul64(x[{{$i}}], x[{{$i}}]) 115 {{- range $j := interval $jStart $jEnd}} 116 u{{$j}}, t{{$j}} = bits.Mul64(x[{{$j}}], x[{{$i}}]) 117 {{- end}} 118 119 // propagate lo, from t[j] to end, twice. 120 {{- range $j := interval $jStart $jEnd}} 121 {{- if eq $j $jStart}} 122 t{{$j}}, c0 = bits.Add64(t{{$j}}, t{{$j}}, 0) 123 {{- else }} 124 t{{$j}}, c0 = bits.Add64(t{{$j}}, t{{$j}}, c0) 125 {{- end}} 126 {{- if eq $j $.all.NbWordsLastIndex}} 127 c2, _ = bits.Add64(c2, 0, c0) 128 {{- end}} 129 {{- end}} 130 131 t{{$i}}, c0 = bits.Add64( lo1,t{{$i}}, 0) 132 {{- else}} 133 {{- range $j := interval (sub $jStart 1) $jEnd}} 134 u{{$j}}, lo{{$j}} = bits.Mul64(x[{{$j}}], x[{{$i}}]) 135 {{- end}} 136 137 // propagate lo, from t[j] to end, twice. 138 {{- range $j := interval $jStart $jEnd}} 139 {{- if eq $j $jStart}} 140 lo{{$j}}, c0 = bits.Add64(lo{{$j}}, lo{{$j}}, 0) 141 {{- else }} 142 lo{{$j}}, c0 = bits.Add64(lo{{$j}}, lo{{$j}}, c0) 143 {{- end}} 144 {{- if eq $j $.all.NbWordsLastIndex}} 145 c2, _ = bits.Add64(c2, 0, c0) 146 {{- end}} 147 {{- end}} 148 {{- range $j := interval $jStart $jEnd}} 149 {{- if eq $j $jStart}} 150 t{{$j}}, c0 = bits.Add64(lo{{$j}}, t{{$j}}, 0) 151 {{- else }} 152 t{{$j}}, c0 = bits.Add64(lo{{$j}}, t{{$j}}, c0) 153 {{- end}} 154 {{- if eq $j $.all.NbWordsLastIndex}} 155 c2, _ = bits.Add64(c2, 0, c0) 156 {{- end}} 157 {{- end}} 158 159 t{{$i}}, c0 = bits.Add64( lo{{$i}},t{{$i}}, 0) 160 {{- end}} 161 162 163 // propagate u{{$i}} + hi 164 {{- range $j := interval $jStart $jEnd}} 165 t{{$j}}, c0 = bits.Add64(u{{sub $j 1}}, t{{$j}}, c0) 166 {{- end}} 167 c2, _ = bits.Add64(u{{$.all.NbWordsLastIndex}}, c2, c0) 168 169 // hi again 170 {{- range $j := interval $jStart $jEnd}} 171 {{- if eq $j $.all.NbWordsLastIndex}} 172 c2, _ = bits.Add64(c2, u{{$j}}, {{- if eq $j $jStart}} 0 {{- else}}c0{{- end}}) 173 {{- else if eq $j $jStart}} 174 t{{add $j 1}}, c0 = bits.Add64(u{{$j}}, t{{add $j 1}}, 0) 175 {{- else }} 176 t{{add $j 1}}, c0 = bits.Add64(u{{$j}}, t{{add $j 1}}, c0) 177 {{- end}} 178 {{- end}} 179 180 {{- $k := $.all.NbWordsLastIndex}} 181 182 // this part is unchanged. 183 m := qInvNeg * t0 184 {{- range $j := $.all.NbWordsIndexesFull}} 185 u{{$j}}, lo{{$j}} = bits.Mul64(m, q{{$j}}) 186 {{- end}} 187 {{- range $j := $.all.NbWordsIndexesFull}} 188 {{- if ne $j 0}} 189 {{- if eq $j 1}} 190 _, c0 = bits.Add64(t0, lo{{sub $j 1}}, 0) 191 {{- else}} 192 t{{sub $j 2}}, c0 = bits.Add64(t{{sub $j 1}}, lo{{sub $j 1}}, c0) 193 {{- end}} 194 {{- end}} 195 {{- end}} 196 t{{sub $.all.NbWordsLastIndex 1}}, c0 = bits.Add64(0, lo{{$.all.NbWordsLastIndex}}, c0) 197 u{{$k}}, _ = bits.Add64(u{{$k}}, 0, c0) 198 199 {{- range $j := $.all.NbWordsIndexesFull}} 200 {{- if eq $j 0}} 201 t{{$j}}, c0 = bits.Add64(u{{$j}}, t{{$j}}, 0) 202 {{- else if eq $j $.all.NbWordsLastIndex}} 203 c2, _ = bits.Add64(c2, 0, c0) 204 {{- else}} 205 t{{$j}}, c0 = bits.Add64(u{{$j}}, t{{$j}}, c0) 206 {{- end}} 207 {{- end}} 208 209 {{- $k := sub $.all.NbWordsLastIndex 0}} 210 {{- $l := sub $.all.NbWordsLastIndex 1}} 211 t{{$l}}, c0 = bits.Add64(t{{$k}}, t{{$l}}, 0) 212 t{{$k}}, _ = bits.Add64(u{{$k}}, c2, c0) 213 } 214 {{- end}} 215 216 217 {{- range $i := $.all.NbWordsIndexesFull}} 218 z[{{$i}}] = t{{$i}} 219 {{- end}} 220 221 {{ end }} 222 223 224 `