github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fr/fft/bitreverse.go (about) 1 // Copyright 2020 Consensys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Code generated by consensys/gnark-crypto DO NOT EDIT 16 17 package fft 18 19 import ( 20 "math/bits" 21 "runtime" 22 23 "github.com/consensys/gnark-crypto/ecc/bw6-633/fr" 24 ) 25 26 // BitReverse applies the bit-reversal permutation to v. 27 // len(v) must be a power of 2 28 func BitReverse(v []fr.Element) { 29 n := uint64(len(v)) 30 if bits.OnesCount64(n) != 1 { 31 panic("len(a) must be a power of 2") 32 } 33 34 if runtime.GOARCH == "arm64" { 35 bitReverseNaive(v) 36 } else { 37 bitReverseCobra(v) 38 } 39 } 40 41 // bitReverseNaive applies the bit-reversal permutation to v. 42 // len(v) must be a power of 2 43 func bitReverseNaive(v []fr.Element) { 44 n := uint64(len(v)) 45 nn := uint64(64 - bits.TrailingZeros64(n)) 46 47 for i := uint64(0); i < n; i++ { 48 iRev := bits.Reverse64(i) >> nn 49 if iRev > i { 50 v[i], v[iRev] = v[iRev], v[i] 51 } 52 } 53 } 54 55 // bitReverseCobraInPlace applies the bit-reversal permutation to v. 56 // len(v) must be a power of 2 57 // This is derived from: 58 // 59 // - Towards an Optimal Bit-Reversal Permutation Program 60 // Larry Carter and Kang Su Gatlin, 1998 61 // https://csaws.cs.technion.ac.il/~itai/Courses/Cache/bit.pdf 62 // 63 // - Practically efficient methods for performing bit-reversed 64 // permutation in C++11 on the x86-64 architecture 65 // Knauth, Adas, Whitfield, Wang, Ickler, Conrad, Serang, 2017 66 // https://arxiv.org/pdf/1708.01873.pdf 67 // 68 // - and more specifically, constantine implementation: 69 // https://github.com/mratsim/constantine/blob/d51699248db04e29c7b1ad97e0bafa1499db00b5/constantine/math/polynomials/fft.nim#L205 70 // by Mamy Ratsimbazafy (@mratsim). 71 func bitReverseCobraInPlace(v []fr.Element) { 72 logN := uint64(bits.Len64(uint64(len(v))) - 1) 73 logTileSize := deriveLogTileSize(logN) 74 logBLen := logN - 2*logTileSize 75 bLen := uint64(1) << logBLen 76 bShift := logBLen + logTileSize 77 tileSize := uint64(1) << logTileSize 78 79 // rough idea; 80 // bit reversal permutation naive implementation may have some cache associativity issues, 81 // since we are accessing elements by strides of powers of 2. 82 // on large inputs, this is noticeable and can be improved by using a t buffer. 83 // idea is for t buffer to be small enough to fit in cache. 84 // in the first inner loop, we copy the elements of v into t in a bit-reversed order. 85 // in the subsequent inner loops, accesses have much better cache locality than the naive implementation. 86 // hence even if we apparently do more work (swaps / copies), we are faster. 87 // 88 // on arm64 (and particularly on M1 macs), this is not noticeable, and the naive implementation is faster, 89 // in most cases. 90 // on x86 (and particularly on aws hpc6a) this is noticeable, and the t buffer implementation is faster (up to 3x). 91 // 92 // optimal choice for the tile size is cache dependent; in theory, we want the t buffer to fit in the L1 cache; 93 // in practice, a common size for L1 is 64kb, a field element is 32bytes or more. 94 // hence we can fit 2k elements in the L1 cache, which corresponds to a tile size of 2**5 with some margin for cache conflicts. 95 // 96 // for most sizes of interest, this tile size choice doesn't yield good results; 97 // we find that a tile size of 2**9 gives best results for input sizes from 2**21 up to 2**27+. 98 t := make([]fr.Element, tileSize*tileSize) 99 100 // see https://csaws.cs.technion.ac.il/~itai/Courses/Cache/bit.pdf 101 // for a detailed explanation of the algorithm. 102 for b := uint64(0); b < bLen; b++ { 103 104 for a := uint64(0); a < tileSize; a++ { 105 aRev := (bits.Reverse64(a) >> (64 - logTileSize)) << logTileSize 106 for c := uint64(0); c < tileSize; c++ { 107 idx := (a << bShift) | (b << logTileSize) | c 108 t[aRev|c] = v[idx] 109 } 110 } 111 112 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 113 114 for c := uint64(0); c < tileSize; c++ { 115 cRev := ((bits.Reverse64(c) >> (64 - logTileSize)) << bShift) | bRev 116 for aRev := uint64(0); aRev < tileSize; aRev++ { 117 a := bits.Reverse64(aRev) >> (64 - logTileSize) 118 idx := (a << bShift) | (b << logTileSize) | c 119 idxRev := cRev | aRev 120 if idx < idxRev { 121 tIdx := (aRev << logTileSize) | c 122 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 123 } 124 } 125 } 126 127 for a := uint64(0); a < tileSize; a++ { 128 aRev := bits.Reverse64(a) >> (64 - logTileSize) 129 for c := uint64(0); c < tileSize; c++ { 130 cRev := (bits.Reverse64(c) >> (64 - logTileSize)) << bShift 131 idx := (a << bShift) | (b << logTileSize) | c 132 idxRev := cRev | bRev | aRev 133 if idx < idxRev { 134 tIdx := (aRev << logTileSize) | c 135 v[idx], t[tIdx] = t[tIdx], v[idx] 136 } 137 } 138 } 139 } 140 } 141 142 func bitReverseCobra(v []fr.Element) { 143 switch len(v) { 144 case 1 << 21: 145 bitReverseCobraInPlace_9_21(v) 146 case 1 << 22: 147 bitReverseCobraInPlace_9_22(v) 148 case 1 << 23: 149 bitReverseCobraInPlace_9_23(v) 150 case 1 << 24: 151 bitReverseCobraInPlace_9_24(v) 152 case 1 << 25: 153 bitReverseCobraInPlace_9_25(v) 154 case 1 << 26: 155 bitReverseCobraInPlace_9_26(v) 156 case 1 << 27: 157 bitReverseCobraInPlace_9_27(v) 158 default: 159 if len(v) > 1<<27 { 160 bitReverseCobraInPlace(v) 161 } else { 162 bitReverseNaive(v) 163 } 164 } 165 } 166 167 func deriveLogTileSize(logN uint64) uint64 { 168 q := uint64(9) // see bitReverseCobraInPlace for more details 169 170 for int(logN)-int(2*q) <= 0 { 171 q-- 172 } 173 174 return q 175 } 176 177 // bitReverseCobraInPlace_9_21 applies the bit-reversal permutation to v. 178 // len(v) must be 1 << 21. 179 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 180 // as it declares the t buffer and various constants statically for performance. 181 func bitReverseCobraInPlace_9_21(v []fr.Element) { 182 const ( 183 logTileSize = uint64(9) 184 tileSize = uint64(1) << logTileSize 185 logN = 21 186 logBLen = logN - 2*logTileSize 187 bShift = logBLen + logTileSize 188 bLen = uint64(1) << logBLen 189 ) 190 191 var t [tileSize * tileSize]fr.Element 192 193 for b := uint64(0); b < bLen; b++ { 194 195 for a := uint64(0); a < tileSize; a++ { 196 aRev := (bits.Reverse64(a) >> 55) << logTileSize 197 for c := uint64(0); c < tileSize; c++ { 198 idx := (a << bShift) | (b << logTileSize) | c 199 t[aRev|c] = v[idx] 200 } 201 } 202 203 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 204 205 for c := uint64(0); c < tileSize; c++ { 206 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 207 for aRev := uint64(0); aRev < tileSize; aRev++ { 208 a := bits.Reverse64(aRev) >> 55 209 idx := (a << bShift) | (b << logTileSize) | c 210 idxRev := cRev | aRev 211 if idx < idxRev { 212 tIdx := (aRev << logTileSize) | c 213 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 214 } 215 } 216 } 217 218 for a := uint64(0); a < tileSize; a++ { 219 aRev := bits.Reverse64(a) >> 55 220 for c := uint64(0); c < tileSize; c++ { 221 cRev := (bits.Reverse64(c) >> 55) << bShift 222 idx := (a << bShift) | (b << logTileSize) | c 223 idxRev := cRev | bRev | aRev 224 if idx < idxRev { 225 tIdx := (aRev << logTileSize) | c 226 v[idx], t[tIdx] = t[tIdx], v[idx] 227 } 228 } 229 } 230 } 231 232 } 233 234 // bitReverseCobraInPlace_9_22 applies the bit-reversal permutation to v. 235 // len(v) must be 1 << 22. 236 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 237 // as it declares the t buffer and various constants statically for performance. 238 func bitReverseCobraInPlace_9_22(v []fr.Element) { 239 const ( 240 logTileSize = uint64(9) 241 tileSize = uint64(1) << logTileSize 242 logN = 22 243 logBLen = logN - 2*logTileSize 244 bShift = logBLen + logTileSize 245 bLen = uint64(1) << logBLen 246 ) 247 248 var t [tileSize * tileSize]fr.Element 249 250 for b := uint64(0); b < bLen; b++ { 251 252 for a := uint64(0); a < tileSize; a++ { 253 aRev := (bits.Reverse64(a) >> 55) << logTileSize 254 for c := uint64(0); c < tileSize; c++ { 255 idx := (a << bShift) | (b << logTileSize) | c 256 t[aRev|c] = v[idx] 257 } 258 } 259 260 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 261 262 for c := uint64(0); c < tileSize; c++ { 263 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 264 for aRev := uint64(0); aRev < tileSize; aRev++ { 265 a := bits.Reverse64(aRev) >> 55 266 idx := (a << bShift) | (b << logTileSize) | c 267 idxRev := cRev | aRev 268 if idx < idxRev { 269 tIdx := (aRev << logTileSize) | c 270 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 271 } 272 } 273 } 274 275 for a := uint64(0); a < tileSize; a++ { 276 aRev := bits.Reverse64(a) >> 55 277 for c := uint64(0); c < tileSize; c++ { 278 cRev := (bits.Reverse64(c) >> 55) << bShift 279 idx := (a << bShift) | (b << logTileSize) | c 280 idxRev := cRev | bRev | aRev 281 if idx < idxRev { 282 tIdx := (aRev << logTileSize) | c 283 v[idx], t[tIdx] = t[tIdx], v[idx] 284 } 285 } 286 } 287 } 288 289 } 290 291 // bitReverseCobraInPlace_9_23 applies the bit-reversal permutation to v. 292 // len(v) must be 1 << 23. 293 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 294 // as it declares the t buffer and various constants statically for performance. 295 func bitReverseCobraInPlace_9_23(v []fr.Element) { 296 const ( 297 logTileSize = uint64(9) 298 tileSize = uint64(1) << logTileSize 299 logN = 23 300 logBLen = logN - 2*logTileSize 301 bShift = logBLen + logTileSize 302 bLen = uint64(1) << logBLen 303 ) 304 305 var t [tileSize * tileSize]fr.Element 306 307 for b := uint64(0); b < bLen; b++ { 308 309 for a := uint64(0); a < tileSize; a++ { 310 aRev := (bits.Reverse64(a) >> 55) << logTileSize 311 for c := uint64(0); c < tileSize; c++ { 312 idx := (a << bShift) | (b << logTileSize) | c 313 t[aRev|c] = v[idx] 314 } 315 } 316 317 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 318 319 for c := uint64(0); c < tileSize; c++ { 320 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 321 for aRev := uint64(0); aRev < tileSize; aRev++ { 322 a := bits.Reverse64(aRev) >> 55 323 idx := (a << bShift) | (b << logTileSize) | c 324 idxRev := cRev | aRev 325 if idx < idxRev { 326 tIdx := (aRev << logTileSize) | c 327 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 328 } 329 } 330 } 331 332 for a := uint64(0); a < tileSize; a++ { 333 aRev := bits.Reverse64(a) >> 55 334 for c := uint64(0); c < tileSize; c++ { 335 cRev := (bits.Reverse64(c) >> 55) << bShift 336 idx := (a << bShift) | (b << logTileSize) | c 337 idxRev := cRev | bRev | aRev 338 if idx < idxRev { 339 tIdx := (aRev << logTileSize) | c 340 v[idx], t[tIdx] = t[tIdx], v[idx] 341 } 342 } 343 } 344 } 345 346 } 347 348 // bitReverseCobraInPlace_9_24 applies the bit-reversal permutation to v. 349 // len(v) must be 1 << 24. 350 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 351 // as it declares the t buffer and various constants statically for performance. 352 func bitReverseCobraInPlace_9_24(v []fr.Element) { 353 const ( 354 logTileSize = uint64(9) 355 tileSize = uint64(1) << logTileSize 356 logN = 24 357 logBLen = logN - 2*logTileSize 358 bShift = logBLen + logTileSize 359 bLen = uint64(1) << logBLen 360 ) 361 362 var t [tileSize * tileSize]fr.Element 363 364 for b := uint64(0); b < bLen; b++ { 365 366 for a := uint64(0); a < tileSize; a++ { 367 aRev := (bits.Reverse64(a) >> 55) << logTileSize 368 for c := uint64(0); c < tileSize; c++ { 369 idx := (a << bShift) | (b << logTileSize) | c 370 t[aRev|c] = v[idx] 371 } 372 } 373 374 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 375 376 for c := uint64(0); c < tileSize; c++ { 377 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 378 for aRev := uint64(0); aRev < tileSize; aRev++ { 379 a := bits.Reverse64(aRev) >> 55 380 idx := (a << bShift) | (b << logTileSize) | c 381 idxRev := cRev | aRev 382 if idx < idxRev { 383 tIdx := (aRev << logTileSize) | c 384 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 385 } 386 } 387 } 388 389 for a := uint64(0); a < tileSize; a++ { 390 aRev := bits.Reverse64(a) >> 55 391 for c := uint64(0); c < tileSize; c++ { 392 cRev := (bits.Reverse64(c) >> 55) << bShift 393 idx := (a << bShift) | (b << logTileSize) | c 394 idxRev := cRev | bRev | aRev 395 if idx < idxRev { 396 tIdx := (aRev << logTileSize) | c 397 v[idx], t[tIdx] = t[tIdx], v[idx] 398 } 399 } 400 } 401 } 402 403 } 404 405 // bitReverseCobraInPlace_9_25 applies the bit-reversal permutation to v. 406 // len(v) must be 1 << 25. 407 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 408 // as it declares the t buffer and various constants statically for performance. 409 func bitReverseCobraInPlace_9_25(v []fr.Element) { 410 const ( 411 logTileSize = uint64(9) 412 tileSize = uint64(1) << logTileSize 413 logN = 25 414 logBLen = logN - 2*logTileSize 415 bShift = logBLen + logTileSize 416 bLen = uint64(1) << logBLen 417 ) 418 419 var t [tileSize * tileSize]fr.Element 420 421 for b := uint64(0); b < bLen; b++ { 422 423 for a := uint64(0); a < tileSize; a++ { 424 aRev := (bits.Reverse64(a) >> 55) << logTileSize 425 for c := uint64(0); c < tileSize; c++ { 426 idx := (a << bShift) | (b << logTileSize) | c 427 t[aRev|c] = v[idx] 428 } 429 } 430 431 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 432 433 for c := uint64(0); c < tileSize; c++ { 434 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 435 for aRev := uint64(0); aRev < tileSize; aRev++ { 436 a := bits.Reverse64(aRev) >> 55 437 idx := (a << bShift) | (b << logTileSize) | c 438 idxRev := cRev | aRev 439 if idx < idxRev { 440 tIdx := (aRev << logTileSize) | c 441 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 442 } 443 } 444 } 445 446 for a := uint64(0); a < tileSize; a++ { 447 aRev := bits.Reverse64(a) >> 55 448 for c := uint64(0); c < tileSize; c++ { 449 cRev := (bits.Reverse64(c) >> 55) << bShift 450 idx := (a << bShift) | (b << logTileSize) | c 451 idxRev := cRev | bRev | aRev 452 if idx < idxRev { 453 tIdx := (aRev << logTileSize) | c 454 v[idx], t[tIdx] = t[tIdx], v[idx] 455 } 456 } 457 } 458 } 459 460 } 461 462 // bitReverseCobraInPlace_9_26 applies the bit-reversal permutation to v. 463 // len(v) must be 1 << 26. 464 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 465 // as it declares the t buffer and various constants statically for performance. 466 func bitReverseCobraInPlace_9_26(v []fr.Element) { 467 const ( 468 logTileSize = uint64(9) 469 tileSize = uint64(1) << logTileSize 470 logN = 26 471 logBLen = logN - 2*logTileSize 472 bShift = logBLen + logTileSize 473 bLen = uint64(1) << logBLen 474 ) 475 476 var t [tileSize * tileSize]fr.Element 477 478 for b := uint64(0); b < bLen; b++ { 479 480 for a := uint64(0); a < tileSize; a++ { 481 aRev := (bits.Reverse64(a) >> 55) << logTileSize 482 for c := uint64(0); c < tileSize; c++ { 483 idx := (a << bShift) | (b << logTileSize) | c 484 t[aRev|c] = v[idx] 485 } 486 } 487 488 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 489 490 for c := uint64(0); c < tileSize; c++ { 491 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 492 for aRev := uint64(0); aRev < tileSize; aRev++ { 493 a := bits.Reverse64(aRev) >> 55 494 idx := (a << bShift) | (b << logTileSize) | c 495 idxRev := cRev | aRev 496 if idx < idxRev { 497 tIdx := (aRev << logTileSize) | c 498 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 499 } 500 } 501 } 502 503 for a := uint64(0); a < tileSize; a++ { 504 aRev := bits.Reverse64(a) >> 55 505 for c := uint64(0); c < tileSize; c++ { 506 cRev := (bits.Reverse64(c) >> 55) << bShift 507 idx := (a << bShift) | (b << logTileSize) | c 508 idxRev := cRev | bRev | aRev 509 if idx < idxRev { 510 tIdx := (aRev << logTileSize) | c 511 v[idx], t[tIdx] = t[tIdx], v[idx] 512 } 513 } 514 } 515 } 516 517 } 518 519 // bitReverseCobraInPlace_9_27 applies the bit-reversal permutation to v. 520 // len(v) must be 1 << 27. 521 // see bitReverseCobraInPlace for more details; this function is specialized for 9, 522 // as it declares the t buffer and various constants statically for performance. 523 func bitReverseCobraInPlace_9_27(v []fr.Element) { 524 const ( 525 logTileSize = uint64(9) 526 tileSize = uint64(1) << logTileSize 527 logN = 27 528 logBLen = logN - 2*logTileSize 529 bShift = logBLen + logTileSize 530 bLen = uint64(1) << logBLen 531 ) 532 533 var t [tileSize * tileSize]fr.Element 534 535 for b := uint64(0); b < bLen; b++ { 536 537 for a := uint64(0); a < tileSize; a++ { 538 aRev := (bits.Reverse64(a) >> 55) << logTileSize 539 for c := uint64(0); c < tileSize; c++ { 540 idx := (a << bShift) | (b << logTileSize) | c 541 t[aRev|c] = v[idx] 542 } 543 } 544 545 bRev := (bits.Reverse64(b) >> (64 - logBLen)) << logTileSize 546 547 for c := uint64(0); c < tileSize; c++ { 548 cRev := ((bits.Reverse64(c) >> 55) << bShift) | bRev 549 for aRev := uint64(0); aRev < tileSize; aRev++ { 550 a := bits.Reverse64(aRev) >> 55 551 idx := (a << bShift) | (b << logTileSize) | c 552 idxRev := cRev | aRev 553 if idx < idxRev { 554 tIdx := (aRev << logTileSize) | c 555 v[idxRev], t[tIdx] = t[tIdx], v[idxRev] 556 } 557 } 558 } 559 560 for a := uint64(0); a < tileSize; a++ { 561 aRev := bits.Reverse64(a) >> 55 562 for c := uint64(0); c < tileSize; c++ { 563 cRev := (bits.Reverse64(c) >> 55) << bShift 564 idx := (a << bShift) | (b << logTileSize) | c 565 idxRev := cRev | bRev | aRev 566 if idx < idxRev { 567 tIdx := (aRev << logTileSize) | c 568 v[idx], t[tIdx] = t[tIdx], v[idx] 569 } 570 } 571 } 572 } 573 574 }