github.com/Bytom/bytom@v1.1.2-0.20210127130405-ae40204c0b09/mining/tensority/cgo_algorithm/lib/BytomPoW.h (about)

     1  /* BytomPoW.h */
     2  #ifndef BYTOMPOW_H
     3  #define BYTOMPOW_H
     4  
     5  #include "scrypt.h"
     6  #include "sha3-allInOne.h"
     7  #include <iostream>
     8  #include <vector>
     9  #include <time.h>
    10  #include <assert.h>
    11  #include <stdint.h>
    12  #include <x86intrin.h>
    13  #include <omp.h>
    14  
// Mixing step used by Arr256x64i32::reduceFNV: multiplies v1 by FNV_PRIME,
// XORs the product with v2 and converts the result to int32_t.
// Both arguments are parenthesized, and each is evaluated exactly once.
#define FNV(v1,v2) int32_t( ((v1)*FNV_PRIME) ^ (v2) )
// Multiplier for the FNV macro above (0x01000193 is the 32-bit FNV prime).
const int FNV_PRIME = 0x01000193;
    17  
    18  struct Mat256x256i8 {
    19      int8_t d[256][256];
    20  
    21      void toIdentityMatrix() {
    22          for(int i = 0; i < 256; i++) {
    23              for(int j = 0; j < 256; j++) {
    24                  d[i][j] = (i==j)?1:0; // diagonal
    25              }
    26          }
    27      }
    28  
    29      void copyFrom(const Mat256x256i8& other) {
    30          for(int i = 0; i < 256; i++) {
    31              for(int j = 0; j < 256; j++) {
    32                  this->d[j][i] = other.d[j][i];
    33              }
    34          }
    35      }
    36  
    37      Mat256x256i8() {
    38  //        this->toIdentityMatrix();
    39      }
    40  
    41      Mat256x256i8(const Mat256x256i8& other) {
    42          this->copyFrom(other);
    43      }
    44  
    45      void copyFrom_helper(LTCMemory& ltcMem, int offset) {
    46          for(int i = 0; i < 256; i++) {
    47              const Words32& lo=ltcMem.get(i*4 + offset);
    48              const Words32& hi=ltcMem.get(i*4 + 2 + offset);
    49              for(int j = 0; j < 64; j++) {
    50                  uint32_t i32 = j>=32?hi.get(j-32):lo.get(j);
    51                  d[j*4+0][i] = (i32>> 0) & 0xFF;
    52                  d[j*4+1][i] = (i32>> 8) & 0xFF;
    53                  d[j*4+2][i] = (i32>>16) & 0xFF;
    54                  d[j*4+3][i] = (i32>>24) & 0xFF;
    55              }
    56          }
    57      }
    58  
    59      void copyFromEven(LTCMemory& ltcMem) {
    60          copyFrom_helper(ltcMem, 0);
    61      }
    62  
    63      void copyFromOdd(LTCMemory& ltcMem) {
    64          copyFrom_helper(ltcMem, 1);
    65      }
    66  
    67      void add(Mat256x256i8& a, Mat256x256i8& b) {
    68          for(int i = 0; i < 256; i++) {
    69              for(int j = 0; j < 256; j++) {
    70                  int tmp = int(a.d[i][j]) + int(b.d[i][j]);
    71                  this->d[i][j] = (tmp & 0xFF);
    72              }
    73          }
    74      }
    75  };
    76  
    77  struct Mat256x256i16 {
    78      int16_t d[256][256];
    79  
    80      void toIdentityMatrix() {
    81          for(int i = 0; i < 256; i++) {
    82              for(int j = 0; j < 256; j++) {
    83                  d[i][j] = (i==j?1:0); // diagonal
    84              }
    85          }
    86      }
    87  
    88      void copyFrom(const Mat256x256i8& other) {
    89          for(int i = 0; i < 256; i++) {
    90              for(int j = 0; j < 256; j++) {
    91                  this->d[j][i] = int16_t(other.d[j][i]);
    92                  assert(this->d[j][i] == other.d[j][i]);
    93              }
    94          }
    95      }
    96  
    97      void copyFrom(const Mat256x256i16& other) {
    98          for(int i = 0; i < 256; i++) {
    99              for(int j = 0; j < 256; j++) {
   100                  this->d[j][i] = other.d[j][i];
   101              }
   102          }
   103      }
   104  
   105      Mat256x256i16() {
   106  //        this->toIdentityMatrix();
   107      }
   108  
   109      Mat256x256i16(const Mat256x256i16& other) {
   110          this->copyFrom(other);
   111      }
   112  
   113      void copyFrom_helper(LTCMemory& ltcMem, int offset) {
   114          for(int i = 0; i < 256; i++) {
   115              const Words32& lo = ltcMem.get(i*4 + offset);
   116              const Words32& hi = ltcMem.get(i*4 + 2 + offset);
   117              for(int j = 0; j < 64; j++) {
   118                  uint32_t i32 = j>=32?hi.get(j-32):lo.get(j);
   119                  d[j*4+0][i] = int8_t((i32>> 0) & 0xFF);
   120                  d[j*4+1][i] = int8_t((i32>> 8) & 0xFF);
   121                  d[j*4+2][i] = int8_t((i32>>16) & 0xFF);
   122                  d[j*4+3][i] = int8_t((i32>>24) & 0xFF);
   123              }
   124          }
   125      }
   126  
   127      void copyFromEven(LTCMemory& ltcMem) {
   128          copyFrom_helper(ltcMem, 0);
   129      }
   130  
   131      void copyFromOdd(LTCMemory& ltcMem) {
   132          copyFrom_helper(ltcMem, 1);
   133      }
   134  
   135      void mul(const Mat256x256i16& a, const Mat256x256i16& b) {
   136          for(int i = 0; i < 256; i += 16) {
   137              for(int j = 0; j < 256; j += 16) {
   138                  for(int ii = i; ii < i+16; ii += 8) {
   139                      __m256i r[8],s,t[8],u[8],m[8];
   140                      r[0] = _mm256_set1_epi16(0);
   141                      r[1] = _mm256_set1_epi16(0);
   142                      r[2] = _mm256_set1_epi16(0);
   143                      r[3] = _mm256_set1_epi16(0);
   144                      r[4] = _mm256_set1_epi16(0);
   145                      r[5] = _mm256_set1_epi16(0);
   146                      r[6] = _mm256_set1_epi16(0);
   147                      r[7] = _mm256_set1_epi16(0);
   148                      for(int k = 0; k < 256; k++) {
   149                          s = *((__m256i*)(&(b.d[k][j])));
   150                          u[0] = _mm256_set1_epi16(a.d[ii+0][k]);
   151                          u[1] = _mm256_set1_epi16(a.d[ii+1][k]);
   152                          u[2] = _mm256_set1_epi16(a.d[ii+2][k]);
   153                          u[3] = _mm256_set1_epi16(a.d[ii+3][k]);
   154                          u[4] = _mm256_set1_epi16(a.d[ii+4][k]);
   155                          u[5] = _mm256_set1_epi16(a.d[ii+5][k]);
   156                          u[6] = _mm256_set1_epi16(a.d[ii+6][k]);
   157                          u[7] = _mm256_set1_epi16(a.d[ii+7][k]);
   158                          m[0] = _mm256_mullo_epi16(u[0],s);
   159                          m[1] = _mm256_mullo_epi16(u[1],s);
   160                          m[2] = _mm256_mullo_epi16(u[2],s);
   161                          m[3] = _mm256_mullo_epi16(u[3],s);
   162                          m[4] = _mm256_mullo_epi16(u[4],s);
   163                          m[5] = _mm256_mullo_epi16(u[5],s);
   164                          m[6] = _mm256_mullo_epi16(u[6],s);
   165                          m[7] = _mm256_mullo_epi16(u[7],s);
   166                          r[0] = _mm256_add_epi16(r[0],m[0]);
   167                          r[1] = _mm256_add_epi16(r[1],m[1]);
   168                          r[2] = _mm256_add_epi16(r[2],m[2]);
   169                          r[3] = _mm256_add_epi16(r[3],m[3]);
   170                          r[4] = _mm256_add_epi16(r[4],m[4]);
   171                          r[5] = _mm256_add_epi16(r[5],m[5]);
   172                          r[6] = _mm256_add_epi16(r[6],m[6]);
   173                          r[7] = _mm256_add_epi16(r[7],m[7]);
   174                      }
   175                      t[0] = _mm256_slli_epi16(r[0],8);
   176                      t[1] = _mm256_slli_epi16(r[1],8);
   177                      t[2] = _mm256_slli_epi16(r[2],8);
   178                      t[3] = _mm256_slli_epi16(r[3],8);
   179                      t[4] = _mm256_slli_epi16(r[4],8);
   180                      t[5] = _mm256_slli_epi16(r[5],8);
   181                      t[6] = _mm256_slli_epi16(r[6],8);
   182                      t[7] = _mm256_slli_epi16(r[7],8);
   183                      t[0] = _mm256_add_epi16(r[0],t[0]);
   184                      t[1] = _mm256_add_epi16(r[1],t[1]);
   185                      t[2] = _mm256_add_epi16(r[2],t[2]);
   186                      t[3] = _mm256_add_epi16(r[3],t[3]);
   187                      t[4] = _mm256_add_epi16(r[4],t[4]);
   188                      t[5] = _mm256_add_epi16(r[5],t[5]);
   189                      t[6] = _mm256_add_epi16(r[6],t[6]);
   190                      t[7] = _mm256_add_epi16(r[7],t[7]);
   191                      for(int x = 0; x < 8; x++) {
   192                          this->d[ii+x][j+0 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*0 +1)));
   193                          this->d[ii+x][j+1 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*1 +1)));
   194                          this->d[ii+x][j+2 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*2 +1)));
   195                          this->d[ii+x][j+3 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*3 +1)));
   196                          this->d[ii+x][j+4 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*4 +1)));
   197                          this->d[ii+x][j+5 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*5 +1)));
   198                          this->d[ii+x][j+6 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*6 +1)));
   199                          this->d[ii+x][j+7 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*7 +1)));
   200                          this->d[ii+x][j+8 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*8 +1)));
   201                          this->d[ii+x][j+9 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*9 +1)));
   202                          this->d[ii+x][j+10] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*10+1)));
   203                          this->d[ii+x][j+11] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*11+1)));
   204                          this->d[ii+x][j+12] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*12+1)));
   205                          this->d[ii+x][j+13] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*13+1)));
   206                          this->d[ii+x][j+14] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*14+1)));
   207                          this->d[ii+x][j+15] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*15+1)));
   208                      }
   209                  }
   210              }
   211          }
   212      }
   213  
   214      void add(Mat256x256i16& a, Mat256x256i16& b) {
   215          for(int i = 0; i < 256; i++) {
   216              for(int j = 0; j < 256; j++) {
   217                  int tmp = int(a.d[i][j]) + int(b.d[i][j]);
   218                  this->d[i][j] = (tmp & 0xFF);
   219              }
   220          }
   221      }
   222  
   223      void toMatI8(Mat256x256i8& other) {
   224          for(int i = 0; i < 256; i++) {
   225              for(int j = 0; j < 256; j++) {
   226                  other.d[j][i] = (this->d[j][i]) & 0xFF;
   227              }
   228          }
   229      }
   230  
   231      void topup(Mat256x256i8& other) {
   232          for(int i = 0; i < 256; i++) {
   233              for(int j = 0; j < 256; j++) {
   234                  other.d[j][i] += (this->d[j][i]) & 0xFF;
   235              }
   236          }
   237      }
   238  };
   239  
   240  
   241  struct Arr256x64i32 {
   242      uint32_t d[256][64];
   243  
   244      uint8_t* d0RawPtr() {
   245          return (uint8_t*)(d[0]);
   246      }
   247  
   248      Arr256x64i32(const Mat256x256i8& mat) {
   249          for(int j = 0; j < 256; j++) {
   250              for(int i = 0; i < 64; i++) {
   251                  d[j][i] = ((uint32_t(uint8_t(mat.d[j][i + 192]))) << 24) |
   252                            ((uint32_t(uint8_t(mat.d[j][i + 128]))) << 16) |
   253                            ((uint32_t(uint8_t(mat.d[j][i +  64]))) <<  8) |
   254                            ((uint32_t(uint8_t(mat.d[j][i]))) << 0);
   255              }
   256          }
   257      }
   258  
   259      void reduceFNV() {
   260          for(int k = 256; k > 1; k = k/2) {
   261              for(int j = 0; j < k/2; j++) {
   262                  for(int i = 0; i < 64; i++) {
   263                      d[j][i] = FNV(d[j][i], d[j + k/2][i]);
   264                  }
   265              }
   266          }
   267      }
   268  };
   269  
   270  // struct BytomMatList8 {
   271  //     std::vector<Mat256x256i8*> matVec;
   272  
   273  //     Mat256x256i8 at(int i) {
   274  //         return *(matVec[i]);
   275  //     }
   276  
   277  //     BytomMatList8() {
   278  //         for(int i=0; i<256; i++) {
   279  //             Mat256x256i8* ptr = new Mat256x256i8;
   280  //             assert(ptr!=NULL);
   281  //             matVec.push_back(ptr);
   282  //         }
   283  //     }
   284  
   285  //     ~BytomMatList8() {
   286  //         for(int i=0; i<256; i++) {
   287  //             delete matVec[i];
   288  //         }
   289  //     }
   290  
   291  //     void init(const Words32& X_in) {
   292  //         Words32 X = X_in;
   293  //         LTCMemory ltcMem;
   294  //         for(int i=0; i<128; i++) {
   295  //             ltcMem.scrypt(X);
   296  //             matVec[2*i]->copyFromEven(ltcMem);
   297  //             matVec[2*i+1]->copyFromOdd(ltcMem);
   298  //         }
   299  //     }
   300  // };
   301  
   302  struct BytomMatList16 {
   303      std::vector<Mat256x256i16*> matVec;
   304  
   305      Mat256x256i16 at(int i) {
   306          return *(matVec[i]);
   307      }
   308  
   309      BytomMatList16() {
   310          for(int i = 0; i < 256; i++) {
   311              Mat256x256i16* ptr = new Mat256x256i16;
   312              assert(ptr != NULL);
   313              matVec.push_back(ptr);
   314          }
   315      }
   316  
   317      ~BytomMatList16() {
   318          for(int i = 0; i < 256; i++)
   319              delete matVec[i];
   320      }
   321  
   322      void init(const Words32& X_in) {
   323          Words32 X = X_in;
   324          LTCMemory ltcMem;
   325          for(int i = 0; i < 128; i++) {
   326              ltcMem.scrypt(X);
   327              matVec[2*i]->copyFromEven(ltcMem);
   328              matVec[2*i + 1]->copyFromOdd(ltcMem);
   329          }
   330      }
   331  
   332      // void copyFrom(BytomMatList8& other) {
   333      //     for(int i=0; i<256; i++) {
   334      //         matVec[i]->copyFrom(*other.matVec[i]);
   335      //     }
   336      // }
   337  
   338      // void copyFrom(BytomMatList16& other) {
   339      //     for(int i=0; i<256; i++) {
   340      //         matVec[i]->copyFrom(*other.matVec[i]);
   341      //     }
   342      // }
   343  };
   344  
   345  // extern BytomMatList8* matList_int8;
// Shared matrix list read by iter_mineBytom(); only declared here, so the
// definition must be provided by another translation unit.
extern BytomMatList16* matList_int16;
   347  
   348  inline void iter_mineBytom(const uint8_t *fixedMessage,
   349                              uint32_t len,
   350                              // uint8_t nonce[8],
   351                              uint8_t result[32]) {
   352      Mat256x256i8 *resArr8 = new Mat256x256i8[4];
   353  
   354      clock_t start, end;
   355      start = clock();
   356      // Itz faster using single thread ...
   357      #pragma omp parallel for simd
   358      for(int k = 0; k < 4; k++) { // The k-loop
   359          sha3_ctx *ctx = new sha3_ctx;
   360          Mat256x256i16 *mat16 = new Mat256x256i16;
   361          Mat256x256i16 *tmp16 = new Mat256x256i16;
   362          uint8_t sequence[32];
   363          rhash_sha3_256_init(ctx);
   364          rhash_sha3_update(ctx, fixedMessage + (len*k/4), len/4);//分四轮消耗掉fixedMessage
   365          rhash_sha3_final(ctx, sequence);
   366          tmp16->toIdentityMatrix();
   367  
   368          for(int j = 0; j < 2; j++) {
   369              // equivalent as tmp=tmp*matlist, i+=1 
   370              for(int i = 0; i < 32; i += 2) {
   371                  // "mc = ma dot mb.T" in GoLang code
   372                  mat16->mul(*tmp16, matList_int16->at(sequence[i]));
   373                  // "ma = mc" in GoLang code
   374                  tmp16->mul(*mat16, matList_int16->at(sequence[i+1]));
   375              }
   376          }
   377          // "res[k] = mc" in GoLang code
   378          tmp16->toMatI8(resArr8[k]); // 0.00018s
   379          delete mat16;
   380          delete tmp16;
   381          delete ctx;
   382      }
   383  
   384      // 3.7e-05s
   385      Mat256x256i8 *res8 = new Mat256x256i8;
   386      res8->add(resArr8[0], resArr8[1]);
   387      res8->add(*res8, resArr8[2]);
   388      res8->add(*res8, resArr8[3]);
   389  
   390      end = clock();    
   391      // std::cout << "\tTime for getting MulMatix: "
   392      //           << (double)(end - start) / CLOCKS_PER_SEC * 1000 << "ms"
   393      //           << std::endl;
   394  
   395      Arr256x64i32 arr(*res8);
   396      arr.reduceFNV();
   397      sha3_ctx *ctx = new sha3_ctx;
   398      rhash_sha3_256_init(ctx);
   399      rhash_sha3_update(ctx, arr.d0RawPtr(), 256);
   400      rhash_sha3_final(ctx, result);
   401  
   402      delete res8;
   403      delete[] resArr8;
   404      delete ctx;
   405  }
   406  
   407  inline void incrNonce(uint8_t nonce[8]) {
   408      for(int i = 0; i < 8; i++) {
   409          if(nonce[i] != 255) {
   410              nonce[i]++;
   411              break;
   412          } else {
   413              nonce[i] = 0;
   414          }
   415      }
   416  }
   417  
   418  inline int countLeadingZero(uint8_t result[32]) {
   419      int count = 0;
   420      for(int i = 31; i >= 0; i--) { // NOTE: reverse
   421          if(result[i] < 1) {
   422              count += 8;
   423          } else if(result[i]<2)  {
   424              count += 7;
   425              break;
   426          } else if(result[i]<4)  {
   427              count += 6;
   428              break;
   429          } else if(result[i]<8)  {
   430              count += 5;
   431              break;
   432          } else if(result[i]<16) {
   433              count += 4;
   434              break;
   435          } else if(result[i]<32) {
   436              count += 3;
   437              break;
   438          } else if(result[i]<64) {
   439              count += 2;
   440              break;
   441          } else if(result[i]<128) {
   442              count += 1;
   443              break;
   444          }
   445      }
   446      return count;
   447  }
   448  
   449  // inline int test_mineBytom(
   450  //     const uint8_t *fixedMessage,
   451  //     uint32_t len,
   452  //     uint8_t nonce[32],
   453  //     int count,
   454  //     int leadingZeroThres)
   455  // {
   456  //   assert(len%4==0);
   457  //   int step;
   458  //   for(step=0; step<count; step++) {
   459  //     uint8_t result[32];
   460  //     //std::cerr<<"Mine step "<<step<<std::endl;
   461  //     iter_mineBytom(fixedMessage,100,nonce,result);
   462  //     std::cerr<<"Mine step "<<step<<std::endl;
   463  //     for (int i = 0; i < 32; i++) {
   464  //       printf("%02x ", result[i]);
   465  //       if (i % 8 == 7)
   466  //         printf("\n");
   467  //     }
   468  //     if (countLeadingZero(result) > leadingZeroThres)
   469  //       return step;
   470  //     incrNonce(nonce);
   471  //   }
   472  //   return step;
   473  // }
   474  
   475  
   476  #endif
   477