github.com/Bytom/bytom@v1.1.2-0.20210127130405-ae40204c0b09/mining/tensority/cgo_algorithm/lib/BytomPoW.h (about) 1 /* BytomPoW.h */ 2 #ifndef BYTOMPOW_H 3 #define BYTOMPOW_H 4 5 #include "scrypt.h" 6 #include "sha3-allInOne.h" 7 #include <iostream> 8 #include <vector> 9 #include <time.h> 10 #include <assert.h> 11 #include <stdint.h> 12 #include <x86intrin.h> 13 #include <omp.h> 14 15 #define FNV(v1,v2) int32_t( ((v1)*FNV_PRIME) ^ (v2) ) 16 const int FNV_PRIME = 0x01000193; 17 18 struct Mat256x256i8 { 19 int8_t d[256][256]; 20 21 void toIdentityMatrix() { 22 for(int i = 0; i < 256; i++) { 23 for(int j = 0; j < 256; j++) { 24 d[i][j] = (i==j)?1:0; // diagonal 25 } 26 } 27 } 28 29 void copyFrom(const Mat256x256i8& other) { 30 for(int i = 0; i < 256; i++) { 31 for(int j = 0; j < 256; j++) { 32 this->d[j][i] = other.d[j][i]; 33 } 34 } 35 } 36 37 Mat256x256i8() { 38 // this->toIdentityMatrix(); 39 } 40 41 Mat256x256i8(const Mat256x256i8& other) { 42 this->copyFrom(other); 43 } 44 45 void copyFrom_helper(LTCMemory& ltcMem, int offset) { 46 for(int i = 0; i < 256; i++) { 47 const Words32& lo=ltcMem.get(i*4 + offset); 48 const Words32& hi=ltcMem.get(i*4 + 2 + offset); 49 for(int j = 0; j < 64; j++) { 50 uint32_t i32 = j>=32?hi.get(j-32):lo.get(j); 51 d[j*4+0][i] = (i32>> 0) & 0xFF; 52 d[j*4+1][i] = (i32>> 8) & 0xFF; 53 d[j*4+2][i] = (i32>>16) & 0xFF; 54 d[j*4+3][i] = (i32>>24) & 0xFF; 55 } 56 } 57 } 58 59 void copyFromEven(LTCMemory& ltcMem) { 60 copyFrom_helper(ltcMem, 0); 61 } 62 63 void copyFromOdd(LTCMemory& ltcMem) { 64 copyFrom_helper(ltcMem, 1); 65 } 66 67 void add(Mat256x256i8& a, Mat256x256i8& b) { 68 for(int i = 0; i < 256; i++) { 69 for(int j = 0; j < 256; j++) { 70 int tmp = int(a.d[i][j]) + int(b.d[i][j]); 71 this->d[i][j] = (tmp & 0xFF); 72 } 73 } 74 } 75 }; 76 77 struct Mat256x256i16 { 78 int16_t d[256][256]; 79 80 void toIdentityMatrix() { 81 for(int i = 0; i < 256; i++) { 82 for(int j = 0; j < 256; j++) { 83 d[i][j] = (i==j?1:0); // diagonal 84 } 85 } 86 } 87 88 void copyFrom(const Mat256x256i8& other) { 89 for(int i = 0; i < 256; i++) { 90 for(int j = 0; j < 256; j++) { 91 this->d[j][i] = int16_t(other.d[j][i]); 92 assert(this->d[j][i] == other.d[j][i]); 93 } 94 } 95 } 96 97 void copyFrom(const Mat256x256i16& other) { 98 for(int i = 0; i < 256; i++) { 99 for(int j = 0; j < 256; j++) { 100 this->d[j][i] = other.d[j][i]; 101 } 102 } 103 } 104 105 Mat256x256i16() { 106 // this->toIdentityMatrix(); 107 } 108 109 Mat256x256i16(const Mat256x256i16& other) { 110 this->copyFrom(other); 111 } 112 113 void copyFrom_helper(LTCMemory& ltcMem, int offset) { 114 for(int i = 0; i < 256; i++) { 115 const Words32& lo = ltcMem.get(i*4 + offset); 116 const Words32& hi = ltcMem.get(i*4 + 2 + offset); 117 for(int j = 0; j < 64; j++) { 118 uint32_t i32 = j>=32?hi.get(j-32):lo.get(j); 119 d[j*4+0][i] = int8_t((i32>> 0) & 0xFF); 120 d[j*4+1][i] = int8_t((i32>> 8) & 0xFF); 121 d[j*4+2][i] = int8_t((i32>>16) & 0xFF); 122 d[j*4+3][i] = int8_t((i32>>24) & 0xFF); 123 } 124 } 125 } 126 127 void copyFromEven(LTCMemory& ltcMem) { 128 copyFrom_helper(ltcMem, 0); 129 } 130 131 void copyFromOdd(LTCMemory& ltcMem) { 132 copyFrom_helper(ltcMem, 1); 133 } 134 135 void mul(const Mat256x256i16& a, const Mat256x256i16& b) { 136 for(int i = 0; i < 256; i += 16) { 137 for(int j = 0; j < 256; j += 16) { 138 for(int ii = i; ii < i+16; ii += 8) { 139 __m256i r[8],s,t[8],u[8],m[8]; 140 r[0] = _mm256_set1_epi16(0); 141 r[1] = _mm256_set1_epi16(0); 142 r[2] = _mm256_set1_epi16(0); 143 r[3] = _mm256_set1_epi16(0); 144 r[4] = _mm256_set1_epi16(0); 145 r[5] = _mm256_set1_epi16(0); 146 r[6] = _mm256_set1_epi16(0); 147 r[7] = _mm256_set1_epi16(0); 148 for(int k = 0; k < 256; k++) { 149 s = *((__m256i*)(&(b.d[k][j]))); 150 u[0] = _mm256_set1_epi16(a.d[ii+0][k]); 151 u[1] = _mm256_set1_epi16(a.d[ii+1][k]); 152 u[2] = _mm256_set1_epi16(a.d[ii+2][k]); 153 u[3] = _mm256_set1_epi16(a.d[ii+3][k]); 154 u[4] = _mm256_set1_epi16(a.d[ii+4][k]); 155 u[5] = _mm256_set1_epi16(a.d[ii+5][k]); 156 u[6] = _mm256_set1_epi16(a.d[ii+6][k]); 157 u[7] = _mm256_set1_epi16(a.d[ii+7][k]); 158 m[0] = _mm256_mullo_epi16(u[0],s); 159 m[1] = _mm256_mullo_epi16(u[1],s); 160 m[2] = _mm256_mullo_epi16(u[2],s); 161 m[3] = _mm256_mullo_epi16(u[3],s); 162 m[4] = _mm256_mullo_epi16(u[4],s); 163 m[5] = _mm256_mullo_epi16(u[5],s); 164 m[6] = _mm256_mullo_epi16(u[6],s); 165 m[7] = _mm256_mullo_epi16(u[7],s); 166 r[0] = _mm256_add_epi16(r[0],m[0]); 167 r[1] = _mm256_add_epi16(r[1],m[1]); 168 r[2] = _mm256_add_epi16(r[2],m[2]); 169 r[3] = _mm256_add_epi16(r[3],m[3]); 170 r[4] = _mm256_add_epi16(r[4],m[4]); 171 r[5] = _mm256_add_epi16(r[5],m[5]); 172 r[6] = _mm256_add_epi16(r[6],m[6]); 173 r[7] = _mm256_add_epi16(r[7],m[7]); 174 } 175 t[0] = _mm256_slli_epi16(r[0],8); 176 t[1] = _mm256_slli_epi16(r[1],8); 177 t[2] = _mm256_slli_epi16(r[2],8); 178 t[3] = _mm256_slli_epi16(r[3],8); 179 t[4] = _mm256_slli_epi16(r[4],8); 180 t[5] = _mm256_slli_epi16(r[5],8); 181 t[6] = _mm256_slli_epi16(r[6],8); 182 t[7] = _mm256_slli_epi16(r[7],8); 183 t[0] = _mm256_add_epi16(r[0],t[0]); 184 t[1] = _mm256_add_epi16(r[1],t[1]); 185 t[2] = _mm256_add_epi16(r[2],t[2]); 186 t[3] = _mm256_add_epi16(r[3],t[3]); 187 t[4] = _mm256_add_epi16(r[4],t[4]); 188 t[5] = _mm256_add_epi16(r[5],t[5]); 189 t[6] = _mm256_add_epi16(r[6],t[6]); 190 t[7] = _mm256_add_epi16(r[7],t[7]); 191 for(int x = 0; x < 8; x++) { 192 this->d[ii+x][j+0 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*0 +1))); 193 this->d[ii+x][j+1 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*1 +1))); 194 this->d[ii+x][j+2 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*2 +1))); 195 this->d[ii+x][j+3 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*3 +1))); 196 this->d[ii+x][j+4 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*4 +1))); 197 this->d[ii+x][j+5 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*5 +1))); 198 this->d[ii+x][j+6 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*6 +1))); 199 this->d[ii+x][j+7 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*7 +1))); 200 this->d[ii+x][j+8 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*8 +1))); 201 this->d[ii+x][j+9 ] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*9 +1))); 202 this->d[ii+x][j+10] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*10+1))); 203 this->d[ii+x][j+11] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*11+1))); 204 this->d[ii+x][j+12] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*12+1))); 205 this->d[ii+x][j+13] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*13+1))); 206 this->d[ii+x][j+14] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*14+1))); 207 this->d[ii+x][j+15] = int16_t(int8_t(_mm256_extract_epi8(t[x],2*15+1))); 208 } 209 } 210 } 211 } 212 } 213 214 void add(Mat256x256i16& a, Mat256x256i16& b) { 215 for(int i = 0; i < 256; i++) { 216 for(int j = 0; j < 256; j++) { 217 int tmp = int(a.d[i][j]) + int(b.d[i][j]); 218 this->d[i][j] = (tmp & 0xFF); 219 } 220 } 221 } 222 223 void toMatI8(Mat256x256i8& other) { 224 for(int i = 0; i < 256; i++) { 225 for(int j = 0; j < 256; j++) { 226 other.d[j][i] = (this->d[j][i]) & 0xFF; 227 } 228 } 229 } 230 231 void topup(Mat256x256i8& other) { 232 for(int i = 0; i < 256; i++) { 233 for(int j = 0; j < 256; j++) { 234 other.d[j][i] += (this->d[j][i]) & 0xFF; 235 } 236 } 237 } 238 }; 239 240 241 struct Arr256x64i32 { 242 uint32_t d[256][64]; 243 244 uint8_t* d0RawPtr() { 245 return (uint8_t*)(d[0]); 246 } 247 248 Arr256x64i32(const Mat256x256i8& mat) { 249 for(int j = 0; j < 256; j++) { 250 for(int i = 0; i < 64; i++) { 251 d[j][i] = ((uint32_t(uint8_t(mat.d[j][i + 192]))) << 24) | 252 ((uint32_t(uint8_t(mat.d[j][i + 128]))) << 16) | 253 ((uint32_t(uint8_t(mat.d[j][i + 64]))) << 8) | 254 ((uint32_t(uint8_t(mat.d[j][i]))) << 0); 255 } 256 } 257 } 258 259 void reduceFNV() { 260 for(int k = 256; k > 1; k = k/2) { 261 for(int j = 0; j < k/2; j++) { 262 for(int i = 0; i < 64; i++) { 263 d[j][i] = FNV(d[j][i], d[j + k/2][i]); 264 } 265 } 266 } 267 } 268 }; 269 270 // struct BytomMatList8 { 271 // std::vector<Mat256x256i8*> matVec; 272 273 // Mat256x256i8 at(int i) { 274 // return *(matVec[i]); 275 // } 276 277 // BytomMatList8() { 278 // for(int i=0; i<256; i++) { 279 // Mat256x256i8* ptr = new Mat256x256i8; 280 // assert(ptr!=NULL); 281 // matVec.push_back(ptr); 282 // } 283 // } 284 285 // ~BytomMatList8() { 286 // for(int i=0; i<256; i++) { 287 // delete matVec[i]; 288 // } 289 // } 290 291 // void init(const Words32& X_in) { 292 // Words32 X = X_in; 293 // LTCMemory ltcMem; 294 // for(int i=0; i<128; i++) { 295 // ltcMem.scrypt(X); 296 // matVec[2*i]->copyFromEven(ltcMem); 297 // matVec[2*i+1]->copyFromOdd(ltcMem); 298 // } 299 // } 300 // }; 301 302 struct BytomMatList16 { 303 std::vector<Mat256x256i16*> matVec; 304 305 Mat256x256i16 at(int i) { 306 return *(matVec[i]); 307 } 308 309 BytomMatList16() { 310 for(int i = 0; i < 256; i++) { 311 Mat256x256i16* ptr = new Mat256x256i16; 312 assert(ptr != NULL); 313 matVec.push_back(ptr); 314 } 315 } 316 317 ~BytomMatList16() { 318 for(int i = 0; i < 256; i++) 319 delete matVec[i]; 320 } 321 322 void init(const Words32& X_in) { 323 Words32 X = X_in; 324 LTCMemory ltcMem; 325 for(int i = 0; i < 128; i++) { 326 ltcMem.scrypt(X); 327 matVec[2*i]->copyFromEven(ltcMem); 328 matVec[2*i + 1]->copyFromOdd(ltcMem); 329 } 330 } 331 332 // void copyFrom(BytomMatList8& other) { 333 // for(int i=0; i<256; i++) { 334 // matVec[i]->copyFrom(*other.matVec[i]); 335 // } 336 // } 337 338 // void copyFrom(BytomMatList16& other) { 339 // for(int i=0; i<256; i++) { 340 // matVec[i]->copyFrom(*other.matVec[i]); 341 // } 342 // } 343 }; 344 345 // extern BytomMatList8* matList_int8; 346 extern BytomMatList16* matList_int16; 347 348 inline void iter_mineBytom(const uint8_t *fixedMessage, 349 uint32_t len, 350 // uint8_t nonce[8], 351 uint8_t result[32]) { 352 Mat256x256i8 *resArr8 = new Mat256x256i8[4]; 353 354 clock_t start, end; 355 start = clock(); 356 // Itz faster using single thread ... 357 #pragma omp parallel for simd 358 for(int k = 0; k < 4; k++) { // The k-loop 359 sha3_ctx *ctx = new sha3_ctx; 360 Mat256x256i16 *mat16 = new Mat256x256i16; 361 Mat256x256i16 *tmp16 = new Mat256x256i16; 362 uint8_t sequence[32]; 363 rhash_sha3_256_init(ctx); 364 rhash_sha3_update(ctx, fixedMessage + (len*k/4), len/4);//分四轮消耗掉fixedMessage 365 rhash_sha3_final(ctx, sequence); 366 tmp16->toIdentityMatrix(); 367 368 for(int j = 0; j < 2; j++) { 369 // equivalent as tmp=tmp*matlist, i+=1 370 for(int i = 0; i < 32; i += 2) { 371 // "mc = ma dot mb.T" in GoLang code 372 mat16->mul(*tmp16, matList_int16->at(sequence[i])); 373 // "ma = mc" in GoLang code 374 tmp16->mul(*mat16, matList_int16->at(sequence[i+1])); 375 } 376 } 377 // "res[k] = mc" in GoLang code 378 tmp16->toMatI8(resArr8[k]); // 0.00018s 379 delete mat16; 380 delete tmp16; 381 delete ctx; 382 } 383 384 // 3.7e-05s 385 Mat256x256i8 *res8 = new Mat256x256i8; 386 res8->add(resArr8[0], resArr8[1]); 387 res8->add(*res8, resArr8[2]); 388 res8->add(*res8, resArr8[3]); 389 390 end = clock(); 391 // std::cout << "\tTime for getting MulMatix: " 392 // << (double)(end - start) / CLOCKS_PER_SEC * 1000 << "ms" 393 // << std::endl; 394 395 Arr256x64i32 arr(*res8); 396 arr.reduceFNV(); 397 sha3_ctx *ctx = new sha3_ctx; 398 rhash_sha3_256_init(ctx); 399 rhash_sha3_update(ctx, arr.d0RawPtr(), 256); 400 rhash_sha3_final(ctx, result); 401 402 delete res8; 403 delete[] resArr8; 404 delete ctx; 405 } 406 407 inline void incrNonce(uint8_t nonce[8]) { 408 for(int i = 0; i < 8; i++) { 409 if(nonce[i] != 255) { 410 nonce[i]++; 411 break; 412 } else { 413 nonce[i] = 0; 414 } 415 } 416 } 417 418 inline int countLeadingZero(uint8_t result[32]) { 419 int count = 0; 420 for(int i = 31; i >= 0; i--) { // NOTE: reverse 421 if(result[i] < 1) { 422 count += 8; 423 } else if(result[i]<2) { 424 count += 7; 425 break; 426 } else if(result[i]<4) { 427 count += 6; 428 break; 429 } else if(result[i]<8) { 430 count += 5; 431 break; 432 } else if(result[i]<16) { 433 count += 4; 434 break; 435 } else if(result[i]<32) { 436 count += 3; 437 break; 438 } else if(result[i]<64) { 439 count += 2; 440 break; 441 } else if(result[i]<128) { 442 count += 1; 443 break; 444 } 445 } 446 return count; 447 } 448 449 // inline int test_mineBytom( 450 // const uint8_t *fixedMessage, 451 // uint32_t len, 452 // uint8_t nonce[32], 453 // int count, 454 // int leadingZeroThres) 455 // { 456 // assert(len%4==0); 457 // int step; 458 // for(step=0; step<count; step++) { 459 // uint8_t result[32]; 460 // //std::cerr<<"Mine step "<<step<<std::endl; 461 // iter_mineBytom(fixedMessage,100,nonce,result); 462 // std::cerr<<"Mine step "<<step<<std::endl; 463 // for (int i = 0; i < 32; i++) { 464 // printf("%02x ", result[i]); 465 // if (i % 8 == 7) 466 // printf("\n"); 467 // } 468 // if (countLeadingZero(result) > leadingZeroThres) 469 // return step; 470 // incrNonce(nonce); 471 // } 472 // return step; 473 // } 474 475 476 #endif 477