github.com/deroproject/derosuite@v2.1.6-1.0.20200307070847-0f2e589c7a2b+incompatible/astrobwt/astrobwt_optimized.go (about) 1 package astrobwt 2 3 //import "os" 4 //import "fmt" 5 6 import "sync" 7 import "encoding/binary" 8 import "golang.org/x/crypto/sha3" 9 10 import "golang.org/x/crypto/salsa20/salsa" 11 12 // see here to improve the algorithms more https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md 13 14 // Original implementation was in xmrig miner, however it had a flaw which has been fixed 15 // this optimized algorithm is used only in the miner and not in the blockchain 16 17 //const stage1_length int = 147253 // it is a prime 18 //const max_length int = 1024*1024 + stage1_length + 1024 19 20 type Data struct { 21 stage1 [stage1_length + 64]byte // stages are taken from it 22 stage1_result [stage1_length + 1]byte 23 stage2 [1024*1024 + stage1_length + 1 + 64]byte 24 stage2_result [1024*1024 + stage1_length + 1]byte 25 indices [ALLOCATION_SIZE]uint64 26 tmp_indices [ALLOCATION_SIZE]uint64 27 } 28 29 var pool = sync.Pool{New: func() interface{} { return &Data{} }} 30 31 func POW_optimized_v1(inputdata []byte, max_limit int) (outputhash [32]byte, success bool) { 32 data := pool.Get().(*Data) 33 outputhash, success = POW_optimized_v2(inputdata,max_limit,data) 34 pool.Put(data) 35 return 36 } 37 func POW_optimized_v2(inputdata []byte, max_limit int, data *Data) (outputhash [32]byte, success bool) { 38 39 var counter [16]byte 40 41 42 for i := range data.stage1 { 43 data.stage1[i] = 0 44 } 45 /* for i := range data.stage1_result{ 46 data.stage1_result[i] =0 47 }*/ 48 49 key := sha3.Sum256(inputdata) 50 salsa.XORKeyStream(data.stage1[1:stage1_length+1], data.stage1[1:stage1_length+1], &counter, &key) 51 sort_indices(stage1_length+1, data.stage1[:], data.stage1_result[:], data) 52 key = sha3.Sum256(data.stage1_result[:]) 53 stage2_length := stage1_length + int(binary.LittleEndian.Uint32(key[:])&0xfffff) 54 55 if stage2_length > max_limit { 56 for i := range outputhash { // will be optimized by compiler 57 outputhash[i] = 0xff 58 } 59 success = false 60 return 61 } 62 63 for i := range counter { // will be optimized by compiler 64 counter[i] = 0 65 } 66 67 salsa.XORKeyStream(data.stage2[1:stage2_length+1], data.stage2[1:stage2_length+1], &counter, &key) 68 sort_indices(stage2_length+1, data.stage2[:], data.stage2_result[:], data) 69 key = sha3.Sum256(data.stage2_result[:stage2_length+1]) 70 for i := range data.stage2{ 71 data.stage2[i] =0 72 } 73 74 copy(outputhash[:], key[:]) 75 success = true 76 return 77 } 78 79 const COUNTING_SORT_BITS uint64 = 10 80 const COUNTING_SORT_SIZE uint64 = 1 << COUNTING_SORT_BITS 81 82 const ALLOCATION_SIZE = MAX_LENGTH 83 84 func BigEndian_Uint64(b []byte) uint64 { 85 _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 86 return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | 87 uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56 88 } 89 90 func smaller(input []uint8, a, b uint64) bool { 91 value_a := a >> 21 92 value_b := b >> 21 93 94 if value_a < value_b { 95 return true 96 } 97 98 if value_a > value_b { 99 return false 100 } 101 102 data_a := BigEndian_Uint64(input[(a%(1<<21))+5:]) 103 data_b := BigEndian_Uint64(input[(b%(1<<21))+5:]) 104 return data_a < data_b 105 } 106 107 // basically 108 func sort_indices(N int, input_extra []byte, output []byte, d *Data) { 109 110 var counters [2][COUNTING_SORT_SIZE]uint32 111 indices := d.indices[:] 112 tmp_indices := d.tmp_indices[:] 113 114 input := input_extra[1:] 115 116 loop3 := N / 3 * 3 117 for i := 0; i < loop3; i += 3 { 118 k0 := BigEndian_Uint64(input[i:]) 119 counters[0][(k0>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++ 120 counters[1][k0>>(64-COUNTING_SORT_BITS)]++ 121 k1 := k0 << 8 122 counters[0][(k1>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++ 123 counters[1][k1>>(64-COUNTING_SORT_BITS)]++ 124 k2 := k0 << 16 125 counters[0][(k2>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++ 126 counters[1][k2>>(64-COUNTING_SORT_BITS)]++ 127 } 128 129 if N%3 != 0 { 130 for i := loop3; i < N; i++ { 131 k := BigEndian_Uint64(input[i:]) 132 counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++ 133 counters[1][k>>(64-COUNTING_SORT_BITS)]++ 134 } 135 } 136 137 /* 138 for i := 0; i < N ; i++{ 139 k := BigEndian_Uint64(input[i:]) 140 counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]++ 141 counters[1][k >> (64 - COUNTING_SORT_BITS)]++ 142 } 143 */ 144 145 prev := [2]uint32{counters[0][0], counters[1][0]} 146 counters[0][0] = prev[0] - 1 147 counters[1][0] = prev[1] - 1 148 var cur [2]uint32 149 for i := uint64(1); i < COUNTING_SORT_SIZE; i++ { 150 cur[0], cur[1] = counters[0][i]+prev[0], counters[1][i]+prev[1] 151 counters[0][i] = cur[0] - 1 152 counters[1][i] = cur[1] - 1 153 prev[0] = cur[0] 154 prev[1] = cur[1] 155 } 156 157 for i := N - 1; i >= 0; i-- { 158 k := BigEndian_Uint64(input[i:]) 159 // FFFFFFFFFFE00000 = (0xFFFFFFFFFFFFFFF<< 21) // to clear bottom 21 bits 160 tmp := counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)] 161 counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]-- 162 163 tmp_indices[tmp] = (k & 0xFFFFFFFFFFE00000) | uint64(i) 164 } 165 166 for i := N - 1; i >= 0; i-- { 167 data := tmp_indices[i] 168 tmp := counters[1][data>>(64-COUNTING_SORT_BITS)] 169 counters[1][data>>(64-COUNTING_SORT_BITS)]-- 170 indices[tmp] = data 171 } 172 173 prev_t := indices[0] 174 for i := 1; i < N; i++ { 175 t := indices[i] 176 if smaller(input, t, prev_t) { 177 t2 := prev_t 178 j := i - 1 179 for { 180 indices[j+1] = prev_t 181 j-- 182 if j < 0 { 183 break 184 } 185 prev_t = indices[j] 186 if !smaller(input, t, prev_t) { 187 break 188 } 189 } 190 indices[j+1] = t 191 t = t2 192 } 193 prev_t = t 194 } 195 196 // optimized unrolled code below this comment 197 /*for i := 0; i < N;i++{ 198 output[i] = input_extra[indices[i] & ((1 << 21) - 1) ] 199 }*/ 200 201 loop4 := ((N + 1) / 4) * 4 202 for i := 0; i < loop4; i += 4 { 203 output[i+0] = input_extra[indices[i+0]&((1<<21)-1)] 204 output[i+1] = input_extra[indices[i+1]&((1<<21)-1)] 205 output[i+2] = input_extra[indices[i+2]&((1<<21)-1)] 206 output[i+3] = input_extra[indices[i+3]&((1<<21)-1)] 207 } 208 for i := loop4; i < N; i++ { 209 output[i] = input_extra[indices[i]&((1<<21)-1)] 210 } 211 212 // there is an issue above, if the last byte of input is 0x00, initialbytes are wrong, this fix may not be complete 213 if N > 3 && input[N-2] == 0 { 214 backup_byte := output[0] 215 output[0] = 0 216 for i := 1; i < N; i++ { 217 if output[i] != 0 { 218 output[i-1] = backup_byte 219 break 220 } 221 } 222 } 223 224 }