github.com/deroproject/derosuite@v2.1.6-1.0.20200307070847-0f2e589c7a2b+incompatible/astrobwt/astrobwt_optimized.go (about)

     1  package astrobwt
     2  
     3  //import "os"
     4  //import "fmt"
     5  
     6  import "sync"
     7  import "encoding/binary"
     8  import "golang.org/x/crypto/sha3"
     9  
    10  import "golang.org/x/crypto/salsa20/salsa"
    11  
    12  // see here to improve the algorithms more https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md
    13  
// The original implementation came from the xmrig miner; it had a flaw, which has been fixed here.
// This optimized algorithm is used only in the miner, not in the blockchain.
    16  
    17  //const stage1_length int = 147253 // it is a prime
    18  //const max_length int = 1024*1024 + stage1_length + 1024
    19  
// Data bundles every scratch buffer used by one POW_optimized_v2 computation,
// so a single value can be reused from call to call.
type Data struct {
	// stage 1 input handed to sort_indices: POW_optimized_v2 zeroes it and then
	// writes keystream into bytes 1..stage1_length; the 64 spare bytes stay zero
	// and absorb the 8-byte reads made near the end of the buffer.
	stage1        [stage1_length + 64]byte
	// result of sort_indices over stage1 (stage1_length+1 bytes)
	stage1_result [stage1_length + 1]byte
	// stage 2 input, laid out like stage1 but sized for the largest possible
	// stage 2 length (stage1_length + 0xfffff)
	stage2        [1024*1024 + stage1_length + 1 + 64]byte
	// result of sort_indices over stage2; only the first stage2_length+1 bytes are hashed
	stage2_result [1024*1024 + stage1_length + 1]byte
	// final ordering built by sort_indices
	indices       [ALLOCATION_SIZE]uint64
	// intermediate ordering after the first counting-sort pass of sort_indices
	tmp_indices   [ALLOCATION_SIZE]uint64
}
    28  
// pool recycles *Data scratch buffers between POW_optimized_v1 calls; New hands
// out a freshly zeroed Data when the pool has nothing to reuse.
var pool = sync.Pool{New: func() interface{} { return &Data{} }}
    30  
    31  func POW_optimized_v1(inputdata []byte, max_limit int) (outputhash [32]byte, success bool) {
    32  	data := pool.Get().(*Data)
    33  	outputhash, success = POW_optimized_v2(inputdata,max_limit,data)
    34  	pool.Put(data)
    35  	return
    36  }
    37  func POW_optimized_v2(inputdata []byte, max_limit int, data *Data) (outputhash [32]byte, success bool) {
    38  
    39  	var counter [16]byte
    40  
    41  
    42  	for i := range data.stage1 {
    43  		data.stage1[i] = 0
    44  	}
    45  	/* for i := range data.stage1_result{
    46  	    data.stage1_result[i] =0
    47  	}*/
    48  
    49  	key := sha3.Sum256(inputdata)
    50  	salsa.XORKeyStream(data.stage1[1:stage1_length+1], data.stage1[1:stage1_length+1], &counter, &key)
    51  	sort_indices(stage1_length+1, data.stage1[:], data.stage1_result[:], data)
    52  	key = sha3.Sum256(data.stage1_result[:])
    53  	stage2_length := stage1_length + int(binary.LittleEndian.Uint32(key[:])&0xfffff)
    54  
    55      if stage2_length > max_limit {
    56          for i := range outputhash { // will be optimized by compiler
    57  		    outputhash[i] = 0xff
    58  	    }
    59          success = false
    60        return
    61      }
    62  
    63  	for i := range counter { // will be optimized by compiler
    64  		counter[i] = 0
    65  	}
    66  
    67  	salsa.XORKeyStream(data.stage2[1:stage2_length+1], data.stage2[1:stage2_length+1], &counter, &key)
    68  	sort_indices(stage2_length+1, data.stage2[:], data.stage2_result[:], data)
    69  	key = sha3.Sum256(data.stage2_result[:stage2_length+1])
    70  	for i := range data.stage2{
    71  	    data.stage2[i] =0
    72  	}
    73  
    74  	copy(outputhash[:], key[:])
    75      success = true
    76  	return
    77  }
    78  
    79  const COUNTING_SORT_BITS uint64 = 10
    80  const COUNTING_SORT_SIZE uint64 = 1 << COUNTING_SORT_BITS
    81  
// ALLOCATION_SIZE is the number of entries in each of Data's index arrays
// (indices and tmp_indices). MAX_LENGTH is declared outside this section.
const ALLOCATION_SIZE = MAX_LENGTH
    83  
    84  func BigEndian_Uint64(b []byte) uint64 {
    85  	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
    86  	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
    87  		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
    88  }
    89  
    90  func smaller(input []uint8, a, b uint64) bool {
    91  	value_a := a >> 21
    92  	value_b := b >> 21
    93  
    94  	if value_a < value_b {
    95  		return true
    96  	}
    97  
    98  	if value_a > value_b {
    99  		return false
   100  	}
   101  
   102  	data_a := BigEndian_Uint64(input[(a%(1<<21))+5:])
   103  	data_b := BigEndian_Uint64(input[(b%(1<<21))+5:])
   104  	return data_a < data_b
   105  }
   106  
   107  // basically
   108  func sort_indices(N int, input_extra []byte, output []byte, d *Data) {
   109  
   110  	var counters [2][COUNTING_SORT_SIZE]uint32
   111  	indices := d.indices[:]
   112  	tmp_indices := d.tmp_indices[:]
   113  
   114  	input := input_extra[1:]
   115  
   116  	loop3 := N / 3 * 3
   117  	for i := 0; i < loop3; i += 3 {
   118  		k0 := BigEndian_Uint64(input[i:])
   119  		counters[0][(k0>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
   120  		counters[1][k0>>(64-COUNTING_SORT_BITS)]++
   121  		k1 := k0 << 8
   122  		counters[0][(k1>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
   123  		counters[1][k1>>(64-COUNTING_SORT_BITS)]++
   124  		k2 := k0 << 16
   125  		counters[0][(k2>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
   126  		counters[1][k2>>(64-COUNTING_SORT_BITS)]++
   127  	}
   128  
   129  	if N%3 != 0 {
   130  		for i := loop3; i < N; i++ {
   131  			k := BigEndian_Uint64(input[i:])
   132  			counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
   133  			counters[1][k>>(64-COUNTING_SORT_BITS)]++
   134  		}
   135  	}
   136  
   137  	/*
   138  	   	for i := 0; i < N ; i++{
   139  	   		k := BigEndian_Uint64(input[i:])
   140  	     		counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]++
   141  	   		counters[1][k >> (64 - COUNTING_SORT_BITS)]++
   142  	   	}
   143  	*/
   144  
   145  	prev := [2]uint32{counters[0][0], counters[1][0]}
   146  	counters[0][0] = prev[0] - 1
   147  	counters[1][0] = prev[1] - 1
   148  	var cur [2]uint32
   149  	for i := uint64(1); i < COUNTING_SORT_SIZE; i++ {
   150  		cur[0], cur[1] = counters[0][i]+prev[0], counters[1][i]+prev[1]
   151  		counters[0][i] = cur[0] - 1
   152  		counters[1][i] = cur[1] - 1
   153  		prev[0] = cur[0]
   154  		prev[1] = cur[1]
   155  	}
   156  
   157  	for i := N - 1; i >= 0; i-- {
   158  		k := BigEndian_Uint64(input[i:])
   159  		// FFFFFFFFFFE00000 =  (0xFFFFFFFFFFFFFFF<< 21)  // to clear bottom 21 bits
   160  		tmp := counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]
   161  		counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]--
   162  
   163  		tmp_indices[tmp] = (k & 0xFFFFFFFFFFE00000) | uint64(i)
   164  	}
   165  
   166  	for i := N - 1; i >= 0; i-- {
   167  		data := tmp_indices[i]
   168  		tmp := counters[1][data>>(64-COUNTING_SORT_BITS)]
   169  		counters[1][data>>(64-COUNTING_SORT_BITS)]--
   170  		indices[tmp] = data
   171  	}
   172  
   173  	prev_t := indices[0]
   174  	for i := 1; i < N; i++ {
   175  		t := indices[i]
   176  		if smaller(input, t, prev_t) {
   177  			t2 := prev_t
   178  			j := i - 1
   179  			for {
   180  				indices[j+1] = prev_t
   181  				j--
   182  				if j < 0 {
   183  					break
   184  				}
   185  				prev_t = indices[j]
   186  				if !smaller(input, t, prev_t) {
   187  					break
   188  				}
   189  			}
   190  			indices[j+1] = t
   191  			t = t2
   192  		}
   193  		prev_t = t
   194  	}
   195  
   196  	// optimized unrolled code below this comment
   197  	/*for i := 0; i < N;i++{
   198  		output[i] =  input_extra[indices[i] & ((1 << 21) - 1) ]
   199  	}*/
   200  
   201  	loop4 := ((N + 1) / 4) * 4
   202  	for i := 0; i < loop4; i += 4 {
   203  		output[i+0] = input_extra[indices[i+0]&((1<<21)-1)]
   204  		output[i+1] = input_extra[indices[i+1]&((1<<21)-1)]
   205  		output[i+2] = input_extra[indices[i+2]&((1<<21)-1)]
   206  		output[i+3] = input_extra[indices[i+3]&((1<<21)-1)]
   207  	}
   208  	for i := loop4; i < N; i++ {
   209  		output[i] = input_extra[indices[i]&((1<<21)-1)]
   210  	}
   211  
   212  	// there is an issue above, if the last byte of input is 0x00, initialbytes are wrong, this fix may not be complete
   213  	if N > 3 && input[N-2] == 0 {
   214  		backup_byte := output[0]
   215  		output[0] = 0
   216  		for i := 1; i < N; i++ {
   217  			if output[i] != 0 {
   218  				output[i-1] = backup_byte
   219  				break
   220  			}
   221  		}
   222  	}
   223  
   224  }