gitee.com/quant1x/num@v0.3.2/vectors/vectors.go

gitee.com/quant1x/num@v0.3.2/vectors/vectors.go (about)

     1  package vectors
     2  
     3  import "unsafe"
     4  
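// Reference excerpt from the standard library's bytes.Repeat, kept for context (not compiled):
//
//	// Since we cannot return an error on overflow,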
     6  //	// we should panic if the repeat will generate an overflow.
     7  //	// See golang.org/issue/16237.
     8  //	if count < 0 {
     9  //		panic("bytes: negative Repeat count")
    10  //	}
    11  //	if len(b) >= maxInt/count {
    12  //		panic("bytes: Repeat output length overflow")
    13  //	}
    14  //	n := len(b) * count
    15  //
    16  //	if len(b) == 0 {
    17  //		return []byte{}
    18  //	}
    19  //
    20  //	// Past a certain chunk size it is counterproductive to use
    21  //	// larger chunks as the source of the write, as when the source
    22  //	// is too large we are basically just thrashing the CPU D-cache.
    23  //	// So if the result length is larger than an empirically-found
    24  //	// limit (8KB), we stop growing the source string once the limit
    25  //	// is reached and keep reusing the same source string - that
    26  //	// should therefore be always resident in the L1 cache - until we
    27  //	// have completed the construction of the result.
    28  //	// This yields significant speedups (up to +100%) in cases where
    29  //	// the result length is large (roughly, over L2 cache size).
    30  //	const chunkLimit = 8 * 1024
    31  //	chunkMax := n
    32  //	if chunkMax > chunkLimit {
    33  //		chunkMax = chunkLimit / len(b) * len(b)
    34  //		if chunkMax == 0 {
    35  //			chunkMax = len(b)
    36  //		}
    37  //	}
    38  
    39  const (
    40  	// Past a certain chunk size it is counterproductive to use
    41  	// larger chunks as the source of the write, as when the source
    42  	// is too large we are basically just thrashing the CPU D-cache.
    43  	// So if the result length is larger than an empirically-found
    44  	// limit (8KB), we stop growing the source string once the limit
    45  	// is reached and keep reusing the same source string - that
    46  	// should therefore be always resident in the L1 cache - until we
    47  	// have completed the construction of the result.
    48  	// This yields significant speedups (up to +100%) in cases where
    49  	// the result length is large (roughly, over L2 cache size).
    50  	// see bytes.Repeat
    51  	cpuDCacheChunkLimit = 8 * 1024
    52  )
    53  
    54  // CpuChunkMax 计算一次最多可以copy多少个E
    55  func CpuChunkMax[E any](t E, count int) int {
    56  	elementSize := int(unsafe.Sizeof(t))
    57  	chunkMax := count
    58  	if chunkMax*elementSize > cpuDCacheChunkLimit {
    59  		chunkMax = cpuDCacheChunkLimit / elementSize
    60  		if chunkMax == 0 {
    61  			chunkMax = 1
    62  		}
    63  	}
    64  	return chunkMax
    65  }
    66  
    67  // Repeat 在已申请内存的x切片中, 重复count次a
    68  func Repeat[E any](x []E, a E, count int) {
    69  	low := copy(x[0:], []E{a})
    70  	if low == 0 {
    71  		return
    72  	}
    73  	n := len(x)
    74  	if count > n {
    75  		count = n
    76  	}
    77  	chunkMax := CpuChunkMax(a, count)
    78  	for low < count {
    79  		chunk := low
    80  		if chunk > chunkMax {
    81  			chunk = chunkMax
    82  		}
    83  		low += copy(x[low:], x[:chunk])
    84  	}
    85  }