gitee.com/quant1x/num@v0.3.2/vectors/vectors.go (about) 1 package vectors 2 3 import "unsafe" 4 5 // Since we cannot return an error on overflow, 6 // // we should panic if the repeat will generate an overflow. 7 // // See golang.org/issue/16237. 8 // if count < 0 { 9 // panic("bytes: negative Repeat count") 10 // } 11 // if len(b) >= maxInt/count { 12 // panic("bytes: Repeat output length overflow") 13 // } 14 // n := len(b) * count 15 // 16 // if len(b) == 0 { 17 // return []byte{} 18 // } 19 // 20 // // Past a certain chunk size it is counterproductive to use 21 // // larger chunks as the source of the write, as when the source 22 // // is too large we are basically just thrashing the CPU D-cache. 23 // // So if the result length is larger than an empirically-found 24 // // limit (8KB), we stop growing the source string once the limit 25 // // is reached and keep reusing the same source string - that 26 // // should therefore be always resident in the L1 cache - until we 27 // // have completed the construction of the result. 28 // // This yields significant speedups (up to +100%) in cases where 29 // // the result length is large (roughly, over L2 cache size). 30 // const chunkLimit = 8 * 1024 31 // chunkMax := n 32 // if chunkMax > chunkLimit { 33 // chunkMax = chunkLimit / len(b) * len(b) 34 // if chunkMax == 0 { 35 // chunkMax = len(b) 36 // } 37 // } 38 39 const ( 40 // Past a certain chunk size it is counterproductive to use 41 // larger chunks as the source of the write, as when the source 42 // is too large we are basically just thrashing the CPU D-cache. 43 // So if the result length is larger than an empirically-found 44 // limit (8KB), we stop growing the source string once the limit 45 // is reached and keep reusing the same source string - that 46 // should therefore be always resident in the L1 cache - until we 47 // have completed the construction of the result. 
const (
	// cpuDCacheChunkLimit is the maximum number of bytes used as the source
	// of a single copy when filling a slice by repeated self-copy.
	//
	// Past a certain chunk size it is counterproductive to use
	// larger chunks as the source of the write, as when the source
	// is too large we are basically just thrashing the CPU D-cache.
	// So if the result length is larger than an empirically-found
	// limit (8KB), we stop growing the source string once the limit
	// is reached and keep reusing the same source string - that
	// should therefore be always resident in the L1 cache - until we
	// have completed the construction of the result.
	// This yields significant speedups (up to +100%) in cases where
	// the result length is large (roughly, over L2 cache size).
	// see bytes.Repeat
	cpuDCacheChunkLimit = 8 * 1024
)

// CpuChunkMax reports the maximum number of elements of type E that should be
// copied in one step when count elements are to be written in total.
//
// t is used only to determine the element size. The result is count itself
// when count elements fit within cpuDCacheChunkLimit bytes (this includes
// zero-size element types and non-positive counts); otherwise it is the number
// of whole elements that fit in the limit, and never less than 1.
func CpuChunkMax[E any](t E, count int) int {
	elementSize := int(unsafe.Sizeof(t))
	// Zero-size elements can never exceed the byte limit; checking first also
	// keeps the division below well-defined.
	if elementSize == 0 {
		return count
	}
	// Compare against limit/elementSize rather than multiplying, so that a
	// very large count cannot overflow and slip past the clamp.
	perChunk := cpuDCacheChunkLimit / elementSize
	if count <= perChunk {
		return count
	}
	if perChunk == 0 {
		// A single element is already larger than the limit; copy one at a time.
		return 1
	}
	return perChunk
}

// Repeat fills the first count elements of the already-allocated slice x with
// the value a.
//
// count is clamped to len(x). When the clamped count is zero or negative, x is
// left unmodified. Elements of x at index count and beyond are never written.
//
// The fill seeds x[0] and then repeatedly copies the already-filled prefix
// onto the unfilled remainder, doubling the filled region each step until the
// source chunk reaches CpuChunkMax elements (see bytes.Repeat).
func Repeat[E any](x []E, a E, count int) {
	if count > len(x) {
		count = len(x)
	}
	if count <= 0 {
		return
	}
	// Restrict the view to the requested range so that no copy below can
	// spill past count.
	x = x[:count]
	x[0] = a

	chunkMax := CpuChunkMax(a, count)
	for low := 1; low < count; {
		chunk := low
		if chunk > chunkMax {
			chunk = chunkMax
		}
		low += copy(x[low:], x[:chunk])
	}
}