// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Defensively block building on untested versions:
// +build go1.8,!go1.12

// Package split provides a logical value type that is split across
// one or more shards to achieve better parallelism.
//
// Split values have many uses, but are primarily for optimizing
// "write-mostly" shared data structures that have commutative
// operations. Split values allow concurrent updates to happen on
// different shards, which minimizes contention between updates.
// However, reading the entire value requires combining all of these
// shards, which is a potentially expensive operation.
//
// WARNING: This package depends on Go runtime internals. It has been
// tested with Go 1.8 through Go 1.10, but may not work with older or
// newer versions.
package split

import (
	"fmt"
	"reflect"
	"runtime"
	"unsafe"
)

// cacheLineBytes is the size each shard is padded to so that distinct
// shards never share a cache line (avoiding false sharing between
// CPUs). 128 is a conservative upper bound on the cache line size of
// common architectures.
const cacheLineBytes = 128

// Value represents a logical value split across one or more shards.
// The shards are arranged to minimize contention when different
// shards are accessed concurrently.
type Value struct {
	store     unsafe.Pointer // backing array of len cache-line-padded shards
	ptrType   unsafe.Pointer // runtime type descriptor for *T; used by Get to build interface values
	shardSize uintptr        // byte size of one padded shard (a multiple of cacheLineBytes)
	len       int            // number of shards in store
	cbType    reflect.Type   // func(*T); the required type of Range callbacks
}

// emptyInterface mirrors the runtime representation of an interface{}
// value: a type-descriptor word followed by a data word. It is used
// to build and take apart interface values without allocation.
// NOTE(review): this layout is a runtime internal; see the package
// WARNING above about supported Go versions.
type emptyInterface struct {
	typ  unsafe.Pointer
	word unsafe.Pointer
}

// New returns a new Value. The constructor argument must be a
// function with type func(*T), where T determines the type that will
// be stored in each shard. New will initialize each shard to the zero
// value of T and then call constructor with a pointer to the shard to
// perform any further initialization. The constructor function may
// also be called in the future if new shards are created.
func New(constructor interface{}) *Value {
	// Validate that constructor has type func(*T) and extract T.
	ct := reflect.TypeOf(constructor)
	if ct.Kind() != reflect.Func || ct.NumIn() != 1 || ct.NumOut() != 0 || ct.In(0).Kind() != reflect.Ptr {
		panic("New constructor must be func(*T) for some type T")
	}
	et := ct.In(0).Elem()

	// Embed et in a struct so we can pad it out to a cache line.
	//
	// TODO: If et is small, this can stride-allocate multiple
	// Values together. Would need non-trivial runtime support,
	// but would save a lot of space. We could do this for
	// pointer-free types without runtime support and maybe types
	// that are just a pointer.
	shardSize := (et.Size() + (cacheLineBytes - 1)) &^ (cacheLineBytes - 1) // round up to a cache-line multiple
	padding := shardSize - et.Size()
	padded := reflect.StructOf([]reflect.StructField{
		{Name: "X", Type: et},
		{Name: "Pad", Type: reflect.ArrayOf(int(padding), byteType)},
	})

	// Allocate backing store: one padded shard per P.
	// GOMAXPROCS(-1) queries the current value without changing it.
	nproc := runtime.GOMAXPROCS(-1)
	store := reflect.New(reflect.ArrayOf(nproc, padded))

	// Get pointer-to-element type: pull the type-descriptor word
	// out of an interface holding a zero (nil) *T.
	pet := reflect.PtrTo(et)
	petz := reflect.Zero(pet).Interface()
	ptrType := (*emptyInterface)(unsafe.Pointer(&petz)).typ

	v := &Value{
		// NOTE(review): store.Pointer() round-trips through a
		// uintptr; the store reflect.Value is still live across
		// this statement, and afterwards v.store (held as an
		// unsafe.Pointer) keeps the backing array reachable.
		store:     unsafe.Pointer(store.Pointer()),
		ptrType:   ptrType,
		shardSize: shardSize,
		len:       nproc,
		cbType:    ct, // func(*T) type, same as constructor.
	}

	// Initialize each shard by running constructor on it.
	v.Range(constructor)

	return v
}

// byteType is the reflect.Type of byte, used to build the padding
// arrays appended to each shard.
var byteType = reflect.TypeOf(byte(0))

// Get returns a pointer to some shard of v.
//
// Get may return the same pointer to multiple goroutines, so the
// caller is responsible for synchronizing concurrent access to the
// returned value. This can be done using atomic operations or locks,
// just like any other shared structure.
//
// Get attempts to maintain CPU locality and contention-freedom of
// shards.
That is, two calls to Get from the same CPU are likely to 109 // return the same pointer, while calls to Get from different CPUs are 110 // likely to return different pointers. Furthermore, accessing 111 // different shards in parallel is unlikely to result in cache 112 // contention. 113 func (v *Value) Get() interface{} { 114 // Get the P ID. 115 // 116 // TODO: Could use CPU ID instead of P ID. Would get even 117 // better cache locality and limit might be more fixed. 118 // 119 // TODO: We don't need pinning here. 120 pid := runtime_procPin() 121 runtime_procUnpin() 122 123 // This is 10% faster than procPin/procUnpin. It requires the 124 // following patch to the runtime: 125 ////go:linkname sync_split_procID sync/split.procID 126 //func sync_split_procID() int { 127 // return int(getg().m.p.ptr().id) 128 //} 129 //pid := procID() 130 131 // This is 30% faster than procPin/procUnpin. It requires the 132 // following patch to the runtime: 133 //func ProcID() int { 134 // return int(getg().m.p.ptr().id) 135 //} 136 // However, it's unclear how to do this without exposing public API. 137 //pid := runtime.ProcID() 138 139 if pid > v.len { 140 // TODO: Grow the backing store if pid is larger than 141 // store. This is tricky because we may have handed 142 // out pointers into the current store. Probably this 143 // is only possible with a level of indirection that 144 // lets us allocate the backing store in multiple 145 // segments. Then we can do an RCU-style update on the 146 // index structure. We may want to limit the number of 147 // shards to something sane anyway (e.g., 1024). How 148 // would this synchronize with Range? E.g., if Range 149 // iterator is going through locking everything, it 150 // would be bad if Get then made a new, unlocked 151 // element. 
152 pid = int(uint(pid) % uint(v.len)) 153 } 154 val := emptyInterface{ 155 typ: v.ptrType, 156 word: v.shard(pid), 157 } 158 return *(*interface{})(unsafe.Pointer(&val)) 159 } 160 161 func (v *Value) shard(shard int) unsafe.Pointer { 162 // The caller must ensure that 0 <= shard < v.len. 163 return unsafe.Pointer(uintptr(v.store) + v.shardSize*uintptr(shard)) 164 } 165 166 // Range calls each of its argument functions with pointers to all of 167 // the shards in v. Each argument must be a function with type 168 // func(*T), where T is the shard type of the Value. 169 // 170 // Range calls its first argument N times with a pointer to each of 171 // the N shards of v. It then calls its second argument with each 172 // shard, and so on. Range guarantees that the set of shards and their 173 // order will not change during this process. This makes it safe to 174 // implement multi-pass algorithms, such as locking all of the shards 175 // and then unlocking all of the shards. 176 // 177 // Multiple calls to Range are not guaranteed to observe the same set 178 // of shards, so algorithms that need a consistent view of the shards 179 // must make a single call to Range with multiple functions. 180 // 181 // Multiple calls to Range are guaranteed to traverse the shards in a 182 // consistent order. While different calls may traverse more or fewer 183 // shards, if any Range traverses shard A before shard B, all Range 184 // calls will do so. Uses of Range that acquire locks on multiple 185 // shards can depend on this for lock ordering. 186 // 187 // Range calls each function sequentially, so it's safe to update 188 // local state without synchronization. However, the functions may run 189 // concurrently with other goroutines calling Get or Range, so they 190 // must synchronize access to shard values. 191 func (v *Value) Range(fn ...interface{}) { 192 // "Type check" all of the fn arguments before calling 193 // anything. 
194 // 195 // TODO: Accept any func(U) where *T is assignable to U (like 196 // runtime.SetFinalizer). 197 for _, fn1 := range fn { 198 if reflect.TypeOf(fn1) != v.cbType { 199 panic(fmt.Sprintf("Range expected %s, got %T", v.cbType, fn1)) 200 } 201 } 202 203 // TODO: If we grow the backing store, this needs to block 204 // growing if there are multiple passes (it doesn't have to if 205 // there's one pass, but it has to handle it very carefully). 206 for _, fn1 := range fn { 207 // Cast fn1 to a function with equivalent calling 208 // convention. 209 var fn1Generic func(unsafe.Pointer) 210 *(*unsafe.Pointer)(unsafe.Pointer(&fn1Generic)) = ((*emptyInterface)(unsafe.Pointer(&fn1)).word) 211 // Call function on each shard. 212 for i := 0; i < v.len; i++ { 213 fn1Generic(v.shard(i)) 214 } 215 } 216 } 217 218 //go:linkname runtime_procPin runtime.procPin 219 func runtime_procPin() int 220 221 //go:linkname runtime_procUnpin runtime.procUnpin 222 func runtime_procUnpin() 223 224 // Provided by the runtime (with patch above). 225 func procID() int