github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/split/value.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Defensively block building on untested versions:
     6  // +build go1.8,!go1.12
     7  
     8  // Package split provides a logical value type that is split across
     9  // one or more shards to achieve better parallelism.
    10  //
    11  // Split values have many uses, but are primarily for optimizing
    12  // "write-mostly" shared data structures that have commutative
    13  // operations. Split values allow concurrent updates to happen on
    14  // different shards, which minimizes contention between updates.
    15  // However, reading the entire value requires combining all of these
    16  // shards, which is a potentially expensive operation.
    17  //
    18  // WARNING: This package depends on Go runtime internals. It has been
    19  // tested with Go 1.8 through Go 1.10, but may not work with older or
    20  // newer versions.
    21  package split
    22  
    23  import (
    24  	"fmt"
    25  	"reflect"
    26  	"runtime"
    27  	"unsafe"
    28  )
    29  
// cacheLineBytes is the alignment shards are padded to so that
// distinct shards never share a cache line (avoiding false sharing
// between concurrent shard updates). 128 is larger than the common
// 64-byte line — presumably to defeat adjacent-line prefetchers,
// though that rationale is not documented here.
const cacheLineBytes = 128
    31  
// Value represents a logical value split across one or more shards.
// The shards are arranged to minimize contention when different
// shards are accessed concurrently.
type Value struct {
	store     unsafe.Pointer // backing array of len cache-line-padded shards
	ptrType   unsafe.Pointer // runtime type descriptor for *T; used by Get to build interface values
	shardSize uintptr        // size in bytes of one padded shard (multiple of cacheLineBytes)
	len       int            // number of shards (GOMAXPROCS at creation time)
	cbType    reflect.Type   // func(*T); the type Range callbacks must have
}
    42  
// emptyInterface mirrors the runtime's layout of an empty interface
// value (a type-descriptor word and a data word). It is used to
// construct and deconstruct interface{} values directly, without
// going through reflection on each Get.
type emptyInterface struct {
	typ  unsafe.Pointer // runtime type descriptor
	word unsafe.Pointer // value data word
}
    47  
// New returns a new Value. The constructor argument must be a
// function with type func(*T), where T determines the type that will
// be stored in each shard. New will initialize each shard to the zero
// value of T and then call constructor with a pointer to the shard to
// perform any further initialization. The constructor function may
// also be called in the future if new shards are created.
func New(constructor interface{}) *Value {
	// Validate that constructor has type func(*T).
	ct := reflect.TypeOf(constructor)
	if ct.Kind() != reflect.Func || ct.NumIn() != 1 || ct.NumOut() != 0 || ct.In(0).Kind() != reflect.Ptr {
		panic("New constructor must be func(*T) for some type T")
	}
	et := ct.In(0).Elem() // T, the per-shard element type

	// Embed et in a struct so we can pad it out to a cache line.
	// The padding keeps adjacent shards on separate cache lines.
	//
	// TODO: If et is small, this can stride-allocate multiple
	// Values together. Would need non-trivial runtime support,
	// but would save a lot of space. We could do this for
	// pointer-free types without runtime support and maybe types
	// that are just a pointer.
	shardSize := (et.Size() + (cacheLineBytes - 1)) &^ (cacheLineBytes - 1) // round up to cache line
	padding := shardSize - et.Size()
	padded := reflect.StructOf([]reflect.StructField{
		{Name: "X", Type: et},
		{Name: "Pad", Type: reflect.ArrayOf(int(padding), byteType)},
	})

	// Allocate backing store: one padded shard per P.
	nproc := runtime.GOMAXPROCS(-1)
	store := reflect.New(reflect.ArrayOf(nproc, padded))

	// Get pointer-to-element type descriptor (*T) by extracting
	// the type word from a zero *T boxed in an interface. Get uses
	// this to manufacture interface{} values pointing into store.
	pet := reflect.PtrTo(et)
	petz := reflect.Zero(pet).Interface()
	ptrType := (*emptyInterface)(unsafe.Pointer(&petz)).typ

	v := &Value{
		store:     unsafe.Pointer(store.Pointer()),
		ptrType:   ptrType,
		shardSize: shardSize,
		len:       nproc,
		cbType:    ct, // func(*T) type, same as constructor.
	}

	// Initialize each shard by running the constructor on it.
	v.Range(constructor)

	return v
}
    97  
// byteType is the reflect.Type of byte, used by New to build the
// padding array field of each padded shard struct.
var byteType = reflect.TypeOf(byte(0))
    99  
   100  // Get returns a pointer to some shard of v.
   101  //
   102  // Get may return the same pointer to multiple goroutines, so the
   103  // caller is responsible for synchronizing concurrent access to the
   104  // returned value. This can be done using atomic operations or locks,
   105  // just like any other shared structure.
   106  //
   107  // Get attempts to maintain CPU locality and contention-freedom of
   108  // shards. That is, two calls to Get from the same CPU are likely to
   109  // return the same pointer, while calls to Get from different CPUs are
   110  // likely to return different pointers. Furthermore, accessing
   111  // different shards in parallel is unlikely to result in cache
   112  // contention.
   113  func (v *Value) Get() interface{} {
   114  	// Get the P ID.
   115  	//
   116  	// TODO: Could use CPU ID instead of P ID. Would get even
   117  	// better cache locality and limit might be more fixed.
   118  	//
   119  	// TODO: We don't need pinning here.
   120  	pid := runtime_procPin()
   121  	runtime_procUnpin()
   122  
   123  	// This is 10% faster than procPin/procUnpin. It requires the
   124  	// following patch to the runtime:
   125  	////go:linkname sync_split_procID sync/split.procID
   126  	//func sync_split_procID() int {
   127  	//	return int(getg().m.p.ptr().id)
   128  	//}
   129  	//pid := procID()
   130  
   131  	// This is 30% faster than procPin/procUnpin. It requires the
   132  	// following patch to the runtime:
   133  	//func ProcID() int {
   134  	//	return int(getg().m.p.ptr().id)
   135  	//}
   136  	// However, it's unclear how to do this without exposing public API.
   137  	//pid := runtime.ProcID()
   138  
   139  	if pid > v.len {
   140  		// TODO: Grow the backing store if pid is larger than
   141  		// store. This is tricky because we may have handed
   142  		// out pointers into the current store. Probably this
   143  		// is only possible with a level of indirection that
   144  		// lets us allocate the backing store in multiple
   145  		// segments. Then we can do an RCU-style update on the
   146  		// index structure. We may want to limit the number of
   147  		// shards to something sane anyway (e.g., 1024). How
   148  		// would this synchronize with Range? E.g., if Range
   149  		// iterator is going through locking everything, it
   150  		// would be bad if Get then made a new, unlocked
   151  		// element.
   152  		pid = int(uint(pid) % uint(v.len))
   153  	}
   154  	val := emptyInterface{
   155  		typ:  v.ptrType,
   156  		word: v.shard(pid),
   157  	}
   158  	return *(*interface{})(unsafe.Pointer(&val))
   159  }
   160  
   161  func (v *Value) shard(shard int) unsafe.Pointer {
   162  	// The caller must ensure that 0 <= shard < v.len.
   163  	return unsafe.Pointer(uintptr(v.store) + v.shardSize*uintptr(shard))
   164  }
   165  
// Range calls each of its argument functions with pointers to all of
// the shards in v. Each argument must be a function with type
// func(*T), where T is the shard type of the Value.
//
// Range calls its first argument N times with a pointer to each of
// the N shards of v. It then calls its second argument with each
// shard, and so on. Range guarantees that the set of shards and their
// order will not change during this process. This makes it safe to
// implement multi-pass algorithms, such as locking all of the shards
// and then unlocking all of the shards.
//
// Multiple calls to Range are not guaranteed to observe the same set
// of shards, so algorithms that need a consistent view of the shards
// must make a single call to Range with multiple functions.
//
// Multiple calls to Range are guaranteed to traverse the shards in a
// consistent order. While different calls may traverse more or fewer
// shards, if any Range traverses shard A before shard B, all Range
// calls will do so. Uses of Range that acquire locks on multiple
// shards can depend on this for lock ordering.
//
// Range calls each function sequentially, so it's safe to update
// local state without synchronization. However, the functions may run
// concurrently with other goroutines calling Get or Range, so they
// must synchronize access to shard values.
func (v *Value) Range(fn ...interface{}) {
	// "Type check" all of the fn arguments before calling
	// anything, so a bad argument panics before any pass runs.
	//
	// TODO: Accept any func(U) where *T is assignable to U (like
	// runtime.SetFinalizer).
	for _, fn1 := range fn {
		if reflect.TypeOf(fn1) != v.cbType {
			panic(fmt.Sprintf("Range expected %s, got %T", v.cbType, fn1))
		}
	}

	// TODO: If we grow the backing store, this needs to block
	// growing if there are multiple passes (it doesn't have to if
	// there's one pass, but it has to handle it very carefully).
	for _, fn1 := range fn {
		// Cast fn1 to a function with equivalent calling
		// convention. This relies on the interface's data word
		// being usable directly as a func value, and on
		// func(*T) and func(unsafe.Pointer) sharing a calling
		// convention — both runtime internals this package
		// depends on (see package warning).
		var fn1Generic func(unsafe.Pointer)
		*(*unsafe.Pointer)(unsafe.Pointer(&fn1Generic)) = ((*emptyInterface)(unsafe.Pointer(&fn1)).word)
		// Call function on each shard, in ascending index order;
		// this fixed order is what provides Range's consistent
		// traversal-order guarantee.
		for i := 0; i < v.len; i++ {
			fn1Generic(v.shard(i))
		}
	}
}
   217  
// runtime_procPin returns the current P's ID (pinning the goroutine
// to it); Get immediately unpins via runtime_procUnpin since it only
// needs the ID.
//go:linkname runtime_procPin runtime.procPin
func runtime_procPin() int

// runtime_procUnpin undoes a runtime_procPin.
//go:linkname runtime_procUnpin runtime.procUnpin
func runtime_procUnpin()

// procID returns the current P's ID without pinning.
// Provided by the runtime (with patch above); only referenced by the
// commented-out fast path in Get.
func procID() int