go4.org/intern@v0.0.0-20230525184215-6c62f75575cb/intern.go (about)

     1  // Copyright 2020 Brad Fitzpatrick. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package intern lets you make smaller comparable values by boxing
     6  // a larger comparable value (such as a 16 byte string header) down
     7  // into a globally unique 8 byte pointer.
     8  //
     9  // The globally unique pointers are garbage collected with weak
    10  // references and finalizers. This package hides that.
    11  //
    12  // The GitHub repo is https://github.com/go4org/intern
    13  package intern // import "go4.org/intern"
    14  
    15  import (
    16  	"os"
    17  	"runtime"
    18  	"strconv"
    19  	"sync"
    20  	"unsafe"
    21  
    22  	_ "go4.org/unsafe/assume-no-moving-gc"
    23  )
    24  
// A Value pointer is the handle to an underlying comparable value.
// See func Get for how Value pointers may be used.
//
// Callers are meant to hold and compare *Value, never Value itself: the
// zero-length array of funcs below makes the struct type non-comparable,
// so an accidental == on Value (rather than *Value) fails to compile.
//
// cmpVal holds the value the handle stands for and is what the Get
// method returns.
type Value struct {
	_      [0]func() // prevent people from accidentally using value type as comparable
	cmpVal interface{}
	// resurrected is guarded by mu (for all instances of Value).
	// It is set true whenever v is synthesized from a uintptr
	// (see get), and cleared again by finalize when it observes it.
	resurrected bool
}
    34  
    35  // Get returns the comparable value passed to the Get func
    36  // that returned v.
    37  func (v *Value) Get() interface{} { return v.cmpVal }
    38  
    39  // key is a key in our global value map.
    40  // It contains type-specialized fields to avoid allocations
    41  // when converting common types to empty interfaces.
    42  type key struct {
    43  	s      string
    44  	cmpVal interface{}
    45  	// isString reports whether key contains a string.
    46  	// Without it, the zero value of key is ambiguous.
    47  	isString bool
    48  }
    49  
    50  // keyFor returns a key to use with cmpVal.
    51  func keyFor(cmpVal interface{}) key {
    52  	if s, ok := cmpVal.(string); ok {
    53  		return key{s: s, isString: true}
    54  	}
    55  	return key{cmpVal: cmpVal}
    56  }
    57  
    58  // Value returns a *Value built from k.
    59  func (k key) Value() *Value {
    60  	if k.isString {
    61  		return &Value{cmpVal: k.s}
    62  	}
    63  	return &Value{cmpVal: k.cmpVal}
    64  }
    65  
    66  var (
    67  	// mu guards valMap, a weakref map of *Value by underlying value.
    68  	// It also guards the resurrected field of all *Values.
    69  	mu      sync.Mutex
    70  	valMap  = map[key]uintptr{} // to uintptr(*Value)
    71  	valSafe = safeMap()         // non-nil in safe+leaky mode
    72  )
    73  
    74  // safeMap returns a non-nil map if we're in safe-but-leaky mode,
    75  // as controlled by GO4_INTERN_SAFE_BUT_LEAKY.
    76  func safeMap() map[key]*Value {
    77  	if v, _ := strconv.ParseBool(os.Getenv("GO4_INTERN_SAFE_BUT_LEAKY")); v {
    78  		return map[key]*Value{}
    79  	}
    80  	return nil
    81  }
    82  
    83  // Get returns a pointer representing the comparable value cmpVal.
    84  //
    85  // The returned pointer will be the same for Get(v) and Get(v2)
    86  // if and only if v == v2, and can be used as a map key.
    87  func Get(cmpVal interface{}) *Value {
    88  	return get(keyFor(cmpVal))
    89  }
    90  
    91  // GetByString is identical to Get, except that it is specialized for strings.
    92  // This avoids an allocation from putting a string into an interface{}
    93  // to pass as an argument to Get.
    94  func GetByString(s string) *Value {
    95  	return get(key{s: s, isString: true})
    96  }
    97  
// get returns the *Value registered for k, creating and registering a
// new one when no entry exists. It holds mu for its entire duration.
//
// In safe+leaky mode (valSafe != nil) Values are kept in valSafe as
// ordinary pointers and no finalizer is installed. Otherwise the
// Value's address is stored in valMap as a uintptr and finalize is
// installed as its finalizer.
//
// We play unsafe games that violate Go's rules (and assume a non-moving
// collector). So we quiet Go here.
// See the long comment following finalize for more implementation details.
//
//go:nocheckptr
func get(k key) *Value {
	mu.Lock()
	defer mu.Unlock()

	var v *Value
	if valSafe != nil {
		// Safe mode: a missing entry simply yields a nil v.
		v = valSafe[k]
	} else if addr, ok := valMap[k]; ok {
		// Rebuild the pointer from its stored address, and mark the
		// Value so that finalize can tell a new reference was
		// unsafely created since the finalizer was set up.
		v = (*Value)((unsafe.Pointer)(addr))
		v.resurrected = true
	}
	if v != nil {
		return v
	}
	// No existing entry: make a new Value and register it.
	v = k.Value()
	if valSafe != nil {
		valSafe[k] = v
	} else {
		// SetFinalizer before uintptr conversion (theoretical concern;
		// see https://github.com/go4org/intern/issues/13)
		runtime.SetFinalizer(v, finalize)
		valMap[k] = uintptr(unsafe.Pointer(v))
	}
	return v
}
   127  
   128  func finalize(v *Value) {
   129  	mu.Lock()
   130  	defer mu.Unlock()
   131  	if v.resurrected {
   132  		// We lost the race. Somebody resurrected it while we
   133  		// were about to finalize it. Try again next round.
   134  		v.resurrected = false
   135  		runtime.SetFinalizer(v, finalize)
   136  		return
   137  	}
   138  	delete(valMap, keyFor(v.cmpVal))
   139  }
   140  
   141  // Interning is simple if you don't require that unused values be
   142  // garbage collectable. But we do require that; we don't want to be a
   143  // DoS vector. We do this by using a uintptr to hide the pointer from
   144  // the garbage collector, and using a finalizer to eliminate the
   145  // pointer when no other code is using it.
   146  //
   147  // The obvious implementation of this is to use a
   148  // map[interface{}]uintptr-of-*interface{}, and set up a finalizer to
   149  // delete from the map. Unfortunately, this is racy. Because pointers
   150  // are being created in violation of Go's unsafety rules, it's
   151  // possible to create a pointer to a value concurrently with the GC
   152  // concluding that the value can be collected. There are other races
   153  // that break the equality invariant as well, but the use-after-free
   154  // will cause a runtime crash.
   155  //
   156  // To make this work, the finalizer needs to know that no references
   157  // have been unsafely created since the finalizer was set up. To do
   158  // this, values carry a "resurrected" sentinel, which gets set
   159  // whenever a pointer is unsafely created. If the finalizer encounters
   160  // the sentinel, it clears the sentinel and delays collection for one
   161  // additional GC cycle, by re-installing itself as finalizer. This
   162  // ensures that the unsafely created pointer is visible to the GC, and
   163  // will correctly prevent collection.
   164  //
   165  // This technique does mean that interned values that get reused take
   166  // at least 3 GC cycles to fully collect (1 to clear the sentinel, 1
   167  // to clean up the unsafe map, 1 to be actually deleted).
   168  //
   169  // @ianlancetaylor commented in
   170  // https://github.com/golang/go/issues/41303#issuecomment-717401656
   171  // that it is possible to implement weak references in terms of
   172  // finalizers without unsafe. Unfortunately, the approach he outlined
   173  // does not work here, for two reasons. First, there is no way to
   174  // construct a strong pointer out of a weak pointer; our map stores
   175  // weak pointers, but we must return strong pointers to callers.
   176  // Second, and more fundamentally, we must return not just _a_ strong
   177  // pointer to callers, but _the same_ strong pointer to callers. In
   178  // order to return _the same_ strong pointer to callers, we must track
   179  // it, which is exactly what we cannot do with strong pointers.
   180  //
   181  // See https://github.com/inetaf/netaddr/issues/53 for more
   182  // discussion, and https://github.com/go4org/intern/issues/2 for an
   183  // illustration of the subtleties at play.