go4.org/intern@v0.0.0-20230525184215-6c62f75575cb/intern.go (about) 1 // Copyright 2020 Brad Fitzpatrick. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package intern lets you make smaller comparable values by boxing 6 // a larger comparable value (such as a 16 byte string header) down 7 // into a globally unique 8 byte pointer. 8 // 9 // The globally unique pointers are garbage collected with weak 10 // references and finalizers. This package hides that. 11 // 12 // The GitHub repo is https://github.com/go4org/intern 13 package intern // import "go4.org/intern" 14 15 import ( 16 "os" 17 "runtime" 18 "strconv" 19 "sync" 20 "unsafe" 21 22 _ "go4.org/unsafe/assume-no-moving-gc" 23 ) 24 25 // A Value pointer is the handle to an underlying comparable value. 26 // See func Get for how Value pointers may be used. 27 type Value struct { 28 _ [0]func() // prevent people from accidentally using value type as comparable 29 cmpVal interface{} 30 // resurrected is guarded by mu (for all instances of Value). 31 // It is set true whenever v is synthesized from a uintptr. 32 resurrected bool 33 } 34 35 // Get returns the comparable value passed to the Get func 36 // that returned v. 37 func (v *Value) Get() interface{} { return v.cmpVal } 38 39 // key is a key in our global value map. 40 // It contains type-specialized fields to avoid allocations 41 // when converting common types to empty interfaces. 42 type key struct { 43 s string 44 cmpVal interface{} 45 // isString reports whether key contains a string. 46 // Without it, the zero value of key is ambiguous. 47 isString bool 48 } 49 50 // keyFor returns a key to use with cmpVal. 51 func keyFor(cmpVal interface{}) key { 52 if s, ok := cmpVal.(string); ok { 53 return key{s: s, isString: true} 54 } 55 return key{cmpVal: cmpVal} 56 } 57 58 // Value returns a *Value built from k. 
59 func (k key) Value() *Value { 60 if k.isString { 61 return &Value{cmpVal: k.s} 62 } 63 return &Value{cmpVal: k.cmpVal} 64 } 65 66 var ( 67 // mu guards valMap, a weakref map of *Value by underlying value. 68 // It also guards the resurrected field of all *Values. 69 mu sync.Mutex 70 valMap = map[key]uintptr{} // to uintptr(*Value) 71 valSafe = safeMap() // non-nil in safe+leaky mode 72 ) 73 74 // safeMap returns a non-nil map if we're in safe-but-leaky mode, 75 // as controlled by GO4_INTERN_SAFE_BUT_LEAKY. 76 func safeMap() map[key]*Value { 77 if v, _ := strconv.ParseBool(os.Getenv("GO4_INTERN_SAFE_BUT_LEAKY")); v { 78 return map[key]*Value{} 79 } 80 return nil 81 } 82 83 // Get returns a pointer representing the comparable value cmpVal. 84 // 85 // The returned pointer will be the same for Get(v) and Get(v2) 86 // if and only if v == v2, and can be used as a map key. 87 func Get(cmpVal interface{}) *Value { 88 return get(keyFor(cmpVal)) 89 } 90 91 // GetByString is identical to Get, except that it is specialized for strings. 92 // This avoids an allocation from putting a string into an interface{} 93 // to pass as an argument to Get. 94 func GetByString(s string) *Value { 95 return get(key{s: s, isString: true}) 96 } 97 98 // We play unsafe games that violate Go's rules (and assume a non-moving 99 // collector). So we quiet Go here. 100 // See the comment below Get for more implementation details. 
101 //go:nocheckptr 102 func get(k key) *Value { 103 mu.Lock() 104 defer mu.Unlock() 105 106 var v *Value 107 if valSafe != nil { 108 v = valSafe[k] 109 } else if addr, ok := valMap[k]; ok { 110 v = (*Value)((unsafe.Pointer)(addr)) 111 v.resurrected = true 112 } 113 if v != nil { 114 return v 115 } 116 v = k.Value() 117 if valSafe != nil { 118 valSafe[k] = v 119 } else { 120 // SetFinalizer before uintptr conversion (theoretical concern; 121 // see https://github.com/go4org/intern/issues/13) 122 runtime.SetFinalizer(v, finalize) 123 valMap[k] = uintptr(unsafe.Pointer(v)) 124 } 125 return v 126 } 127 128 func finalize(v *Value) { 129 mu.Lock() 130 defer mu.Unlock() 131 if v.resurrected { 132 // We lost the race. Somebody resurrected it while we 133 // were about to finalize it. Try again next round. 134 v.resurrected = false 135 runtime.SetFinalizer(v, finalize) 136 return 137 } 138 delete(valMap, keyFor(v.cmpVal)) 139 } 140 141 // Interning is simple if you don't require that unused values be 142 // garbage collectable. But we do require that; we don't want to be 143 // DOS vector. We do this by using a uintptr to hide the pointer from 144 // the garbage collector, and using a finalizer to eliminate the 145 // pointer when no other code is using it. 146 // 147 // The obvious implementation of this is to use a 148 // map[interface{}]uintptr-of-*interface{}, and set up a finalizer to 149 // delete from the map. Unfortunately, this is racy. Because pointers 150 // are being created in violation of Go's unsafety rules, it's 151 // possible to create a pointer to a value concurrently with the GC 152 // concluding that the value can be collected. There are other races 153 // that break the equality invariant as well, but the use-after-free 154 // will cause a runtime crash. 155 // 156 // To make this work, the finalizer needs to know that no references 157 // have been unsafely created since the finalizer was set up. 
// To do this, values carry a "resurrected" sentinel, which gets set
// whenever a pointer is unsafely created. If the finalizer encounters
// the sentinel, it clears the sentinel and delays collection for one
// additional GC cycle, by re-installing itself as finalizer. This
// ensures that the unsafely created pointer is visible to the GC, and
// will correctly prevent collection.
//
// This technique does mean that interned values that get reused take
// at least 3 GC cycles to fully collect (1 to clear the sentinel, 1
// to clean up the unsafe map, 1 to be actually deleted).
//
// @ianlancetaylor commented in
// https://github.com/golang/go/issues/41303#issuecomment-717401656
// that it is possible to implement weak references in terms of
// finalizers without unsafe. Unfortunately, the approach he outlined
// does not work here, for two reasons. First, there is no way to
// construct a strong pointer out of a weak pointer; our map stores
// weak pointers, but we must return strong pointers to callers.
// Second, and more fundamentally, we must return not just _a_ strong
// pointer to callers, but _the same_ strong pointer to callers. In
// order to return _the same_ strong pointer to callers, we must track
// it, which is exactly what we cannot do with strong pointers.
//
// See https://github.com/inetaf/netaddr/issues/53 for more
// discussion, and https://github.com/go4org/intern/issues/2 for an
// illustration of the subtleties at play.