github.com/grailbio/base@v0.0.11/diagnostic/stringintern/intern.go (about) 1 package stringintern 2 3 import ( 4 "reflect" 5 ) 6 7 // Intern will recursively traverse one or more objects and collapse all strings that are identical to the same pointer, saving memory. 8 // Inputs must be pointer types. 9 // String map keys are not interned. 10 // The path to all fields must be exported. It is not possible to modify unexported fields in a safe way. 11 // Example usage: 12 // var x = ... some complicated type with strings 13 // stringintern.Intern(&x) 14 // Warning: This is a potentially dangerous operation. 15 // Extreme care must be taken that no pointers exist to other structures that should not be modified. 16 // This method is not thread safe. No other threads should be reading or writing to x while it is being interned. 17 // It is safest to use this code for testing purposes to see how much memory can be saved by interning but then do the interning explicitly: 18 // sizeBefore := memsize.DeepSize(&x) 19 // stringintern.Intern(&x) 20 // sizeAfter := memsize.DeepSize(&x) 21 func Intern(x ...interface{}) { 22 myinterner := interner{ 23 dict: make(map[string]string), 24 locMap: make(map[addressAndType]struct{}), 25 } 26 for _, val := range x { 27 value := reflect.ValueOf(val) 28 if value.Kind() != reflect.Ptr { 29 panic("input kind must be a pointer") 30 } 31 myinterner.intern(value) 32 } 33 } 34 35 type interner struct { 36 // dict stores the mapping of strings to their canonical interned version. 37 dict map[string]string 38 // keeps track of which memory locations have already been scanned. 39 // it is necessary to also store type because structs and fields can have the same address and must be differentiated. 40 locMap map[addressAndType]struct{} 41 } 42 43 type addressAndType struct { 44 address uintptr 45 tp reflect.Type 46 } 47 48 func (s *interner) intern(x reflect.Value) { 49 if x.CanAddr() { 50 addr := x.UnsafeAddr() 51 x.Type().Name() 52 if _, alreadyProcessed := s.locMap[addressAndType{addr, x.Type()}]; alreadyProcessed { 53 return 54 } 55 s.locMap[addressAndType{addr, x.Type()}] = struct{}{} // mark current memory location 56 } 57 switch x.Kind() { 58 case reflect.String: 59 if x.CanSet() { 60 val := x.String() 61 s.internString(&val) 62 x.SetString(val) 63 } 64 case reflect.Float64, reflect.Float32, 65 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, 66 reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 67 reflect.Complex64, reflect.Complex128, 68 reflect.Invalid, reflect.Chan, reflect.Bool, reflect.Uintptr, reflect.Func: 69 // noop. don't do anything. 70 case reflect.Struct: 71 for i := 0; i < x.NumField(); i++ { 72 s.intern(x.Field(i)) 73 } 74 case reflect.Ptr, reflect.Interface: 75 if !x.IsNil() { 76 s.intern(x.Elem()) 77 } 78 case reflect.Slice, reflect.Array: 79 for i := 0; i < x.Len(); i++ { 80 s.intern(x.Index(i)) 81 } 82 case reflect.Map: 83 for _, key := range x.MapKeys() { 84 val := x.MapIndex(key) 85 if val.Kind() == reflect.String { 86 stringVal := val.String() 87 s.internString(&stringVal) 88 x.SetMapIndex(key, reflect.ValueOf(stringVal)) 89 } else { 90 s.intern(val) 91 } 92 } 93 } 94 } 95 96 // takes a pointer to a string. If string has previously been seen, it will change to interned version. 97 // otherwise adds to dictionary of interned strings. 98 func (s *interner) internString(x *string) { 99 if val, ok := s.dict[*x]; ok { 100 *x = val 101 } else { 102 s.dict[*x] = *x 103 } 104 }