github.com/grailbio/base@v0.0.11/diagnostic/stringintern/intern.go (about)

     1  package stringintern
     2  
     3  import (
     4  	"reflect"
     5  )
     6  
     7  // Intern will recursively traverse one or more objects and collapse all strings that are identical to the same pointer, saving memory.
     8  // Inputs must be pointer types.
     9  // String map keys are not interned.
    10  // The path to all fields must be exported.  It is not possible to modify unexported fields in a safe way.
    11  // Example usage:
    12  //     var x = ... some complicated type with strings
    13  //     stringintern.Intern(&x)
    14  // Warning:  This is a potentially dangerous operation.
    15  //		Extreme care must be taken that no pointers exist to other structures that should not be modified.
    16  //		This method is not thread safe.  No other threads should be reading or writing to x while it is being interned.
    17  // It is safest to use this code for testing purposes to see how much memory can be saved by interning but then do the interning explicitly:
    18  // 	sizeBefore := memsize.DeepSize(&x)
    19  // 	stringintern.Intern(&x)
    20  // 	sizeAfter := memsize.DeepSize(&x)
    21  func Intern(x ...interface{}) {
    22  	myinterner := interner{
    23  		dict:   make(map[string]string),
    24  		locMap: make(map[addressAndType]struct{}),
    25  	}
    26  	for _, val := range x {
    27  		value := reflect.ValueOf(val)
    28  		if value.Kind() != reflect.Ptr {
    29  			panic("input kind must be a pointer")
    30  		}
    31  		myinterner.intern(value)
    32  	}
    33  }
    34  
    35  type interner struct {
    36  	// dict stores the mapping of strings to their canonical interned version.
    37  	dict map[string]string
    38  	// keeps track of which memory locations have already been scanned.
    39  	// it is necessary to also store type because structs and fields can have the same address and must be differentiated.
    40  	locMap map[addressAndType]struct{}
    41  }
    42  
    43  type addressAndType struct {
    44  	address uintptr
    45  	tp      reflect.Type
    46  }
    47  
    48  func (s *interner) intern(x reflect.Value) {
    49  	if x.CanAddr() {
    50  		addr := x.UnsafeAddr()
    51  		x.Type().Name()
    52  		if _, alreadyProcessed := s.locMap[addressAndType{addr, x.Type()}]; alreadyProcessed {
    53  			return
    54  		}
    55  		s.locMap[addressAndType{addr, x.Type()}] = struct{}{} // mark current memory location
    56  	}
    57  	switch x.Kind() {
    58  	case reflect.String:
    59  		if x.CanSet() {
    60  			val := x.String()
    61  			s.internString(&val)
    62  			x.SetString(val)
    63  		}
    64  	case reflect.Float64, reflect.Float32,
    65  		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
    66  		reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
    67  		reflect.Complex64, reflect.Complex128,
    68  		reflect.Invalid, reflect.Chan, reflect.Bool, reflect.Uintptr, reflect.Func:
    69  		// noop.  don't do anything.
    70  	case reflect.Struct:
    71  		for i := 0; i < x.NumField(); i++ {
    72  			s.intern(x.Field(i))
    73  		}
    74  	case reflect.Ptr, reflect.Interface:
    75  		if !x.IsNil() {
    76  			s.intern(x.Elem())
    77  		}
    78  	case reflect.Slice, reflect.Array:
    79  		for i := 0; i < x.Len(); i++ {
    80  			s.intern(x.Index(i))
    81  		}
    82  	case reflect.Map:
    83  		for _, key := range x.MapKeys() {
    84  			val := x.MapIndex(key)
    85  			if val.Kind() == reflect.String {
    86  				stringVal := val.String()
    87  				s.internString(&stringVal)
    88  				x.SetMapIndex(key, reflect.ValueOf(stringVal))
    89  			} else {
    90  				s.intern(val)
    91  			}
    92  		}
    93  	}
    94  }
    95  
    96  // takes a pointer to a string.  If string has previously been seen, it will change to interned version.
    97  // otherwise adds to dictionary of interned strings.
    98  func (s *interner) internString(x *string) {
    99  	if val, ok := s.dict[*x]; ok {
   100  		*x = val
   101  	} else {
   102  		s.dict[*x] = *x
   103  	}
   104  }