github.com/lkarlslund/stringdedup@v0.6.2/example/main.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "runtime" 8 "time" 9 "unsafe" 10 11 "github.com/OneOfOne/xxhash" 12 "github.com/lkarlslund/stringdedup" 13 ) 14 15 type fileinfo struct { 16 folder, basename, extension string 17 } 18 19 var files, files2 []fileinfo 20 21 func main() { 22 fmt.Println("String deduplication demonstration") 23 fmt.Println("---") 24 25 d := stringdedup.New(func(in []byte) uint32 { 26 return xxhash.Checksum32(in) 27 }) 28 29 var memstats runtime.MemStats 30 31 runtime.ReadMemStats(&memstats) 32 fmt.Printf("Initial memory usage at start of program: %v objects, consuming %v bytes\n", memstats.HeapObjects, memstats.HeapInuse) 33 fmt.Println("---") 34 35 searchDir := "/usr" 36 if runtime.GOOS == "windows" { 37 searchDir = "c:/windows" 38 } 39 40 fmt.Printf("Scanning and indexing files in %v - hang on ...\n", searchDir) 41 42 filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error { 43 if !f.IsDir() { 44 folder := filepath.Dir(path) 45 extension := filepath.Ext(path) 46 basename := filepath.Base(path) 47 basename = basename[:len(basename)-len(extension)] 48 files = append(files, fileinfo{ 49 folder: folder, 50 basename: extension, 51 extension: basename, 52 }) 53 } 54 return nil 55 }) 56 57 fmt.Println("Scanning done!") 58 fmt.Println("---") 59 60 runtime.GC() // Let garbage collector run, and see memory usage 61 time.Sleep(time.Millisecond * 100) // Settle down 62 runtime.ReadMemStats(&memstats) 63 fmt.Printf("Memory usage for %v fileinfo: %v object, consuming %v bytes\n", len(files), memstats.HeapObjects, memstats.HeapInuse) 64 65 undedupbytes := memstats.HeapInuse 66 67 fmt.Printf("Slice reference costs %v x %v bytes - a total of %v bytes\n", len(files), unsafe.Sizeof(fileinfo{}), len(files)*int(unsafe.Sizeof(fileinfo{}))) 68 69 checksum := xxhash.New64() 70 for _, fi := range files { 71 checksum.Write([]byte(fi.folder + fi.basename + fi.extension)) 72 } 73 csum := checksum.Sum64() 74 fmt.Printf("Validation checksum on non deduped files is %x\n", csum) 75 fmt.Println("---") 76 77 // NON DEDUPLICATED STATISTICS END 78 79 // A new batch of fileinfo 80 files2 = make([]fileinfo, len(files), cap(files)) 81 82 // Lets try that again with deduplication 83 for i, fi := range files { 84 files2[i] = fileinfo{ 85 folder: d.S(fi.folder), 86 basename: d.S(fi.basename), 87 extension: d.S(fi.extension), 88 } 89 } 90 91 runtime.ReadMemStats(&memstats) 92 fmt.Println("Both a duplicated and non-deduplicated slice is now in memory") 93 fmt.Printf("Double allocated memory usage for %v fileinfo: %v objects, consuming %v bytes\n", len(files2), memstats.HeapObjects, memstats.HeapInuse) 94 95 // Let garbage collector run, and see memory usage 96 runtime.KeepAlive(files) 97 files = nil 98 runtime.GC() 99 time.Sleep(time.Millisecond * 1000) 100 101 runtime.ReadMemStats(&memstats) 102 fmt.Println("---") 103 fmt.Printf("Dedup memory usage for %v fileinfo: %v objects, consuming %v bytes\n", len(files2), memstats.HeapObjects, memstats.HeapInuse) 104 105 dedupbytes := memstats.HeapInuse 106 fmt.Printf("Reduction in memory usage: %.2f\n", float32(dedupbytes)/float32(undedupbytes)) 107 108 // Drop indexes and let's see 109 d.Flush() 110 runtime.GC() 111 time.Sleep(time.Millisecond * 1000) 112 113 runtime.ReadMemStats(&memstats) 114 fmt.Println("---") 115 fmt.Printf("Flushed index memory usage: %v object, consuming %v bytes\n", memstats.HeapObjects, memstats.HeapInuse) 116 fmt.Printf("Reduction in memory usage (after dropping indexes): %.2f\n", float32(memstats.HeapInuse)/float32(undedupbytes)) 117 118 // Validate that deduped files are the same as non deduped files 119 checksum = xxhash.New64() 120 for _, fi := range files2 { 121 checksum.Write([]byte(fi.folder + fi.basename + fi.extension)) 122 } 123 fmt.Println("---") 124 csum2 := checksum.Sum64() 125 fmt.Printf("Validation on dedup strings checksum is %x\n", csum2) 126 checksum = nil 127 128 if csum != csum2 { 129 fmt.Println("!!! VALIDATION FAILED. DEDUPED STRINGS ARE NOT THE SAME AS NON DEDUPED STRINGS !!!") 130 } 131 132 var bytes int 133 for _, file := range files2 { 134 bytes += len(file.basename) + len(file.extension) + len(file.folder) 135 } 136 137 // Let garbage collector run, and see memory usage 138 // Clean up stuff left by finalizers 139 files2 = nil 140 runtime.GC() 141 time.Sleep(time.Millisecond * 100) 142 runtime.GC() 143 144 runtime.ReadMemStats(&memstats) 145 fmt.Println("---") 146 fmt.Printf("Cleared memory usage: %v object, consuming %v bytes\n", memstats.HeapObjects, memstats.HeapInuse) 147 } 148 149 // func printmemstats("")