github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/go/nbs/benchmarks/data_source.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package main 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "fmt" 11 "io" 12 "os" 13 14 "github.com/attic-labs/kingpin" 15 "github.com/attic-labs/noms/go/chunks" 16 "github.com/attic-labs/noms/go/d" 17 "github.com/attic-labs/noms/go/hash" 18 "github.com/attic-labs/noms/go/nbs/benchmarks/gen" 19 "github.com/dustin/go-humanize" 20 ) 21 22 var readFile = kingpin.Flag("input-file", "A file full of test data. Creates and saves associated .chunks file at runtime if it doesn't yet exist. If none is specified, data and .chunks files will be generated and saved.").String() 23 24 const averageChunkSize = 4 * 1024 25 26 type hashSlice []hash.Hash 27 28 func (s hashSlice) Len() int { 29 return len(s) 30 } 31 32 func (s hashSlice) Less(i, j int) bool { 33 return s[i].Less(s[j]) 34 } 35 36 func (s hashSlice) Swap(i, j int) { 37 s[i], s[j] = s[j], s[i] 38 } 39 40 type dataSource struct { 41 data, cm *os.File 42 totalData, dataRead uint64 43 hashes hashSlice 44 } 45 46 func getInput(dataSize uint64) (src *dataSource, err error) { 47 filename := *readFile 48 if filename == "" { 49 filename = humanize.IBytes(dataSize) + ".bin" 50 } 51 data, err := gen.OpenOrGenerateDataFile(filename, dataSize) 52 if err != nil { 53 return nil, err 54 } 55 chunkFile := data.Name() + ".chunks" 56 cm := gen.OpenOrBuildChunkMap(chunkFile, data) 57 fmt.Println("Reading from", filename, "with chunks", chunkFile) 58 src = &dataSource{data: data, cm: cm, totalData: dataSize} 59 tuples := make(chan offsetTuple, 1024) 60 go func() { 61 src.readTuples(tuples) 62 close(tuples) 63 }() 64 for ot := range tuples { 65 src.hashes = append(src.hashes, ot.h) 66 } 67 return src, err 68 } 69 70 type offsetTuple struct { 71 h hash.Hash 72 l uint64 73 } 74 75 func (src *dataSource) PrimeFilesystemCache() { 76 bufData := bufio.NewReaderSize(src.data, 10*humanize.MiByte) 77 tuples := make(chan offsetTuple, 16) 78 go func() { 79 src.readTuples(tuples) 80 close(tuples) 81 }() 82 83 for ot := range tuples { 84 buff := make([]byte, ot.l) 85 n, err := io.ReadFull(bufData, buff) 86 d.Chk.NoError(err) 87 d.Chk.True(uint64(n) == ot.l) 88 } 89 } 90 91 func (src *dataSource) ReadChunks(chunkChan chan<- *chunks.Chunk) { 92 bufData := bufio.NewReaderSize(src.data, humanize.MiByte) 93 tuples := make(chan offsetTuple, 1024) 94 go func() { 95 src.readTuples(tuples) 96 close(tuples) 97 }() 98 99 for ot := range tuples { 100 buff := make([]byte, ot.l) 101 n, err := io.ReadFull(bufData, buff) 102 d.Chk.NoError(err) 103 d.Chk.True(uint64(n) == ot.l) 104 c := chunks.NewChunkWithHash(ot.h, buff) 105 chunkChan <- &c 106 } 107 } 108 109 func (src *dataSource) GetHashes() hashSlice { 110 out := make(hashSlice, len(src.hashes)) 111 copy(out, src.hashes) 112 return out 113 } 114 115 func (src *dataSource) readTuples(tuples chan<- offsetTuple) { 116 src.reset() 117 118 otBuf := [gen.OffsetTupleLen]byte{} 119 cm := bufio.NewReaderSize(src.cm, humanize.MiByte) 120 ot := offsetTuple{} 121 122 for src.dataRead < src.totalData { 123 n, err := io.ReadFull(cm, otBuf[:]) 124 if err != nil { 125 d.Chk.True(err == io.EOF) 126 return 127 } 128 d.Chk.True(n == gen.OffsetTupleLen) 129 ot.h = hash.New(otBuf[:20]) 130 ot.l = uint64(binary.BigEndian.Uint32(otBuf[20:])) 131 src.dataRead += ot.l 132 tuples <- ot 133 } 134 } 135 136 func (src *dataSource) reset() { 137 _, err := src.data.Seek(0, os.SEEK_SET) 138 d.Chk.NoError(err) 139 _, err = src.cm.Seek(0, os.SEEK_SET) 140 d.Chk.NoError(err) 141 src.dataRead = 0 142 } 143 144 func (src *dataSource) Close() { 145 d.Chk.NoError(src.data.Close()) 146 d.Chk.NoError(src.cm.Close()) 147 }