github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/benchmarks/data_source.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package main 23 24 import ( 25 "bufio" 26 "encoding/binary" 27 "errors" 28 "fmt" 29 "io" 30 "os" 31 32 "github.com/dustin/go-humanize" 33 flag "github.com/juju/gnuflag" 34 35 "github.com/dolthub/dolt/go/store/atomicerr" 36 "github.com/dolthub/dolt/go/store/chunks" 37 "github.com/dolthub/dolt/go/store/hash" 38 "github.com/dolthub/dolt/go/store/nbs/benchmarks/gen" 39 ) 40 41 var readFile = flag.String("input-file", "", "A file full of test data. Creates and saves associated .chunks file at runtime if it doesn't yet exist. If none is specified, data and .chunks files will be generated and saved.") 42 43 type hashSlice []hash.Hash 44 45 func (s hashSlice) Len() int { 46 return len(s) 47 } 48 49 func (s hashSlice) Less(i, j int) bool { 50 return s[i].Less(s[j]) 51 } 52 53 func (s hashSlice) Swap(i, j int) { 54 s[i], s[j] = s[j], s[i] 55 } 56 57 type dataSource struct { 58 data, cm *os.File 59 totalData, dataRead uint64 60 hashes hashSlice 61 } 62 63 func getInput(dataSize uint64) (src *dataSource, err error) { 64 filename := *readFile 65 if filename == "" { 66 filename = humanize.IBytes(dataSize) + ".bin" 67 } 68 data, err := gen.OpenOrGenerateDataFile(filename, dataSize) 69 if err != nil { 70 return nil, err 71 } 72 chunkFile := data.Name() + ".chunks" 73 cm := gen.OpenOrBuildChunkMap(chunkFile, data) 74 fmt.Println("Reading from", filename, "with chunks", chunkFile) 75 src = &dataSource{data: data, cm: cm, totalData: dataSize} 76 tuples := make(chan offsetTuple, 1024) 77 go func() { 78 src.readTuples(tuples) 79 close(tuples) 80 }() 81 for ot := range tuples { 82 src.hashes = append(src.hashes, ot.h) 83 } 84 return src, err 85 } 86 87 type offsetTuple struct { 88 h hash.Hash 89 l uint64 90 } 91 92 func (src *dataSource) PrimeFilesystemCache() error { 93 bufData := bufio.NewReaderSize(src.data, 10*humanize.MiByte) 94 tuples := make(chan offsetTuple, 16) 95 96 ae := atomicerr.New() 97 go func() { 98 err := src.readTuples(tuples) 99 ae.SetIfError(err) 100 close(tuples) 101 }() 102 103 for ot := range tuples { 104 if ae.IsSet() { 105 break 106 } 107 108 buff := make([]byte, ot.l) 109 n, err := io.ReadFull(bufData, buff) 110 111 if err != nil { 112 return err 113 } 114 115 if uint64(n) != ot.l { 116 return errors.New("failed to read all data") 117 } 118 } 119 120 return ae.Get() 121 } 122 123 func (src *dataSource) ReadChunks(chunkChan chan<- *chunks.Chunk) error { 124 bufData := bufio.NewReaderSize(src.data, humanize.MiByte) 125 tuples := make(chan offsetTuple, 1024) 126 127 ae := atomicerr.New() 128 go func() { 129 err := src.readTuples(tuples) 130 ae.SetIfError(err) 131 close(tuples) 132 }() 133 134 for ot := range tuples { 135 if ae.IsSet() { 136 break 137 } 138 139 buff := make([]byte, ot.l) 140 n, err := io.ReadFull(bufData, buff) 141 142 if err != nil { 143 return err 144 } 145 146 if uint64(n) != ot.l { 147 return errors.New("failed to read the entire chunk") 148 } 149 150 c := chunks.NewChunkWithHash(ot.h, buff) 151 chunkChan <- &c 152 } 153 154 return ae.Get() 155 } 156 157 func (src *dataSource) GetHashes() hashSlice { 158 out := make(hashSlice, len(src.hashes)) 159 copy(out, src.hashes) 160 return out 161 } 162 163 func (src *dataSource) readTuples(tuples chan<- offsetTuple) error { 164 err := src.reset() 165 166 if err != nil { 167 return err 168 } 169 170 otBuf := [gen.OffsetTupleLen]byte{} 171 cm := bufio.NewReaderSize(src.cm, humanize.MiByte) 172 ot := offsetTuple{} 173 174 for src.dataRead < src.totalData { 175 n, err := io.ReadFull(cm, otBuf[:]) 176 if err != nil { 177 if err != io.EOF { 178 return err 179 } 180 181 return nil 182 } 183 184 if n != gen.OffsetTupleLen { 185 return errors.New("failed to read all data") 186 } 187 188 ot.h = hash.New(otBuf[:20]) 189 ot.l = uint64(binary.BigEndian.Uint32(otBuf[20:])) 190 src.dataRead += ot.l 191 tuples <- ot 192 } 193 194 return nil 195 } 196 197 func (src *dataSource) reset() error { 198 _, err := src.data.Seek(0, io.SeekStart) 199 200 if err != nil { 201 return err 202 } 203 204 _, err = src.cm.Seek(0, io.SeekStart) 205 206 if err != nil { 207 return err 208 } 209 210 src.dataRead = 0 211 212 return nil 213 } 214 215 func (src *dataSource) Close() error { 216 dataErr := src.data.Close() 217 cmErr := src.cm.Close() 218 219 if dataErr != nil { 220 return dataErr 221 } 222 223 if cmErr != nil { 224 return cmErr 225 } 226 227 return nil 228 }