github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/benchmarks/gen/gen.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package gen 23 24 import ( 25 "bytes" 26 "crypto/sha512" 27 "encoding/binary" 28 "fmt" 29 "io" 30 "math/rand" 31 "os" 32 33 "github.com/dustin/go-humanize" 34 35 "github.com/dolthub/dolt/go/store/d" 36 "github.com/dolthub/dolt/go/store/hash" 37 ) 38 39 const ( 40 OffsetTupleLen = 24 41 averageChunkSize = 4 * uint64(1<<10) // 4KB 42 ) 43 44 func OpenOrGenerateDataFile(name string, totalData uint64) (data *os.File, err error) { 45 data, err = os.Open(name) 46 if os.IsNotExist(err) { 47 data, _ = os.Create(name) 48 fmt.Printf("Creating data file with %s\n", humanize.IBytes(totalData)) 49 generateData(data, totalData) 50 _, err = data.Seek(0, io.SeekStart) 51 d.Chk.NoError(err) 52 return data, nil 53 } 54 d.Chk.NoError(err) 55 56 info, err := data.Stat() 57 d.Chk.NoError(err) 58 if uint64(info.Size()) < totalData { 59 data.Close() 60 return nil, fmt.Errorf("%s is too small to benchmark with %s", name, humanize.IBytes(totalData)) 61 } 62 return data, nil 63 } 64 65 func OpenOrBuildChunkMap(name string, data *os.File) *os.File { 66 cm, err := os.Open(name) 67 if os.IsNotExist(err) { 68 cm, _ = os.Create(name) 69 fmt.Printf("Chunking %s into chunk-map: %s ...", data.Name(), name) 70 cc := chunk(cm, data) 71 fmt.Println(cc, " chunks") 72 73 _, err = cm.Seek(0, io.SeekStart) 74 d.Chk.NoError(err) 75 return cm 76 } 77 d.Chk.NoError(err) 78 return cm 79 } 80 81 func generateData(w io.Writer, totalData uint64) { 82 r := &randomByteReader{} 83 84 buff := [humanize.MiByte]byte{} 85 bs := buff[:] 86 buffIdx := 0 87 88 for bc := uint64(0); bc < totalData; bc++ { 89 b, _ := r.ReadByte() 90 bs[buffIdx] = b 91 buffIdx++ 92 if buffIdx == int(humanize.MiByte) { 93 io.Copy(w, bytes.NewReader(bs)) 94 buffIdx = 0 95 } 96 } 97 } 98 99 type randomByteReader struct { 100 rand *rand.Rand 101 scratch [2 * averageChunkSize]byte 102 pos int 103 } 104 105 func (rbr *randomByteReader) ReadByte() (byte, error) { 106 if rbr.rand == nil { 107 rbr.rand = rand.New(rand.NewSource(0)) 108 rbr.pos = cap(rbr.scratch) 109 } 110 if rbr.pos >= cap(rbr.scratch) { 111 rbr.rand.Read(rbr.scratch[:]) 112 rbr.pos = 0 113 } 114 b := rbr.scratch[rbr.pos] 115 rbr.pos++ 116 return b, nil 117 } 118 119 func (rbr *randomByteReader) Close() error { 120 return nil 121 } 122 123 type offsetTuple [OffsetTupleLen]byte 124 125 func chunk(w io.Writer, r io.Reader) (chunkCount int) { 126 buff := [humanize.MiByte]byte{} 127 bs := buff[:] 128 buffIdx := uint64(0) 129 rv := newRollingValueHasher() 130 sha := sha512.New() 131 ot := offsetTuple{} 132 lastOffset := uint64(0) 133 134 var err error 135 var n int 136 137 writeChunk := func() { 138 chunkCount++ 139 var d []byte 140 d = sha.Sum(d) 141 copy(ot[:hash.ByteLen], d) 142 143 chunkLength := uint32(buffIdx - lastOffset) 144 145 binary.BigEndian.PutUint32(ot[hash.ByteLen:], chunkLength) 146 147 io.Copy(w, bytes.NewReader(ot[:])) 148 149 lastOffset = buffIdx 150 sha.Reset() 151 } 152 153 for err == nil { 154 n, err = io.ReadFull(r, bs) 155 156 for i := uint64(0); i < uint64(n); i++ { 157 b := bs[i] 158 buffIdx++ 159 sha.Write(bs[i : i+1]) 160 161 if rv.HashByte(b) { 162 writeChunk() 163 } 164 } 165 } 166 167 if lastOffset < buffIdx { 168 writeChunk() 169 } 170 171 return 172 }