github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/go/nbs/benchmarks/gen/gen.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package gen 6 7 import ( 8 "bytes" 9 "crypto/sha512" 10 "encoding/binary" 11 "fmt" 12 "io" 13 "math/rand" 14 "os" 15 16 "github.com/attic-labs/noms/go/d" 17 "github.com/attic-labs/noms/go/hash" 18 "github.com/dustin/go-humanize" 19 ) 20 21 const ( 22 OffsetTupleLen = 24 23 averageChunkSize = 4 * uint64(1<<10) // 4KB 24 ) 25 26 func OpenOrGenerateDataFile(name string, totalData uint64) (data *os.File, err error) { 27 data, err = os.Open(name) 28 if os.IsNotExist(err) { 29 data, err = os.Create(name) 30 fmt.Printf("Creating data file with %s\n", humanize.IBytes(totalData)) 31 generateData(data, totalData) 32 _, err = data.Seek(0, os.SEEK_SET) 33 d.Chk.NoError(err) 34 return data, nil 35 } 36 d.Chk.NoError(err) 37 38 info, err := data.Stat() 39 d.Chk.NoError(err) 40 if uint64(info.Size()) < totalData { 41 data.Close() 42 return nil, fmt.Errorf("%s is too small to benchmark with %s", name, humanize.IBytes(totalData)) 43 } 44 return data, nil 45 } 46 47 func OpenOrBuildChunkMap(name string, data *os.File) *os.File { 48 cm, err := os.Open(name) 49 if os.IsNotExist(err) { 50 cm, err = os.Create(name) 51 fmt.Printf("Chunking %s into chunk-map: %s ...", data.Name(), name) 52 cc := chunk(cm, data) 53 fmt.Println(cc, " chunks") 54 55 _, err = cm.Seek(0, os.SEEK_SET) 56 d.Chk.NoError(err) 57 return cm 58 } 59 d.Chk.NoError(err) 60 return cm 61 } 62 63 func generateData(w io.Writer, totalData uint64) { 64 r := &randomByteReader{} 65 66 buff := [humanize.MiByte]byte{} 67 bs := buff[:] 68 buffIdx := 0 69 70 for bc := uint64(0); bc < totalData; bc++ { 71 b, _ := r.ReadByte() 72 bs[buffIdx] = b 73 buffIdx++ 74 if buffIdx == int(humanize.MiByte) { 75 io.Copy(w, bytes.NewReader(bs)) 76 buffIdx = 0 77 } 78 } 79 } 80 81 type randomByteReader struct { 82 rand *rand.Rand 83 scratch [2 * averageChunkSize]byte 84 pos int 85 } 86 87 func (rbr *randomByteReader) ReadByte() (byte, error) { 88 if rbr.rand == nil { 89 rbr.rand = rand.New(rand.NewSource(0)) 90 rbr.pos = cap(rbr.scratch) 91 } 92 if rbr.pos >= cap(rbr.scratch) { 93 rbr.rand.Read(rbr.scratch[:]) 94 rbr.pos = 0 95 } 96 b := rbr.scratch[rbr.pos] 97 rbr.pos++ 98 return b, nil 99 } 100 101 func (rbr *randomByteReader) Close() error { 102 return nil 103 } 104 105 type offsetTuple [OffsetTupleLen]byte 106 107 func chunk(w io.Writer, r io.Reader) (chunkCount int) { 108 buff := [humanize.MiByte]byte{} 109 bs := buff[:] 110 buffIdx := uint64(0) 111 rv := newRollingValueHasher() 112 sha := sha512.New() 113 ot := offsetTuple{} 114 lastOffset := uint64(0) 115 116 var err error 117 var n int 118 119 writeChunk := func() { 120 chunkCount++ 121 var d []byte 122 d = sha.Sum(d) 123 copy(ot[:hash.ByteLen], d) 124 125 chunkLength := uint32(buffIdx - lastOffset) 126 127 binary.BigEndian.PutUint32(ot[hash.ByteLen:], chunkLength) 128 129 io.Copy(w, bytes.NewReader(ot[:])) 130 131 lastOffset = buffIdx 132 sha.Reset() 133 } 134 135 for err == nil { 136 n, err = io.ReadFull(r, bs) 137 138 for i := uint64(0); i < uint64(n); i++ { 139 b := bs[i] 140 buffIdx++ 141 sha.Write(bs[i : i+1]) 142 143 if rv.HashByte(b) { 144 writeChunk() 145 } 146 } 147 } 148 149 if lastOffset < buffIdx { 150 writeChunk() 151 } 152 153 return 154 }