github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/benchmarks/gen/gen.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package gen
    23  
    24  import (
    25  	"bytes"
    26  	"crypto/sha512"
    27  	"encoding/binary"
    28  	"fmt"
    29  	"io"
    30  	"math/rand"
    31  	"os"
    32  
    33  	"github.com/dustin/go-humanize"
    34  
    35  	"github.com/dolthub/dolt/go/store/d"
    36  	"github.com/dolthub/dolt/go/store/hash"
    37  )
    38  
const (
	// OffsetTupleLen is the serialized size in bytes of an offsetTuple
	// record written by chunk: a hash.ByteLen-byte hash prefix followed
	// by a 4-byte big-endian chunk length.
	OffsetTupleLen   = 24
	// averageChunkSize is the target mean chunk size for the rolling
	// hash; randomByteReader also sizes its scratch buffer from it.
	averageChunkSize = 4 * uint64(1<<10) // 4KB
)
    43  
    44  func OpenOrGenerateDataFile(name string, totalData uint64) (data *os.File, err error) {
    45  	data, err = os.Open(name)
    46  	if os.IsNotExist(err) {
    47  		data, _ = os.Create(name)
    48  		fmt.Printf("Creating data file with %s\n", humanize.IBytes(totalData))
    49  		generateData(data, totalData)
    50  		_, err = data.Seek(0, io.SeekStart)
    51  		d.Chk.NoError(err)
    52  		return data, nil
    53  	}
    54  	d.Chk.NoError(err)
    55  
    56  	info, err := data.Stat()
    57  	d.Chk.NoError(err)
    58  	if uint64(info.Size()) < totalData {
    59  		data.Close()
    60  		return nil, fmt.Errorf("%s is too small to benchmark with %s", name, humanize.IBytes(totalData))
    61  	}
    62  	return data, nil
    63  }
    64  
    65  func OpenOrBuildChunkMap(name string, data *os.File) *os.File {
    66  	cm, err := os.Open(name)
    67  	if os.IsNotExist(err) {
    68  		cm, _ = os.Create(name)
    69  		fmt.Printf("Chunking %s into chunk-map: %s ...", data.Name(), name)
    70  		cc := chunk(cm, data)
    71  		fmt.Println(cc, " chunks")
    72  
    73  		_, err = cm.Seek(0, io.SeekStart)
    74  		d.Chk.NoError(err)
    75  		return cm
    76  	}
    77  	d.Chk.NoError(err)
    78  	return cm
    79  }
    80  
    81  func generateData(w io.Writer, totalData uint64) {
    82  	r := &randomByteReader{}
    83  
    84  	buff := [humanize.MiByte]byte{}
    85  	bs := buff[:]
    86  	buffIdx := 0
    87  
    88  	for bc := uint64(0); bc < totalData; bc++ {
    89  		b, _ := r.ReadByte()
    90  		bs[buffIdx] = b
    91  		buffIdx++
    92  		if buffIdx == int(humanize.MiByte) {
    93  			io.Copy(w, bytes.NewReader(bs))
    94  			buffIdx = 0
    95  		}
    96  	}
    97  }
    98  
    99  type randomByteReader struct {
   100  	rand    *rand.Rand
   101  	scratch [2 * averageChunkSize]byte
   102  	pos     int
   103  }
   104  
   105  func (rbr *randomByteReader) ReadByte() (byte, error) {
   106  	if rbr.rand == nil {
   107  		rbr.rand = rand.New(rand.NewSource(0))
   108  		rbr.pos = cap(rbr.scratch)
   109  	}
   110  	if rbr.pos >= cap(rbr.scratch) {
   111  		rbr.rand.Read(rbr.scratch[:])
   112  		rbr.pos = 0
   113  	}
   114  	b := rbr.scratch[rbr.pos]
   115  	rbr.pos++
   116  	return b, nil
   117  }
   118  
   119  func (rbr *randomByteReader) Close() error {
   120  	return nil
   121  }
   122  
   123  type offsetTuple [OffsetTupleLen]byte
   124  
   125  func chunk(w io.Writer, r io.Reader) (chunkCount int) {
   126  	buff := [humanize.MiByte]byte{}
   127  	bs := buff[:]
   128  	buffIdx := uint64(0)
   129  	rv := newRollingValueHasher()
   130  	sha := sha512.New()
   131  	ot := offsetTuple{}
   132  	lastOffset := uint64(0)
   133  
   134  	var err error
   135  	var n int
   136  
   137  	writeChunk := func() {
   138  		chunkCount++
   139  		var d []byte
   140  		d = sha.Sum(d)
   141  		copy(ot[:hash.ByteLen], d)
   142  
   143  		chunkLength := uint32(buffIdx - lastOffset)
   144  
   145  		binary.BigEndian.PutUint32(ot[hash.ByteLen:], chunkLength)
   146  
   147  		io.Copy(w, bytes.NewReader(ot[:]))
   148  
   149  		lastOffset = buffIdx
   150  		sha.Reset()
   151  	}
   152  
   153  	for err == nil {
   154  		n, err = io.ReadFull(r, bs)
   155  
   156  		for i := uint64(0); i < uint64(n); i++ {
   157  			b := bs[i]
   158  			buffIdx++
   159  			sha.Write(bs[i : i+1])
   160  
   161  			if rv.HashByte(b) {
   162  				writeChunk()
   163  			}
   164  		}
   165  	}
   166  
   167  	if lastOffset < buffIdx {
   168  		writeChunk()
   169  	}
   170  
   171  	return
   172  }