github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/go/nbs/benchmarks/gen/gen.go

// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package gen

import (
	"bytes"
	"crypto/sha512"
	"encoding/binary"
	"fmt"
	"io"
	"math/rand"
	"os"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/hash"
	"github.com/dustin/go-humanize"
)
const (
	// OffsetTupleLen is the size of one chunk-map record: a chunk hash of
	// hash.ByteLen bytes followed by a big-endian uint32 chunk length.
	OffsetTupleLen   = 24
	averageChunkSize = 4 * uint64(1<<10) // 4KB
)

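// OpenOrGenerateDataFile opens the named data file, creating it and filling it
// with totalData bytes of pseudo-random data if it does not already exist. It
// returns an error if an existing file is smaller than totalData.
//
// A typical call from a benchmark looks like the following (the file names and
// size here are illustrative only):
//
//	data, err := gen.OpenOrGenerateDataFile("/tmp/noms-bench.dat", 128*humanize.MiByte)
//	d.Chk.NoError(err)
//	cm := gen.OpenOrBuildChunkMap("/tmp/noms-bench.cmap", data)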
func OpenOrGenerateDataFile(name string, totalData uint64) (data *os.File, err error) {
	data, err = os.Open(name)
	if os.IsNotExist(err) {
		data, err = os.Create(name)
		d.Chk.NoError(err)
		fmt.Printf("Creating data file with %s\n", humanize.IBytes(totalData))
		generateData(data, totalData)
		_, err = data.Seek(0, io.SeekStart)
		d.Chk.NoError(err)
		return data, nil
	}
	d.Chk.NoError(err)

	info, err := data.Stat()
	d.Chk.NoError(err)
	if uint64(info.Size()) < totalData {
		data.Close()
		return nil, fmt.Errorf("%s is too small to benchmark with %s", name, humanize.IBytes(totalData))
	}
	return data, nil
}

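// OpenOrBuildChunkMap opens the named chunk-map file, building it by chunking
// data if it does not already exist. Each record in the map is one offsetTuple:
// a chunk hash followed by the chunk's length.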
func OpenOrBuildChunkMap(name string, data *os.File) *os.File {
	cm, err := os.Open(name)
	if os.IsNotExist(err) {
		cm, err = os.Create(name)
		d.Chk.NoError(err)
		fmt.Printf("Chunking %s into chunk-map: %s ...", data.Name(), name)
		cc := chunk(cm, data)
		fmt.Println(cc, " chunks")

		_, err = cm.Seek(0, io.SeekStart)
		d.Chk.NoError(err)
		return cm
	}
	d.Chk.NoError(err)
	return cm
}

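// generateData writes totalData bytes of deterministic pseudo-random data to w,
// buffering the output in 1 MiB blocks.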
func generateData(w io.Writer, totalData uint64) {
	r := &randomByteReader{}

	buff := [humanize.MiByte]byte{}
	bs := buff[:]
	buffIdx := 0

	for bc := uint64(0); bc < totalData; bc++ {
		b, _ := r.ReadByte()
		bs[buffIdx] = b
		buffIdx++
		if buffIdx == int(humanize.MiByte) {
			_, err := io.Copy(w, bytes.NewReader(bs))
			d.Chk.NoError(err)
			buffIdx = 0
		}
	}
	// Flush the final partial buffer when totalData is not a multiple of 1 MiB.
	if buffIdx > 0 {
		_, err := io.Copy(w, bytes.NewReader(bs[:buffIdx]))
		d.Chk.NoError(err)
	}
}

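// randomByteReader produces a deterministic stream of pseudo-random bytes by
// refilling its scratch buffer from a fixed-seed rand.Rand as it is drained.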
type randomByteReader struct {
	rand    *rand.Rand
	scratch [2 * averageChunkSize]byte
	pos     int
}

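// ReadByte returns the next pseudo-random byte; the error is always nil.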
func (rbr *randomByteReader) ReadByte() (byte, error) {
	if rbr.rand == nil {
		rbr.rand = rand.New(rand.NewSource(0))
		rbr.pos = cap(rbr.scratch)
	}
	if rbr.pos >= cap(rbr.scratch) {
		rbr.rand.Read(rbr.scratch[:])
		rbr.pos = 0
	}
	b := rbr.scratch[rbr.pos]
	rbr.pos++
	return b, nil
}

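// Close is a no-op; randomByteReader holds no resources that need releasing.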
func (rbr *randomByteReader) Close() error {
	return nil
}

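// offsetTuple is one chunk-map record: hash.ByteLen bytes of chunk hash
// followed by a big-endian uint32 chunk length.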
type offsetTuple [OffsetTupleLen]byte

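// chunk splits the bytes read from r at rolling-hash boundaries, writes one
// offsetTuple per chunk to w, and returns the number of chunks produced.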
func chunk(w io.Writer, r io.Reader) (chunkCount int) {
	buff := [humanize.MiByte]byte{}
	bs := buff[:]
	// buffIdx counts every byte consumed from r, i.e. it is the current offset
	// into the whole stream, not an index into buff.
	buffIdx := uint64(0)
	rv := newRollingValueHasher()
	sha := sha512.New()
	ot := offsetTuple{}
	lastOffset := uint64(0)

	var err error
	var n int

	// writeChunk records one offsetTuple covering the bytes hashed since
	// lastOffset: the first hash.ByteLen bytes of the sha512 digest, then the
	// chunk length as a big-endian uint32.
	writeChunk := func() {
		chunkCount++
		digest := sha.Sum(nil)
		copy(ot[:hash.ByteLen], digest)

		chunkLength := uint32(buffIdx - lastOffset)

		binary.BigEndian.PutUint32(ot[hash.ByteLen:], chunkLength)

		_, err := io.Copy(w, bytes.NewReader(ot[:]))
		d.Chk.NoError(err)

		lastOffset = buffIdx
		sha.Reset()
	}

	for err == nil {
		n, err = io.ReadFull(r, bs)

		for i := uint64(0); i < uint64(n); i++ {
			b := bs[i]
			buffIdx++
			sha.Write(bs[i : i+1])

			// The rolling hash signals a chunk boundary.
			if rv.HashByte(b) {
				writeChunk()
			}
		}
	}

	// Emit the trailing chunk if any bytes remain past the last boundary.
	if lastOffset < buffIdx {
		writeChunk()
	}

	return
}