github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/benchmarks/data_source.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package main
    23  
    24  import (
    25  	"bufio"
    26  	"encoding/binary"
    27  	"errors"
    28  	"fmt"
    29  	"io"
    30  	"os"
    31  
    32  	"github.com/dustin/go-humanize"
    33  	flag "github.com/juju/gnuflag"
    34  
    35  	"github.com/dolthub/dolt/go/store/atomicerr"
    36  	"github.com/dolthub/dolt/go/store/chunks"
    37  	"github.com/dolthub/dolt/go/store/hash"
    38  	"github.com/dolthub/dolt/go/store/nbs/benchmarks/gen"
    39  )
    40  
    41  var readFile = flag.String("input-file", "", "A file full of test data. Creates and saves associated .chunks file at runtime if it doesn't yet exist. If none is specified, data and .chunks files will be generated and saved.")
    42  
    43  type hashSlice []hash.Hash
    44  
    45  func (s hashSlice) Len() int {
    46  	return len(s)
    47  }
    48  
    49  func (s hashSlice) Less(i, j int) bool {
    50  	return s[i].Less(s[j])
    51  }
    52  
    53  func (s hashSlice) Swap(i, j int) {
    54  	s[i], s[j] = s[j], s[i]
    55  }
    56  
    57  type dataSource struct {
    58  	data, cm            *os.File
    59  	totalData, dataRead uint64
    60  	hashes              hashSlice
    61  }
    62  
    63  func getInput(dataSize uint64) (src *dataSource, err error) {
    64  	filename := *readFile
    65  	if filename == "" {
    66  		filename = humanize.IBytes(dataSize) + ".bin"
    67  	}
    68  	data, err := gen.OpenOrGenerateDataFile(filename, dataSize)
    69  	if err != nil {
    70  		return nil, err
    71  	}
    72  	chunkFile := data.Name() + ".chunks"
    73  	cm := gen.OpenOrBuildChunkMap(chunkFile, data)
    74  	fmt.Println("Reading from", filename, "with chunks", chunkFile)
    75  	src = &dataSource{data: data, cm: cm, totalData: dataSize}
    76  	tuples := make(chan offsetTuple, 1024)
    77  	go func() {
    78  		src.readTuples(tuples)
    79  		close(tuples)
    80  	}()
    81  	for ot := range tuples {
    82  		src.hashes = append(src.hashes, ot.h)
    83  	}
    84  	return src, err
    85  }
    86  
    87  type offsetTuple struct {
    88  	h hash.Hash
    89  	l uint64
    90  }
    91  
    92  func (src *dataSource) PrimeFilesystemCache() error {
    93  	bufData := bufio.NewReaderSize(src.data, 10*humanize.MiByte)
    94  	tuples := make(chan offsetTuple, 16)
    95  
    96  	ae := atomicerr.New()
    97  	go func() {
    98  		err := src.readTuples(tuples)
    99  		ae.SetIfError(err)
   100  		close(tuples)
   101  	}()
   102  
   103  	for ot := range tuples {
   104  		if ae.IsSet() {
   105  			break
   106  		}
   107  
   108  		buff := make([]byte, ot.l)
   109  		n, err := io.ReadFull(bufData, buff)
   110  
   111  		if err != nil {
   112  			return err
   113  		}
   114  
   115  		if uint64(n) != ot.l {
   116  			return errors.New("failed to read all data")
   117  		}
   118  	}
   119  
   120  	return ae.Get()
   121  }
   122  
   123  func (src *dataSource) ReadChunks(chunkChan chan<- *chunks.Chunk) error {
   124  	bufData := bufio.NewReaderSize(src.data, humanize.MiByte)
   125  	tuples := make(chan offsetTuple, 1024)
   126  
   127  	ae := atomicerr.New()
   128  	go func() {
   129  		err := src.readTuples(tuples)
   130  		ae.SetIfError(err)
   131  		close(tuples)
   132  	}()
   133  
   134  	for ot := range tuples {
   135  		if ae.IsSet() {
   136  			break
   137  		}
   138  
   139  		buff := make([]byte, ot.l)
   140  		n, err := io.ReadFull(bufData, buff)
   141  
   142  		if err != nil {
   143  			return err
   144  		}
   145  
   146  		if uint64(n) != ot.l {
   147  			return errors.New("failed to read the entire chunk")
   148  		}
   149  
   150  		c := chunks.NewChunkWithHash(ot.h, buff)
   151  		chunkChan <- &c
   152  	}
   153  
   154  	return ae.Get()
   155  }
   156  
   157  func (src *dataSource) GetHashes() hashSlice {
   158  	out := make(hashSlice, len(src.hashes))
   159  	copy(out, src.hashes)
   160  	return out
   161  }
   162  
   163  func (src *dataSource) readTuples(tuples chan<- offsetTuple) error {
   164  	err := src.reset()
   165  
   166  	if err != nil {
   167  		return err
   168  	}
   169  
   170  	otBuf := [gen.OffsetTupleLen]byte{}
   171  	cm := bufio.NewReaderSize(src.cm, humanize.MiByte)
   172  	ot := offsetTuple{}
   173  
   174  	for src.dataRead < src.totalData {
   175  		n, err := io.ReadFull(cm, otBuf[:])
   176  		if err != nil {
   177  			if err != io.EOF {
   178  				return err
   179  			}
   180  
   181  			return nil
   182  		}
   183  
   184  		if n != gen.OffsetTupleLen {
   185  			return errors.New("failed to read all data")
   186  		}
   187  
   188  		ot.h = hash.New(otBuf[:20])
   189  		ot.l = uint64(binary.BigEndian.Uint32(otBuf[20:]))
   190  		src.dataRead += ot.l
   191  		tuples <- ot
   192  	}
   193  
   194  	return nil
   195  }
   196  
   197  func (src *dataSource) reset() error {
   198  	_, err := src.data.Seek(0, io.SeekStart)
   199  
   200  	if err != nil {
   201  		return err
   202  	}
   203  
   204  	_, err = src.cm.Seek(0, io.SeekStart)
   205  
   206  	if err != nil {
   207  		return err
   208  	}
   209  
   210  	src.dataRead = 0
   211  
   212  	return nil
   213  }
   214  
   215  func (src *dataSource) Close() error {
   216  	dataErr := src.data.Close()
   217  	cmErr := src.cm.Close()
   218  
   219  	if dataErr != nil {
   220  		return dataErr
   221  	}
   222  
   223  	if cmErr != nil {
   224  		return cmErr
   225  	}
   226  
   227  	return nil
   228  }