github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/valuefile/value_file.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package valuefile
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"os"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
    26  	"github.com/dolthub/dolt/go/store/chunks"
    27  	"github.com/dolthub/dolt/go/store/datas"
    28  	"github.com/dolthub/dolt/go/store/hash"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  )
    31  
// ErrCorruptNVF is the sentinel error reported when a value file being read is malformed. The readers in this
// file either return it directly or wrap it using %w, so callers should test for it with errors.Is rather than
// with ==.
var ErrCorruptNVF = errors.New("nvf file is corrupt")
    34  
    35  // WritePrimitiveValueFile writes values to the filepath provided
    36  func WritePrimitiveValueFile(ctx context.Context, filepath string, values ...types.Value) error {
    37  	for _, v := range values {
    38  		if !types.IsPrimitiveKind(v.Kind()) {
    39  			return errors.New("non-primitve value found")
    40  		}
    41  	}
    42  
    43  	nbf := types.Format_Default
    44  	store, err := NewFileValueStore(nbf)
    45  
    46  	if err != nil {
    47  		return err
    48  	}
    49  
    50  	return WriteValueFile(ctx, filepath, store, values...)
    51  }
    52  
    53  // WriteValueFile writes the values stored in the *FileValueStore to the filepath provided
    54  func WriteValueFile(ctx context.Context, filepath string, store *FileValueStore, values ...types.Value) (err error) {
    55  
    56  	f, err := os.OpenFile(filepath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
    57  	defer func() {
    58  		closeErr := f.Close()
    59  		if err == nil {
    60  			err = closeErr
    61  		}
    62  	}()
    63  
    64  	if err != nil {
    65  		return err
    66  	}
    67  
    68  	return WriteToWriter(ctx, f, store, values...)
    69  }
    70  
    71  // WriteToWriter writes the values out to the provided writer in the value file format
    72  func WriteToWriter(ctx context.Context, wr io.Writer, store *FileValueStore, values ...types.Value) error {
    73  	db := datas.NewDatabase(store)
    74  	ds, err := db.GetDataset(ctx, "master")
    75  
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	l, err := types.NewList(ctx, db, values...)
    81  
    82  	if err != nil {
    83  		return err
    84  	}
    85  
    86  	ds, err = db.CommitValue(ctx, ds, l)
    87  
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	ref, _, err := ds.MaybeHeadRef()
    93  
    94  	if err != nil {
    95  		return err
    96  	}
    97  
    98  	err = write(wr, ref.TargetHash(), store)
    99  
   100  	if err != nil {
   101  		return err
   102  	}
   103  
   104  	return nil
   105  }
   106  
   107  // write writes out:
   108  // NomsBinFormat version string length
   109  // NomsBinFormat version String
   110  // Root Hash
   111  // uint32 num chunks
   112  //
   113  // for each chunk:
   114  //   hash of chunk
   115  //   len of chunk
   116  //
   117  // for each chunk
   118  //   chunk bytes
   119  func write(wr io.Writer, h hash.Hash, store *FileValueStore) error {
   120  	// The Write*IfNoErr functions makes the error handling code less annoying
   121  	err := iohelp.WritePrimIfNoErr(wr, uint32(len(store.nbf.VersionString())), nil)
   122  	err = iohelp.WriteIfNoErr(wr, []byte(store.nbf.VersionString()), err)
   123  	err = iohelp.WriteIfNoErr(wr, h[:], err)
   124  	err = iohelp.WritePrimIfNoErr(wr, uint32(store.numChunks()), err)
   125  
   126  	if err != nil {
   127  		return err
   128  	}
   129  
   130  	err = store.iterChunks(func(ch chunks.Chunk) error {
   131  		h := ch.Hash()
   132  		err = iohelp.WriteIfNoErr(wr, h[:], err)
   133  		return iohelp.WritePrimIfNoErr(wr, uint32(len(ch.Data())), err)
   134  	})
   135  
   136  	err = store.iterChunks(func(ch chunks.Chunk) error {
   137  		return iohelp.WriteIfNoErr(wr, ch.Data(), err)
   138  	})
   139  
   140  	return err
   141  }
   142  
   143  // ReadValueFile reads from the provided file and returns the values stored in the file
   144  func ReadValueFile(ctx context.Context, filepath string) ([]types.Value, error) {
   145  	f, err := os.Open(filepath)
   146  
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  
   151  	defer f.Close()
   152  
   153  	return ReadFromReader(ctx, f)
   154  }
   155  
   156  // ReadFromReader reads from the provided reader which should provided access to data in the value file format and returns
   157  // the values
   158  func ReadFromReader(ctx context.Context, rd io.Reader) ([]types.Value, error) {
   159  	h, store, err := read(ctx, rd)
   160  
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  
   165  	db := datas.NewDatabase(store)
   166  	v, err := db.ReadValue(ctx, h)
   167  
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	commitSt, ok := v.(types.Struct)
   173  
   174  	if !ok {
   175  		return nil, ErrCorruptNVF
   176  	}
   177  
   178  	rootVal, ok, err := commitSt.MaybeGet(datas.ValueField)
   179  
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  
   184  	l := rootVal.(types.List)
   185  	values := make([]types.Value, l.Len())
   186  	err = l.IterAll(ctx, func(v types.Value, index uint64) error {
   187  		values[index] = v
   188  		return nil
   189  	})
   190  
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  
   195  	return values, nil
   196  }
   197  
   198  // see the write section to see the value file
   199  func read(ctx context.Context, rd io.Reader) (hash.Hash, *FileValueStore, error) {
   200  	// ErrPreservingReader allows me to ignore errors until I need to use the data
   201  	errRd := iohelp.NewErrPreservingReader(rd)
   202  
   203  	// read len of NBF version string and then read the version string and check it
   204  	fmtLen, err := errRd.ReadUint32(binary.BigEndian)
   205  
   206  	if err != nil {
   207  		if err == io.EOF {
   208  			err = fmt.Errorf("EOF read while tring to get nbf format len - %w", ErrCorruptNVF)
   209  		}
   210  
   211  		return hash.Hash{}, nil, err
   212  	}
   213  
   214  	data, err := iohelp.ReadNBytes(errRd, int(fmtLen))
   215  
   216  	if err != nil {
   217  		if err == io.EOF {
   218  			err = fmt.Errorf("EOF read while tring to get nbf format string - %w", ErrCorruptNVF)
   219  		}
   220  
   221  		return hash.Hash{}, nil, err
   222  	}
   223  
   224  	var nbf *types.NomsBinFormat
   225  	switch string(data) {
   226  	case types.Format_7_18.VersionString():
   227  		nbf = types.Format_7_18
   228  	case types.Format_LD_1.VersionString():
   229  		nbf = types.Format_LD_1
   230  	default:
   231  		return hash.Hash{}, nil, fmt.Errorf("unknown noms format: %s", string(data))
   232  	}
   233  
   234  	store, err := NewFileValueStore(nbf)
   235  
   236  	if err != nil {
   237  		return hash.Hash{}, nil, err
   238  	}
   239  
   240  	// read the root hash and the chunk count
   241  	hashBytes, _ := iohelp.ReadNBytes(errRd, hash.ByteLen)
   242  	numChunks, err := errRd.ReadUint32(binary.BigEndian)
   243  
   244  	if err != nil {
   245  		if err == io.EOF {
   246  			err = fmt.Errorf("EOF read while trying to read the root hash and chunk count - %w", ErrCorruptNVF)
   247  		}
   248  
   249  		return hash.Hash{}, nil, err
   250  	}
   251  
   252  	// read the hashes and sizes
   253  	type hashAndSize struct {
   254  		h    hash.Hash
   255  		size uint32
   256  	}
   257  	hashesAndSizes := make([]hashAndSize, numChunks)
   258  	for i := uint32(0); i < numChunks; i++ {
   259  		chHashBytes, _ := iohelp.ReadNBytes(errRd, hash.ByteLen)
   260  		size, err := errRd.ReadUint32(binary.BigEndian)
   261  
   262  		if err != nil {
   263  			if err == io.EOF {
   264  				err = fmt.Errorf("EOF read the root hash and chunk count - %w", ErrCorruptNVF)
   265  			}
   266  
   267  			return hash.Hash{}, nil, err
   268  		}
   269  
   270  		hashesAndSizes[i] = hashAndSize{hash.New(chHashBytes), size}
   271  	}
   272  
   273  	// read the data and validate it against the expected hashes
   274  	for _, hashAndSize := range hashesAndSizes {
   275  		h := hashAndSize.h
   276  		size := hashAndSize.size
   277  		chBytes, err := iohelp.ReadNBytes(errRd, int(size))
   278  
   279  		if err != nil && err != io.EOF || err == io.EOF && uint32(len(chBytes)) != size {
   280  			if err == io.EOF {
   281  				err = fmt.Errorf("EOF read trying to read chunk - %w", ErrCorruptNVF)
   282  			}
   283  
   284  			return hash.Hash{}, nil, err
   285  		}
   286  
   287  		ch := chunks.NewChunk(chBytes)
   288  
   289  		if h != ch.Hash() {
   290  			return hash.Hash{}, nil, errors.New("data corrupted")
   291  		}
   292  
   293  		err = store.Put(ctx, ch)
   294  
   295  		if err != nil {
   296  			return hash.Hash{}, nil, err
   297  		}
   298  	}
   299  
   300  	return hash.New(hashBytes), store, nil
   301  }