github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/valuefile/value_file.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package valuefile 16 17 import ( 18 "context" 19 "encoding/binary" 20 "errors" 21 "fmt" 22 "io" 23 "os" 24 25 "github.com/dolthub/dolt/go/libraries/utils/iohelp" 26 "github.com/dolthub/dolt/go/store/chunks" 27 "github.com/dolthub/dolt/go/store/datas" 28 "github.com/dolthub/dolt/go/store/hash" 29 "github.com/dolthub/dolt/go/store/types" 30 ) 31 32 // ErrCorruptNVF is the error used when the file being read is corrupt 33 var ErrCorruptNVF = errors.New("nvf file is corrupt") 34 35 // WritePrimitiveValueFile writes values to the filepath provided 36 func WritePrimitiveValueFile(ctx context.Context, filepath string, values ...types.Value) error { 37 for _, v := range values { 38 if !types.IsPrimitiveKind(v.Kind()) { 39 return errors.New("non-primitve value found") 40 } 41 } 42 43 nbf := types.Format_Default 44 store, err := NewFileValueStore(nbf) 45 46 if err != nil { 47 return err 48 } 49 50 return WriteValueFile(ctx, filepath, store, values...) 51 } 52 53 // WriteValueFile writes the values stored in the *FileValueStore to the filepath provided 54 func WriteValueFile(ctx context.Context, filepath string, store *FileValueStore, values ...types.Value) (err error) { 55 56 f, err := os.OpenFile(filepath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm) 57 defer func() { 58 closeErr := f.Close() 59 if err == nil { 60 err = closeErr 61 } 62 }() 63 64 if err != nil { 65 return err 66 } 67 68 return WriteToWriter(ctx, f, store, values...) 69 } 70 71 // WriteToWriter writes the values out to the provided writer in the value file format 72 func WriteToWriter(ctx context.Context, wr io.Writer, store *FileValueStore, values ...types.Value) error { 73 db := datas.NewDatabase(store) 74 ds, err := db.GetDataset(ctx, "master") 75 76 if err != nil { 77 return err 78 } 79 80 l, err := types.NewList(ctx, db, values...) 81 82 if err != nil { 83 return err 84 } 85 86 ds, err = db.CommitValue(ctx, ds, l) 87 88 if err != nil { 89 return err 90 } 91 92 ref, _, err := ds.MaybeHeadRef() 93 94 if err != nil { 95 return err 96 } 97 98 err = write(wr, ref.TargetHash(), store) 99 100 if err != nil { 101 return err 102 } 103 104 return nil 105 } 106 107 // write writes out: 108 // NomsBinFormat version string length 109 // NomsBinFormat version String 110 // Root Hash 111 // uint32 num chunks 112 // 113 // for each chunk: 114 // hash of chunk 115 // len of chunk 116 // 117 // for each chunk 118 // chunk bytes 119 func write(wr io.Writer, h hash.Hash, store *FileValueStore) error { 120 // The Write*IfNoErr functions makes the error handling code less annoying 121 err := iohelp.WritePrimIfNoErr(wr, uint32(len(store.nbf.VersionString())), nil) 122 err = iohelp.WriteIfNoErr(wr, []byte(store.nbf.VersionString()), err) 123 err = iohelp.WriteIfNoErr(wr, h[:], err) 124 err = iohelp.WritePrimIfNoErr(wr, uint32(store.numChunks()), err) 125 126 if err != nil { 127 return err 128 } 129 130 err = store.iterChunks(func(ch chunks.Chunk) error { 131 h := ch.Hash() 132 err = iohelp.WriteIfNoErr(wr, h[:], err) 133 return iohelp.WritePrimIfNoErr(wr, uint32(len(ch.Data())), err) 134 }) 135 136 err = store.iterChunks(func(ch chunks.Chunk) error { 137 return iohelp.WriteIfNoErr(wr, ch.Data(), err) 138 }) 139 140 return err 141 } 142 143 // ReadValueFile reads from the provided file and returns the values stored in the file 144 func ReadValueFile(ctx context.Context, filepath string) ([]types.Value, error) { 145 f, err := os.Open(filepath) 146 147 if err != nil { 148 return nil, err 149 } 150 151 defer f.Close() 152 153 return ReadFromReader(ctx, f) 154 } 155 156 // ReadFromReader reads from the provided reader which should provided access to data in the value file format and returns 157 // the values 158 func ReadFromReader(ctx context.Context, rd io.Reader) ([]types.Value, error) { 159 h, store, err := read(ctx, rd) 160 161 if err != nil { 162 return nil, err 163 } 164 165 db := datas.NewDatabase(store) 166 v, err := db.ReadValue(ctx, h) 167 168 if err != nil { 169 return nil, err 170 } 171 172 commitSt, ok := v.(types.Struct) 173 174 if !ok { 175 return nil, ErrCorruptNVF 176 } 177 178 rootVal, ok, err := commitSt.MaybeGet(datas.ValueField) 179 180 if err != nil { 181 return nil, err 182 } 183 184 l := rootVal.(types.List) 185 values := make([]types.Value, l.Len()) 186 err = l.IterAll(ctx, func(v types.Value, index uint64) error { 187 values[index] = v 188 return nil 189 }) 190 191 if err != nil { 192 return nil, err 193 } 194 195 return values, nil 196 } 197 198 // see the write section to see the value file 199 func read(ctx context.Context, rd io.Reader) (hash.Hash, *FileValueStore, error) { 200 // ErrPreservingReader allows me to ignore errors until I need to use the data 201 errRd := iohelp.NewErrPreservingReader(rd) 202 203 // read len of NBF version string and then read the version string and check it 204 fmtLen, err := errRd.ReadUint32(binary.BigEndian) 205 206 if err != nil { 207 if err == io.EOF { 208 err = fmt.Errorf("EOF read while tring to get nbf format len - %w", ErrCorruptNVF) 209 } 210 211 return hash.Hash{}, nil, err 212 } 213 214 data, err := iohelp.ReadNBytes(errRd, int(fmtLen)) 215 216 if err != nil { 217 if err == io.EOF { 218 err = fmt.Errorf("EOF read while tring to get nbf format string - %w", ErrCorruptNVF) 219 } 220 221 return hash.Hash{}, nil, err 222 } 223 224 var nbf *types.NomsBinFormat 225 switch string(data) { 226 case types.Format_7_18.VersionString(): 227 nbf = types.Format_7_18 228 case types.Format_LD_1.VersionString(): 229 nbf = types.Format_LD_1 230 default: 231 return hash.Hash{}, nil, fmt.Errorf("unknown noms format: %s", string(data)) 232 } 233 234 store, err := NewFileValueStore(nbf) 235 236 if err != nil { 237 return hash.Hash{}, nil, err 238 } 239 240 // read the root hash and the chunk count 241 hashBytes, _ := iohelp.ReadNBytes(errRd, hash.ByteLen) 242 numChunks, err := errRd.ReadUint32(binary.BigEndian) 243 244 if err != nil { 245 if err == io.EOF { 246 err = fmt.Errorf("EOF read while trying to read the root hash and chunk count - %w", ErrCorruptNVF) 247 } 248 249 return hash.Hash{}, nil, err 250 } 251 252 // read the hashes and sizes 253 type hashAndSize struct { 254 h hash.Hash 255 size uint32 256 } 257 hashesAndSizes := make([]hashAndSize, numChunks) 258 for i := uint32(0); i < numChunks; i++ { 259 chHashBytes, _ := iohelp.ReadNBytes(errRd, hash.ByteLen) 260 size, err := errRd.ReadUint32(binary.BigEndian) 261 262 if err != nil { 263 if err == io.EOF { 264 err = fmt.Errorf("EOF read the root hash and chunk count - %w", ErrCorruptNVF) 265 } 266 267 return hash.Hash{}, nil, err 268 } 269 270 hashesAndSizes[i] = hashAndSize{hash.New(chHashBytes), size} 271 } 272 273 // read the data and validate it against the expected hashes 274 for _, hashAndSize := range hashesAndSizes { 275 h := hashAndSize.h 276 size := hashAndSize.size 277 chBytes, err := iohelp.ReadNBytes(errRd, int(size)) 278 279 if err != nil && err != io.EOF || err == io.EOF && uint32(len(chBytes)) != size { 280 if err == io.EOF { 281 err = fmt.Errorf("EOF read trying to read chunk - %w", ErrCorruptNVF) 282 } 283 284 return hash.Hash{}, nil, err 285 } 286 287 ch := chunks.NewChunk(chBytes) 288 289 if h != ch.Hash() { 290 return hash.Hash{}, nil, errors.New("data corrupted") 291 } 292 293 err = store.Put(ctx, ch) 294 295 if err != nil { 296 return hash.Hash{}, nil, err 297 } 298 } 299 300 return hash.New(hashBytes), store, nil 301 }