github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/file_table_reader.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  	"os"
    30  	"path/filepath"
    31  	"time"
    32  
    33  	"github.com/dolthub/dolt/go/store/hash"
    34  )
    35  
// fileTableReader pairs a tableReader with the hash of the table file it was
// opened from. newFileTableReader returns it as a chunkSource.
//
// NOTE: this type is built with positional composite literals elsewhere in
// this file, so the field order must not change.
type fileTableReader struct {
	// tableReader is embedded; Close and clone delegate to it.
	tableReader
	// h is the table file's hash, returned by hash(). The file's name on disk
	// is h.String().
	h hash.Hash
}
    40  
const (
	// fileBlockSize is 1 << 12 (4096). It is passed as the last argument to
	// newTableReader for file-backed tables.
	// NOTE(review): presumably a block size in bytes — confirm against
	// newTableReader.
	fileBlockSize = 1 << 12
)
    44  
    45  func tableFileExists(ctx context.Context, dir string, h hash.Hash) (bool, error) {
    46  	path := filepath.Join(dir, h.String())
    47  	_, err := os.Stat(path)
    48  
    49  	if os.IsNotExist(err) {
    50  		return false, nil
    51  	}
    52  
    53  	return err == nil, err
    54  }
    55  
    56  func newFileTableReader(ctx context.Context, dir string, h hash.Hash, chunkCount uint32, q MemoryQuotaProvider) (cs chunkSource, err error) {
    57  	path := filepath.Join(dir, h.String())
    58  
    59  	var f *os.File
    60  	index, sz, err := func() (ti onHeapTableIndex, sz int64, err error) {
    61  		// Be careful with how |f| is used below. |RefFile| returns a cached
    62  		// os.File pointer so the code needs to use f in a concurrency-safe
    63  		// manner. Moving the file offset is BAD.
    64  		f, err = os.Open(path)
    65  		if err != nil {
    66  			return
    67  		}
    68  
    69  		// Since we can't move the file offset, get the size of the file and use
    70  		// ReadAt to load the index instead.
    71  		var fi os.FileInfo
    72  		fi, err = f.Stat()
    73  
    74  		if err != nil {
    75  			return
    76  		}
    77  
    78  		if fi.Size() < 0 {
    79  			// Size returns the number of bytes for regular files and is system dependant for others (Some of which can be negative).
    80  			err = fmt.Errorf("%s has invalid size: %d", path, fi.Size())
    81  			return
    82  		}
    83  
    84  		idxSz := int64(indexSize(chunkCount) + footerSize)
    85  		sz = fi.Size()
    86  		indexOffset := sz - idxSz
    87  		r := io.NewSectionReader(f, indexOffset, idxSz)
    88  
    89  		if int64(int(idxSz)) != idxSz {
    90  			err = fmt.Errorf("table file %s/%s is too large to read on this platform. index size %d > max int.", dir, h.String(), idxSz)
    91  			return
    92  		}
    93  
    94  		var b []byte
    95  		b, err = q.AcquireQuotaBytes(ctx, int(idxSz))
    96  		if err != nil {
    97  			return
    98  		}
    99  
   100  		_, err = io.ReadFull(r, b)
   101  		if err != nil {
   102  			q.ReleaseQuotaBytes(len(b))
   103  			return
   104  		}
   105  
   106  		ti, err = parseTableIndex(ctx, b, q)
   107  		if err != nil {
   108  			q.ReleaseQuotaBytes(len(b))
   109  			return
   110  		}
   111  
   112  		return
   113  	}()
   114  	if err != nil {
   115  		if f != nil {
   116  			f.Close()
   117  		}
   118  		return nil, err
   119  	}
   120  
   121  	if chunkCount != index.chunkCount() {
   122  		index.Close()
   123  		f.Close()
   124  		return nil, errors.New("unexpected chunk count")
   125  	}
   126  
   127  	tr, err := newTableReader(index, &fileReaderAt{f, path, sz}, fileBlockSize)
   128  	if err != nil {
   129  		index.Close()
   130  		f.Close()
   131  		return nil, err
   132  	}
   133  	return &fileTableReader{
   134  		tr,
   135  		h,
   136  	}, nil
   137  }
   138  
// hash returns the hash this reader was created with, i.e. the |h| passed to
// newFileTableReader.
func (ftr *fileTableReader) hash() hash.Hash {
	return ftr.h
}
   142  
// Close closes the embedded tableReader and returns whatever error its close
// method reports.
func (ftr *fileTableReader) Close() error {
	return ftr.tableReader.close()
}
   146  
   147  func (ftr *fileTableReader) clone() (chunkSource, error) {
   148  	tr, err := ftr.tableReader.clone()
   149  	if err != nil {
   150  		return &fileTableReader{}, err
   151  	}
   152  	return &fileTableReader{tr, ftr.h}, nil
   153  }
   154  
// fileReaderAt serves positional reads from an open table file.
//
// NOTE: this type is built with positional composite literals elsewhere in
// this file, so the field order must not change.
type fileReaderAt struct {
	// f is the open file that ReadAtWithStats reads from and Close closes.
	f *os.File
	// path is the location of the file on disk; clone and Reader reopen it.
	path string
	// sz is the file size recorded by newFileTableReader when the file was
	// opened; clone copies it unchanged.
	sz int64
}
   160  
   161  func (fra *fileReaderAt) clone() (tableReaderAt, error) {
   162  	f, err := os.Open(fra.path)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  	return &fileReaderAt{
   167  		f,
   168  		fra.path,
   169  		fra.sz,
   170  	}, nil
   171  }
   172  
// Close closes the underlying file handle and returns the error from
// os.File.Close, if any.
func (fra *fileReaderAt) Close() error {
	return fra.f.Close()
}
   176  
   177  func (fra *fileReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
   178  	return os.Open(fra.path)
   179  }
   180  
   181  func (fra *fileReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) {
   182  	t1 := time.Now()
   183  	defer func() {
   184  		stats.FileBytesPerRead.Sample(uint64(len(p)))
   185  		stats.FileReadLatency.SampleTimeSince(t1)
   186  	}()
   187  	return fra.f.ReadAt(p, off)
   188  }