github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/remotestorage/chunk_store.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package remotestorage

import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/cenkalti/backoff"
	"github.com/opentracing/opentracing-go"
	"golang.org/x/sync/errgroup"

	remotesapi "github.com/dolthub/dolt/go/gen/proto/dolt/services/remotesapi/v1alpha1"
	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
	"github.com/dolthub/dolt/go/libraries/utils/tracing"
	"github.com/dolthub/dolt/go/store/atomicerr"
	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/datas"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/nbs"
	"github.com/dolthub/dolt/go/store/types"
)

var DownloadHedger *Hedger

func init() {
	// TODO: This does not necessarily respond well to changes in network
	// conditions during the program's runtime.
	DownloadHedger = NewHedger(
		8,
		NewMinStrategy(
			1*time.Second,
			NewPercentileStrategy(0, 1*time.Hour, 95.0),
		),
	)
}
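
// A sketch of the intended tuning above (this reading of the constructor
// arguments is an assumption from their names): hedge a download once it has
// been in flight longer than the larger of one second and the 95th percentile
// of previously observed download times (clamped to at most an hour), with
// NewHedger's first argument bounding outstanding hedges.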

var ErrUploadFailed = errors.New("upload failed")
var ErrInvalidDoltSpecPath = errors.New("invalid dolt spec path")

var globalHttpFetcher HTTPFetcher = &http.Client{}

var _ nbs.TableFileStore = (*DoltChunkStore)(nil)
var _ datas.NBSCompressedChunkStore = (*DoltChunkStore)(nil)
var _ chunks.ChunkStore = (*DoltChunkStore)(nil)

// We may need this to be configurable for users with really bad internet
var downThroughputCheck = iohelp.MinThroughputCheckParams{
	MinBytesPerSec: 1024,
	CheckInterval:  1 * time.Second,
	NumIntervals:   5,
}

const (
	downRetryCount   = 5
	uploadRetryCount = 5
)

var uploadRetryParams = backoff.NewExponentialBackOff()
var downRetryParams = backoff.NewExponentialBackOff()

func init() {
	uploadRetryParams.MaxInterval = 5 * time.Second

	downRetryParams.MaxInterval = 5 * time.Second
}

// Only hedge downloads of ranges < 4MB in length for now.
const HedgeDownloadSizeLimit = 4 * 1024 * 1024

type HTTPFetcher interface {
	Do(req *http.Request) (*http.Response, error)
}

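// Illustrative sketch (not part of the original file): any *http.Client
// satisfies HTTPFetcher, so a client with custom timeouts can be injected
// with WithHTTPFetcher (defined below). The timeout value here is an
// assumption for the example, not a recommendation.
func exampleWithCustomFetcher(dcs *DoltChunkStore) *DoltChunkStore {
	client := &http.Client{
		Timeout: 2 * time.Minute, // bound each upload/download request
	}
	return dcs.WithHTTPFetcher(client)
}
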
type ConcurrencyParams struct {
	ConcurrentSmallFetches int
	ConcurrentLargeFetches int
	LargeFetchSize         int
}

type DoltChunkStore struct {
	org         string
	repoName    string
	host        string
	csClient    remotesapi.ChunkStoreServiceClient
	cache       ChunkCache
	metadata    *remotesapi.GetRepoMetadataResponse
	nbf         *types.NomsBinFormat
	httpFetcher HTTPFetcher
	concurrency ConcurrencyParams
	stats       cacheStats
}

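// NewDoltChunkStoreFromPath parses an org/repo pair from |path| (e.g. the
// path "/dolthub/my-repo" yields org "dolthub" and repoName "my-repo") and
// constructs a DoltChunkStore for that remote.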
func NewDoltChunkStoreFromPath(ctx context.Context, nbf *types.NomsBinFormat, path, host string, csClient remotesapi.ChunkStoreServiceClient) (*DoltChunkStore, error) {
	tokens := strings.Split(strings.Trim(path, "/"), "/")
	if len(tokens) != 2 {
		return nil, ErrInvalidDoltSpecPath
	}

	// todo:
	// this may just be a dolthub thing.  Need to revisit how we do this.
	org := tokens[0]
	repoName := tokens[1]

	return NewDoltChunkStore(ctx, nbf, org, repoName, host, csClient)
}

func NewDoltChunkStore(ctx context.Context, nbf *types.NomsBinFormat, org, repoName, host string, csClient remotesapi.ChunkStoreServiceClient) (*DoltChunkStore, error) {
	metadata, err := csClient.GetRepoMetadata(ctx, &remotesapi.GetRepoMetadataRequest{
		RepoId: &remotesapi.RepoId{
			Org:      org,
			RepoName: repoName,
		},
		ClientRepoFormat: &remotesapi.ClientRepoFormat{
			NbfVersion: nbf.VersionString(),
			NbsVersion: nbs.StorageVersion,
		},
	})

	if err != nil {
		return nil, err
	}

	return &DoltChunkStore{org, repoName, host, csClient, newMapChunkCache(), metadata, nbf, globalHttpFetcher, defaultConcurrency, cacheStats{}}, nil
}

func (dcs *DoltChunkStore) WithHTTPFetcher(fetcher HTTPFetcher) *DoltChunkStore {
	return &DoltChunkStore{dcs.org, dcs.repoName, dcs.host, dcs.csClient, dcs.cache, dcs.metadata, dcs.nbf, fetcher, dcs.concurrency, dcs.stats}
}

func (dcs *DoltChunkStore) WithNoopChunkCache() *DoltChunkStore {
	return &DoltChunkStore{dcs.org, dcs.repoName, dcs.host, dcs.csClient, noopChunkCache, dcs.metadata, dcs.nbf, dcs.httpFetcher, dcs.concurrency, dcs.stats}
}

func (dcs *DoltChunkStore) WithChunkCache(cache ChunkCache) *DoltChunkStore {
	return &DoltChunkStore{dcs.org, dcs.repoName, dcs.host, dcs.csClient, cache, dcs.metadata, dcs.nbf, dcs.httpFetcher, dcs.concurrency, dcs.stats}
}

func (dcs *DoltChunkStore) WithDownloadConcurrency(concurrency ConcurrencyParams) *DoltChunkStore {
	return &DoltChunkStore{dcs.org, dcs.repoName, dcs.host, dcs.csClient, dcs.cache, dcs.metadata, dcs.nbf, dcs.httpFetcher, concurrency, dcs.stats}
}

func (dcs *DoltChunkStore) getRepoId() *remotesapi.RepoId {
	return &remotesapi.RepoId{
		Org:      dcs.org,
		RepoName: dcs.repoName,
	}
}

type cacheStats struct {
	Hits uint32
}

func (s cacheStats) CacheHits() uint32 {
	return s.Hits
}

type CacheStats interface {
	CacheHits() uint32
}

// Get returns the Chunk for the value of the hash in the store. If the hash is absent from the store, EmptyChunk is returned.
func (dcs *DoltChunkStore) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) {
	hashes := hash.HashSet{h: struct{}{}}
	var found *chunks.Chunk
	err := dcs.GetMany(ctx, hashes, func(c *chunks.Chunk) { found = c })
	if err != nil {
		return chunks.EmptyChunk, err
	}
	if found != nil {
		return *found, nil
	} else {
		return chunks.EmptyChunk, nil
	}
}

func (dcs *DoltChunkStore) GetMany(ctx context.Context, hashes hash.HashSet, found func(*chunks.Chunk)) error {
	ae := atomicerr.New()
	decompressedSize := uint64(0)
	err := dcs.GetManyCompressed(ctx, hashes, func(cc nbs.CompressedChunk) {
		if ae.IsSet() {
			return
		}
		c, err := cc.ToChunk()
		if ae.SetIfErrAndCheck(err) {
			return
		}
		atomic.AddUint64(&decompressedSize, uint64(len(c.Data())))
		found(&c)
	})
	if span := opentracing.SpanFromContext(ctx); span != nil {
		span.LogKV("decompressed_bytes", decompressedSize)
	}
	if err != nil {
		return err
	}
	if err = ae.Get(); err != nil {
		return err
	}
	return nil
}

// GetManyCompressed gets the compressed Chunks with |hashes| from the store. On return, |found| will have been
// called for every chunk that was found. Any absent chunks are silently ignored.
func (dcs *DoltChunkStore) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(nbs.CompressedChunk)) error {
	span, ctx := tracing.StartSpan(ctx, "remotestorage.GetManyCompressed")
	defer span.Finish()

	hashToChunk := dcs.cache.Get(hashes)

	span.LogKV("num_hashes", len(hashes), "cache_hits", len(hashToChunk))
	atomic.AddUint32(&dcs.stats.Hits, uint32(len(hashToChunk)))

	notCached := make([]hash.Hash, 0, len(hashes))
	for h := range hashes {
		c := hashToChunk[h]

		if c.IsEmpty() {
			notCached = append(notCached, h)
		} else {
			found(c)
		}
	}

	if len(notCached) > 0 {
		err := dcs.readChunksAndCache(ctx, hashes, notCached, found)

		if err != nil {
			return err
		}
	}

	return nil
}

const (
	getLocsBatchSize = 256
)

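// GetRange is a local alias of the wire type remotesapi.HttpGetRange: a URL
// plus the byte ranges of the chunks to be read from the resource at that URL.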
type GetRange remotesapi.HttpGetRange

func (gr *GetRange) ResourcePath() string {
	u, _ := url.Parse(gr.Url)
	return fmt.Sprintf("%s://%s%s", u.Scheme, u.Host, u.Path)
}

func (gr *GetRange) Append(other *GetRange) {
	gr.Url = other.Url
	gr.Ranges = append(gr.Ranges, other.Ranges...)
}

func (gr *GetRange) Sort() {
	sort.Slice(gr.Ranges, func(i, j int) bool {
		return gr.Ranges[i].Offset < gr.Ranges[j].Offset
	})
}

func (gr *GetRange) ChunkStartOffset(i int) uint64 {
	return gr.Ranges[i].Offset
}

func (gr *GetRange) ChunkEndOffset(i int) uint64 {
	return gr.Ranges[i].Offset + uint64(gr.Ranges[i].Length)
}

func (gr *GetRange) GapBetween(i, j int) uint64 {
	return gr.ChunkStartOffset(j) - gr.ChunkEndOffset(i)
}

func (gr *GetRange) SplitAtGaps(maxGapBytes uint64) []*GetRange {
	gr.Sort()
	res := make([]*GetRange, 0)
	i := 0
	for i < len(gr.Ranges) {
		j := i + 1
		for j < len(gr.Ranges) {
			if gr.GapBetween(j-1, j) > maxGapBytes {
				break
			}
			j++
		}
		res = append(res, &GetRange{Url: gr.Url, Ranges: gr.Ranges[i:j]})
		i = j
	}
	return res
}
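
// Worked example (illustrative): for ranges covering bytes [0,100), [150,250)
// and [10000,10100) with maxGapBytes = 1024, SplitAtGaps returns two
// GetRanges: the first two ranges coalesce since their 50-byte gap is within
// the threshold, while the third starts a new batch.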

func (gr *GetRange) NumChunks() int {
	return len(gr.Ranges)
}

func (gr *GetRange) RangeLen() uint64 {
	return gr.ChunkEndOffset(gr.NumChunks()-1) - gr.ChunkStartOffset(0)
}

func (gr *GetRange) NumBytesInRanges() uint64 {
	res := uint64(0)
	for i := 0; i < len(gr.Ranges); i++ {
		start, end := gr.ChunkByteRange(i)
		res += end - start
	}
	return res
}

func (gr *GetRange) ChunkByteRange(i int) (uint64, uint64) {
	start := gr.ChunkStartOffset(i) - gr.ChunkStartOffset(0)
	end := gr.ChunkEndOffset(i) - gr.ChunkStartOffset(0)
	return start, end
}

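// sortRangesBySize orders ranges from largest RangeLen to smallest, which
// lets downloadChunks partition large fetches from small ones with a single
// cutoff index.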
func sortRangesBySize(ranges []*GetRange) {
	sort.Slice(ranges, func(i, j int) bool {
		return ranges[j].RangeLen() < ranges[i].RangeLen()
	})
}

type resourcePathToUrlFunc func(ctx context.Context, lastError error, resourcePath string) (url string, err error)

func (gr *GetRange) GetDownloadFunc(ctx context.Context, stats StatsRecorder, fetcher HTTPFetcher, chunkChan chan nbs.CompressedChunk, pathToUrl resourcePathToUrlFunc) func() error {
	if len(gr.Ranges) == 0 {
		return func() error { return nil }
	}
	return func() error {
		urlF := func(lastError error) (string, error) {
			url, err := pathToUrl(ctx, lastError, gr.ResourcePath())
			if err != nil {
				return "", err
			}
			if url == "" {
				url = gr.Url
			}
			return url, nil
		}
		var comprData []byte
		var err error
		rangeLen := gr.RangeLen()
		if rangeLen > HedgeDownloadSizeLimit {
			comprData, err = rangeDownloadWithRetries(ctx, stats, fetcher, gr.ChunkStartOffset(0), rangeLen, 1, urlF)
		} else {
			comprData, err = hedgedRangeDownloadWithRetries(ctx, stats, fetcher, gr.ChunkStartOffset(0), rangeLen, urlF)
		}
		if err != nil {
			return err
		}
		// Send the chunk for each range included in GetRange.
		for i := 0; i < len(gr.Ranges); i++ {
			s, e := gr.ChunkByteRange(i)
			cmpChnk, err := nbs.NewCompressedChunk(hash.New(gr.Ranges[i].Hash), comprData[s:e])
			if err != nil {
				return err
			}
			select {
			case chunkChan <- cmpChnk:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	}
}

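// locationRefresh tracks the download URL for a single resource, along with
// the state needed to refresh it once it expires or a request using it fails.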
type locationRefresh struct {
	RefreshAfter   time.Time
	RefreshRequest *remotesapi.RefreshTableFileUrlRequest
	URL            string
	lastRefresh    time.Time
	mu             *sync.Mutex
}

func (r *locationRefresh) Add(resp *remotesapi.DownloadLoc) {
	if r.URL == "" {
		r.URL = resp.Location.(*remotesapi.DownloadLoc_HttpGetRange).HttpGetRange.Url
	}
	if resp.RefreshAfter == nil {
		return
	}
	respTime := resp.RefreshAfter.AsTime()
	if (r.RefreshAfter == time.Time{}) || respTime.After(r.RefreshAfter) {
		r.RefreshAfter = resp.RefreshAfter.AsTime()
		r.RefreshRequest = resp.RefreshRequest
		r.URL = resp.Location.(*remotesapi.DownloadLoc_HttpGetRange).HttpGetRange.Url
	}
}

var refreshTableFileURLRetryDuration = 5 * time.Second

func (r *locationRefresh) GetURL(ctx context.Context, lastError error, client remotesapi.ChunkStoreServiceClient) (string, error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.RefreshRequest != nil {
		now := time.Now()
		wantsRefresh := now.After(r.RefreshAfter) || errors.Is(lastError, HttpError)
		canRefresh := time.Since(r.lastRefresh) > refreshTableFileURLRetryDuration
		if wantsRefresh && canRefresh {
			resp, err := client.RefreshTableFileUrl(ctx, r.RefreshRequest)
			if err != nil {
				return r.URL, err
			}
			r.RefreshAfter = resp.RefreshAfter.AsTime()
			r.URL = resp.Url
			r.lastRefresh = now
		}
	}
	return r.URL, nil
}

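// dlLocations aggregates download locations returned by the server, keyed by
// resource path: the byte ranges to fetch from each resource and the URL
// refresh state for that resource.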
type dlLocations struct {
	ranges    map[string]*GetRange
	refreshes map[string]*locationRefresh
}

func newDlLocations() dlLocations {
	return dlLocations{
		ranges:    make(map[string]*GetRange),
		refreshes: make(map[string]*locationRefresh),
	}
}

func (l *dlLocations) Add(resp *remotesapi.DownloadLoc) {
	gr := (*GetRange)(resp.Location.(*remotesapi.DownloadLoc_HttpGetRange).HttpGetRange)
	path := gr.ResourcePath()
	if v, ok := l.ranges[path]; ok {
		v.Append(gr)
		l.refreshes[path].Add(resp)
	} else {
		l.ranges[path] = gr
		refresh := &locationRefresh{mu: new(sync.Mutex)}
		refresh.Add(resp)
		l.refreshes[path] = refresh
	}
}

func (dcs *DoltChunkStore) getDLLocs(ctx context.Context, hashes []hash.Hash) (dlLocations, error) {
	span, ctx := tracing.StartSpan(ctx, "remotestorage.getDLLocs")
	span.LogKV("num_hashes", len(hashes))
	defer span.Finish()

	res := newDlLocations()

	// channel for receiving results from go routines making grpc calls to get download locations for chunks
	resCh := make(chan []*remotesapi.DownloadLoc)

	eg, ctx := errgroup.WithContext(ctx)

	// go routine for receiving the results of the grpc calls and aggregating them into res
	eg.Go(func() error {
		for {
			select {
			case locs, ok := <-resCh:
				if !ok {
					return nil
				}
				for _, loc := range locs {
					res.Add(loc)
				}
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	})

	// go routine for batching the get location requests, streaming the requests and streaming the responses.
	eg.Go(func() error {
		var reqs []*remotesapi.GetDownloadLocsRequest
		hashesBytes := HashesToSlices(hashes)
		batchItr(len(hashesBytes), getLocsBatchSize, func(st, end int) (stop bool) {
			batch := hashesBytes[st:end]
			req := &remotesapi.GetDownloadLocsRequest{RepoId: dcs.getRepoId(), ChunkHashes: batch}
			reqs = append(reqs, req)
			return false
		})
		op := func() error {
			seg, ctx := errgroup.WithContext(ctx)
			stream, err := dcs.csClient.StreamDownloadLocations(ctx)
			if err != nil {
				return NewRpcError(err, "StreamDownloadLocations", dcs.host, nil)
			}
			completedReqs := 0
			// Write requests
			seg.Go(func() error {
				for i := range reqs {
					if err := stream.Send(reqs[i]); err != nil {
						return NewRpcError(err, "StreamDownloadLocations", dcs.host, reqs[i])
					}
				}
				return stream.CloseSend()
			})
			// Read responses
			seg.Go(func() error {
				for {
					resp, err := stream.Recv()
					if err != nil {
						if err == io.EOF {
							return nil
						}
						return NewRpcError(err, "StreamDownloadLocations", dcs.host, reqs[completedReqs])
					}
					select {
					case resCh <- resp.Locs:
						completedReqs += 1
					case <-ctx.Done():
						return ctx.Err()
					}
				}
			})
			err = seg.Wait()
			reqs = reqs[completedReqs:]
			if len(reqs) == 0 {
				close(resCh)
			}
			return processGrpcErr(err)
		}
		return backoff.Retry(op, backoff.WithMaxRetries(csRetryParams, csClientRetries))
	})

	if err := eg.Wait(); err != nil {
		return dlLocations{}, err
	}
	return res, nil
}

func (dcs *DoltChunkStore) readChunksAndCache(ctx context.Context, hashes hash.HashSet, notCached []hash.Hash, found func(nbs.CompressedChunk)) error {
	// get the locations where the chunks can be downloaded from
	dlLocs, err := dcs.getDLLocs(ctx, notCached)
	if err != nil {
		return err
	}

	var wg sync.WaitGroup

	// channel to receive chunks on
	chunkChan := make(chan nbs.CompressedChunk, 128)

	toSend := make(map[hash.Hash]struct{}, len(notCached))
	for _, h := range notCached {
		toSend[h] = struct{}{}
	}

	// start a go routine to receive the downloaded chunks on
	wg.Add(1)
	go func() {
		defer wg.Done()
		for chunk := range chunkChan {
			dcs.cache.PutChunk(chunk)

			h := chunk.Hash()

			if _, send := toSend[h]; send {
				found(chunk)
			}
		}
	}()

	// download the chunks and close the channel after
	func() {
		defer close(chunkChan)
		err = dcs.downloadChunks(ctx, dlLocs, chunkChan)
	}()

	// wait for all the results to finish processing
	wg.Wait()

	if err != nil {
		return err
	}

	return nil
}

// Has returns true iff the value at the address |h| is contained in the store.
func (dcs *DoltChunkStore) Has(ctx context.Context, h hash.Hash) (bool, error) {
	hashes := hash.HashSet{h: struct{}{}}
	absent, err := dcs.HasMany(ctx, hashes)

	if err != nil {
		return false, err
	}

	return len(absent) == 0, nil
}

const maxHasManyBatchSize = 16 * 1024

// HasMany returns a new HashSet containing any members of |hashes| that are absent from the store.
func (dcs *DoltChunkStore) HasMany(ctx context.Context, hashes hash.HashSet) (hash.HashSet, error) {
	// get the set of hashes that isn't already in the cache
	notCached := dcs.cache.Has(hashes)

	if len(notCached) == 0 {
		return notCached, nil
	}

	// convert the set to a slice of hashes and a corresponding slice of the byte encoding for those hashes
	hashSl, byteSl := HashSetToSlices(notCached)

	absent := make(hash.HashSet)
	var found []nbs.CompressedChunk
	var err error

	batchItr(len(hashSl), maxHasManyBatchSize, func(st, end int) (stop bool) {
		// slice the slices into a batch of hashes
		currHashSl := hashSl[st:end]
		currByteSl := byteSl[st:end]

		// send a request to the remote api to determine which chunks the remote api already has
		req := &remotesapi.HasChunksRequest{RepoId: dcs.getRepoId(), Hashes: currByteSl}
		resp, rpcErr := dcs.csClient.HasChunks(ctx, req)

		if rpcErr != nil {
			// assign to the captured err; using := here would shadow it and
			// the error would never reach the check below the loop
			err = NewRpcError(rpcErr, "HasMany", dcs.host, req)
			return true
		}

		numAbsent := len(resp.Absent)
		sort.Slice(resp.Absent, func(i, j int) bool {
			return resp.Absent[i] < resp.Absent[j]
		})

		// loop over every hash in the current batch; if it is absent from the remote host add it to the
		// absent set, otherwise append it to the found slice
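		// (e.g. if the current batch holds five hashes and resp.Absent is [1, 3],
		// hashes 1 and 3 are added to the absent set and 0, 2 and 4 to found)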
		for i, j := 0, 0; i < len(currHashSl); i++ {
			currHash := currHashSl[i]

			nextAbsent := -1
			if j < numAbsent {
				nextAbsent = int(resp.Absent[j])
			}

			if i == nextAbsent {
				absent[currHash] = struct{}{}
				j++
			} else {
				c := nbs.ChunkToCompressedChunk(chunks.NewChunkWithHash(currHash, []byte{}))
				found = append(found, c)
			}
		}

		return false
	})

	if err != nil {
		return nil, err
	}

	if len(found)+len(absent) != len(notCached) {
		panic("not all chunks were accounted for")
	}

	if len(found) > 0 {
		dcs.cache.Put(found)
	}

	return absent, nil
}

// Put caches c. Upon return, c must be visible to
// subsequent Get and Has calls, but must not be persistent until a call
// to Flush(). Put may be called concurrently with other calls to Put(),
// Get(), GetMany(), Has() and HasMany().
func (dcs *DoltChunkStore) Put(ctx context.Context, c chunks.Chunk) error {
	cc := nbs.ChunkToCompressedChunk(c)
	dcs.cache.Put([]nbs.CompressedChunk{cc})
	return nil
}

// Version returns the NomsVersion with which this ChunkStore is compatible.
func (dcs *DoltChunkStore) Version() string {
	return dcs.metadata.NbfVersion
}

// Rebase brings this ChunkStore into sync with the persistent storage's
// current root.
func (dcs *DoltChunkStore) Rebase(ctx context.Context) error {
	req := &remotesapi.RebaseRequest{RepoId: dcs.getRepoId()}
	_, err := dcs.csClient.Rebase(ctx, req)

	if err != nil {
		return NewRpcError(err, "Rebase", dcs.host, req)
	}

	return dcs.refreshRepoMetadata(ctx)
}

func (dcs *DoltChunkStore) refreshRepoMetadata(ctx context.Context) error {
	mdReq := &remotesapi.GetRepoMetadataRequest{
		RepoId: &remotesapi.RepoId{
			Org:      dcs.org,
			RepoName: dcs.repoName,
		},
		ClientRepoFormat: &remotesapi.ClientRepoFormat{
			NbfVersion: dcs.nbf.VersionString(),
			NbsVersion: nbs.StorageVersion,
		},
	}
	metadata, err := dcs.csClient.GetRepoMetadata(ctx, mdReq)
	if err != nil {
		return NewRpcError(err, "GetRepoMetadata", dcs.host, mdReq)
	}
	dcs.metadata = metadata
	return nil
}

// Root returns the root of the database as of the time the ChunkStore
// was opened or the most recent call to Rebase.
func (dcs *DoltChunkStore) Root(ctx context.Context) (hash.Hash, error) {
	req := &remotesapi.RootRequest{RepoId: dcs.getRepoId()}
	resp, err := dcs.csClient.Root(ctx, req)

	if err != nil {
		return hash.Hash{}, NewRpcError(err, "Root", dcs.host, req)
	}

	return hash.New(resp.RootHash), nil
}

// Commit atomically attempts to persist all novel Chunks and update the
// persisted root hash from last to current (or keeps it the same).
// If last doesn't match the root in persistent storage, returns false.
func (dcs *DoltChunkStore) Commit(ctx context.Context, current, last hash.Hash) (bool, error) {
	hashToChunkCount, err := dcs.uploadChunks(ctx)

	if err != nil {
		return false, err
	}

	chnkTblInfo := make([]*remotesapi.ChunkTableInfo, 0, len(hashToChunkCount))
	for h, cnt := range hashToChunkCount {
		chnkTblInfo = append(chnkTblInfo, &remotesapi.ChunkTableInfo{Hash: h[:], ChunkCount: uint32(cnt)})
	}

	req := &remotesapi.CommitRequest{
		RepoId:         dcs.getRepoId(),
		Current:        current[:],
		Last:           last[:],
		ChunkTableInfo: chnkTblInfo,
		ClientRepoFormat: &remotesapi.ClientRepoFormat{
			NbfVersion: dcs.nbf.VersionString(),
			NbsVersion: nbs.StorageVersion,
		},
	}
	resp, err := dcs.csClient.Commit(ctx, req)
	if err != nil {
		return false, NewRpcError(err, "Commit", dcs.host, req)
	}

	return resp.Success, dcs.refreshRepoMetadata(ctx)
}

// Stats may return some kind of struct that reports statistics about the
// ChunkStore instance. The type is implementation-dependent, and impls
// may return nil
func (dcs *DoltChunkStore) Stats() interface{} {
	return cacheStats{atomic.LoadUint32(&dcs.stats.Hits)}
}

// StatsSummary may return a string containing summarized statistics for
// this ChunkStore. It must return "Unsupported" if this operation is not
// supported.
func (dcs *DoltChunkStore) StatsSummary() string {
	return fmt.Sprintf("CacheHits: %v", dcs.Stats().(CacheStats).CacheHits())
}

// Close tears down any resources in use by the implementation. After
// Close(), the ChunkStore may not be used again. It is NOT SAFE to call
// Close() concurrently with any other ChunkStore method; behavior is
// undefined and probably crashy.
func (dcs *DoltChunkStore) Close() error {
	return nil
}

// getting this working using the simplest approach first
func (dcs *DoltChunkStore) uploadChunks(ctx context.Context) (map[hash.Hash]int, error) {
	hashToChunk := dcs.cache.GetAndClearChunksToFlush()

	if len(hashToChunk) == 0 {
		return map[hash.Hash]int{}, nil
	}

	chnks := make([]chunks.Chunk, 0, len(hashToChunk))
	for _, chable := range hashToChunk {
		ch, err := chable.ToChunk()

		if err != nil {
			return nil, err
		}

		chnks = append(chnks, ch)
	}

	hashToCount := make(map[hash.Hash]int)
	hashToData := make(map[hash.Hash][]byte)
	hashToDetails := make(map[hash.Hash]*remotesapi.TableFileDetails)

	// structuring so this can be done as multiple files in the future.
	{
		name, data, err := nbs.WriteChunks(chnks)

		if err != nil {
			return map[hash.Hash]int{}, err
		}

		h := hash.Parse(name)
		hashToData[h] = data
		hashToCount[h] = len(chnks)

		md5Bytes := md5.Sum(data)
		hashToDetails[h] = &remotesapi.TableFileDetails{
			Id:            h[:],
			ContentLength: uint64(len(data)),
			ContentHash:   md5Bytes[:],
		}
	}

	tfds := make([]*remotesapi.TableFileDetails, 0, len(hashToDetails))
	for _, v := range hashToDetails {
		tfds = append(tfds, v)
	}

	req := &remotesapi.GetUploadLocsRequest{RepoId: dcs.getRepoId(), TableFileDetails: tfds}
	resp, err := dcs.csClient.GetUploadLocations(ctx, req)

	if err != nil {
		return map[hash.Hash]int{}, err
	}

	for _, loc := range resp.Locs {
		var err error
		h := hash.New(loc.TableFileHash)
		data := hashToData[h]
		details := hashToDetails[h]
		switch typedLoc := loc.Location.(type) {
		case *remotesapi.UploadLoc_HttpPost:
			err = dcs.httpPostUpload(ctx, loc.TableFileHash, typedLoc.HttpPost, bytes.NewBuffer(data), details.ContentHash)
		default:
		}

		if err != nil {
			return map[hash.Hash]int{}, err
		}
	}

	return hashToCount, nil
}

type Sizer interface {
	Size() int64
}

func (dcs *DoltChunkStore) httpPostUpload(ctx context.Context, hashBytes []byte, post *remotesapi.HttpPostTableFile, rd io.Reader, contentHash []byte) error {
	return HttpPostUpload(ctx, dcs.httpFetcher, post, rd, contentHash)
}

func HttpPostUpload(ctx context.Context, httpFetcher HTTPFetcher, post *remotesapi.HttpPostTableFile, rd io.Reader, contentHash []byte) error {
	req, err := http.NewRequest(http.MethodPut, post.Url, rd)
	if err != nil {
		return err
	}

	if sizer, ok := rd.(Sizer); ok {
		req.ContentLength = sizer.Size()
	}

	if len(contentHash) > 0 {
		md5s := base64.StdEncoding.EncodeToString(contentHash)
		req.Header.Set("Content-MD5", md5s)
	}

	fetcher := globalHttpFetcher
	if httpFetcher != nil {
		fetcher = httpFetcher
	}

	var resp *http.Response
	op := func() error {
		var err error
		resp, err = fetcher.Do(req.WithContext(ctx))

		if err == nil {
			defer func() {
				_ = resp.Body.Close()
			}()
		}

		return processHttpResp(resp, err)
	}

	err = backoff.Retry(op, backoff.WithMaxRetries(uploadRetryParams, uploadRetryCount))

	if err != nil {
		return err
	}

	return nil
}
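
// exampleUpload is an illustrative sketch (not part of the original file):
// a bytes.Reader satisfies Sizer, so HttpPostUpload sets Content-Length, and
// the md5 sum of the payload becomes the Content-MD5 header. Passing a nil
// HTTPFetcher falls back to globalHttpFetcher.
func exampleUpload(ctx context.Context, post *remotesapi.HttpPostTableFile, data []byte) error {
	sum := md5.Sum(data)
	return HttpPostUpload(ctx, nil, post, bytes.NewReader(data), sum[:])
}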

// aggregateDownloads looks for byte ranges that need to be downloaded and tries to aggregate them into a smaller
// number of larger downloads. It does this by sorting the needed byte ranges and then comparing how close together
// neighboring ranges are. If two neighbors are within the threshold, they are aggregated into a single request for
// the entire span of data.
func aggregateDownloads(aggDistance uint64, resourceGets map[string]*GetRange) []*GetRange {
	var res []*GetRange
	for _, resourceGet := range resourceGets {
		res = append(res, resourceGet.SplitAtGaps(aggDistance)...)
	}
	return res
}
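
// Worked example (illustrative): with aggDistance = chunkAggDistance (8KB),
// two 4KB chunks separated by a 1KB gap in the same table file are fetched
// with a single ranged GET, while a chunk 100KB further along starts a new
// request.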

const (
	chunkAggDistance = 8 * 1024
)

var defaultConcurrency ConcurrencyParams = ConcurrencyParams{
	ConcurrentSmallFetches: 64,
	ConcurrentLargeFetches: 2,
	LargeFetchSize:         2 * 1024 * 1024,
}
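
// With the defaults above, aggregated fetches of LargeFetchSize (2MB) or
// more run at most two at a time, while smaller fetches run up to 64-way
// concurrent (see the work partitioning in downloadChunks below).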

func logDownloadStats(span opentracing.Span, originalGets map[string]*GetRange, computedGets []*GetRange) {
	chunkCount := 0
	originalBytes := uint64(0)
	for _, r := range originalGets {
		chunkCount += r.NumChunks()
		originalBytes += r.NumBytesInRanges()
	}
	downloadBytes := uint64(0)
	for _, r := range computedGets {
		downloadBytes += r.RangeLen()
	}
	span.LogKV("num_files", len(originalGets), "num_chunks", chunkCount, "num_batches", len(computedGets), "original_bytes", originalBytes, "download_bytes", downloadBytes)
}

// creates work functions for each download and executes them in parallel.  The work functions write downloaded chunks
// to chunkChan
func (dcs *DoltChunkStore) downloadChunks(ctx context.Context, dlLocs dlLocations, chunkChan chan nbs.CompressedChunk) error {
	span, ctx := tracing.StartSpan(ctx, "remotestorage.downloadChunks")
	defer span.Finish()

	resourceGets := dlLocs.ranges

	gets := aggregateDownloads(chunkAggDistance, resourceGets)
	logDownloadStats(span, resourceGets, gets)

	sortRangesBySize(gets)

	toUrl := func(ctx context.Context, lastError error, resourcePath string) (string, error) {
		return dlLocs.refreshes[resourcePath].GetURL(ctx, lastError, dcs.csClient)
	}

	stats := StatsFactory()

	eg, ctx := errgroup.WithContext(ctx)

	// loop over all the gets that need to be downloaded and create a work function for each
	work := make([]func() error, len(gets))
	largeCutoff := -1
	for i, get := range gets {
		work[i] = get.GetDownloadFunc(ctx, stats, dcs.httpFetcher, chunkChan, toUrl)
		if get.RangeLen() >= uint64(dcs.concurrency.LargeFetchSize) {
			largeCutoff = i
		}
	}

	// execute the work
	eg.Go(func() error {
		return concurrentExec(work[0:largeCutoff+1], dcs.concurrency.ConcurrentLargeFetches)
	})
	eg.Go(func() error {
		return concurrentExec(work[largeCutoff+1:len(work)], dcs.concurrency.ConcurrentSmallFetches)
	})

	defer func() {
		StatsFlusher(stats)
	}()
	return eg.Wait()
}

type urlFactoryFunc func(error) (string, error)

func hedgedRangeDownloadWithRetries(ctx context.Context, stats StatsRecorder, fetcher HTTPFetcher, offset, length uint64, urlStrF urlFactoryFunc) ([]byte, error) {
	res, err := DownloadHedger.Do(ctx, Work{
		Work: func(ctx context.Context, n int) (interface{}, error) {
			return rangeDownloadWithRetries(ctx, stats, fetcher, offset, length, n, urlStrF)
		},
		Size: int(length),
	})
	if err != nil {
		return nil, err
	}
	return res.([]byte), nil
}

// rangeDownloadWithRetries executes an http get with the 'Range' header to get a range of bytes from a file. The
// request is executed with retries; if progress was made, subsequent attempts resume the download from where the
// previous ones left off.
func rangeDownloadWithRetries(ctx context.Context, stats StatsRecorder, fetcher HTTPFetcher, offset, length uint64, hedgeN int, urlStrF urlFactoryFunc) ([]byte, error) {
	// parameters used for resuming downloads.
	var allBufs [][]byte
	currOffset := offset
	currLength := length

	var lastError error
	var retryCnt int

	// execute the request
	op := func() (rerr error) {
		defer func() {
			lastError = rerr
			retryCnt += 1
		}()
		urlStr, err := urlStrF(lastError)
		if err != nil {
			return err
		}

		req, err := http.NewRequest(http.MethodGet, urlStr, nil)
		if err != nil {
			return err
		}

		rangeVal := fmt.Sprintf("bytes=%d-%d", currOffset, currOffset+currLength-1)
		req.Header.Set("Range", rangeVal)

		stats.RecordDownloadAttemptStart(hedgeN, retryCnt, currOffset-offset, length)
		start := time.Now()
		resp, err := fetcher.Do(req.WithContext(ctx))
		if err == nil {
			defer func() {
				_ = resp.Body.Close()
			}()
		}

		respErr := processHttpResp(resp, err)
		if respErr != nil {
			return respErr
		}
		stats.RecordTimeToFirstByte(hedgeN, retryCnt, length, time.Since(start))

		// read the results
		comprData, err := iohelp.ReadWithMinThroughput(resp.Body, int64(currLength), downThroughputCheck)

		dataRead := len(comprData)
		if dataRead > 0 {
			allBufs = append(allBufs, comprData)
			currLength -= uint64(dataRead)
			currOffset += uint64(dataRead)
		}
		return err
	}

	dstart := time.Now()
	err := backoff.Retry(op, backoff.WithMaxRetries(downRetryParams, downRetryCount))
	if err != nil {
		return nil, err
	}
	stats.RecordDownloadComplete(hedgeN, retryCnt, length, time.Since(dstart))

	return collapseBuffers(allBufs, length), nil
}
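
// Worked example (illustrative): for offset = 100 and length = 100 the first
// attempt sends "Range: bytes=100-199". If it reads only 60 bytes before
// failing, the retry sends "Range: bytes=160-199", and collapseBuffers
// stitches the partial reads back into a single buffer.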

func collapseBuffers(bufs [][]byte, length uint64) []byte {
	if len(bufs) == 1 {
		return bufs[0]
	}
	res := make([]byte, 0, length)
	for _, buf := range bufs {
		res = append(res, buf...)
	}
	return res
}

func (dcs *DoltChunkStore) SupportedOperations() nbs.TableFileStoreOps {
	return nbs.TableFileStoreOps{
		CanRead:  true,
		CanWrite: true,
		CanPrune: false,
		CanGC:    false,
	}
}

// WriteTableFile reads a table file from the provided reader and writes it to the chunk store.
func (dcs *DoltChunkStore) WriteTableFile(ctx context.Context, fileId string, numChunks int, rd io.Reader, contentLength uint64, contentHash []byte) error {
	fileIdBytes := hash.Parse(fileId)
	tfd := &remotesapi.TableFileDetails{
		Id:            fileIdBytes[:],
		ContentLength: contentLength,
		ContentHash:   contentHash,
	}
	req := &remotesapi.GetUploadLocsRequest{
		RepoId:           dcs.getRepoId(),
		TableFileDetails: []*remotesapi.TableFileDetails{tfd},

		// redundant and deprecated.  Still setting for compatibility, but will remove promptly.
		TableFileHashes: [][]byte{fileIdBytes[:]},
	}
	resp, err := dcs.csClient.GetUploadLocations(ctx, req)

	if err != nil {
		return err
	}

	if len(resp.Locs) != 1 {
		return errors.New("unexpected upload location count")
	}

	loc := resp.Locs[0]
	switch typedLoc := loc.Location.(type) {
	case *remotesapi.UploadLoc_HttpPost:
		err = dcs.httpPostUpload(ctx, loc.TableFileHash, typedLoc.HttpPost, rd, contentHash)

		if err != nil {
			return err
		}

	default:
		return errors.New("unsupported upload location")
	}

	chnkTblInfo := []*remotesapi.ChunkTableInfo{
		{Hash: fileIdBytes[:], ChunkCount: uint32(numChunks)},
	}

	atReq := &remotesapi.AddTableFilesRequest{
		RepoId:         dcs.getRepoId(),
		ChunkTableInfo: chnkTblInfo,
		ClientRepoFormat: &remotesapi.ClientRepoFormat{
			NbfVersion: dcs.nbf.VersionString(),
			NbsVersion: nbs.StorageVersion,
		},
	}

	atResp, err := dcs.csClient.AddTableFiles(ctx, atReq)

	if err != nil {
		return NewRpcError(err, "UpdateManifest", dcs.host, atReq)
	}

	if !atResp.Success {
		return errors.New("update table files failed")
	}

	return nil
}

// PruneTableFiles deletes old table files that are no longer referenced in the manifest.
func (dcs *DoltChunkStore) PruneTableFiles(ctx context.Context) error {
	return chunks.ErrUnsupportedOperation
}

// Sources retrieves the current root hash, a list of all the table files (which may include appendix table files)
// and a list of only appendix table files
func (dcs *DoltChunkStore) Sources(ctx context.Context) (hash.Hash, []nbs.TableFile, []nbs.TableFile, error) {
	req := &remotesapi.ListTableFilesRequest{RepoId: dcs.getRepoId()}
	resp, err := dcs.csClient.ListTableFiles(ctx, req)
	if err != nil {
		return hash.Hash{}, nil, nil, err
	}
	sourceFiles := getTableFiles(dcs, resp.TableFileInfo)
	appendixFiles := getTableFiles(dcs, resp.AppendixTableFileInfo)
	return hash.New(resp.RootHash), sourceFiles, appendixFiles, nil
}

func getTableFiles(dcs *DoltChunkStore, infoList []*remotesapi.TableFileInfo) []nbs.TableFile {
	tableFiles := make([]nbs.TableFile, 0)
	for _, nfo := range infoList {
		tableFiles = append(tableFiles, DoltRemoteTableFile{dcs, nfo})
	}
	return tableFiles
}

func (dcs *DoltChunkStore) Size(ctx context.Context) (uint64, error) {
	return dcs.metadata.StorageSize, nil
}

// SetRootChunk changes the root chunk hash from the previous value to the new root.
func (dcs *DoltChunkStore) SetRootChunk(ctx context.Context, root, previous hash.Hash) error {
	panic("Not Implemented")
}

// DoltRemoteTableFile is an implementation of a TableFile that lives in a DoltChunkStore
type DoltRemoteTableFile struct {
	dcs  *DoltChunkStore
	info *remotesapi.TableFileInfo
}

// FileID gets the id of the file
func (drtf DoltRemoteTableFile) FileID() string {
	return drtf.info.FileId
}

// NumChunks returns the number of chunks in a table file
func (drtf DoltRemoteTableFile) NumChunks() int {
	return int(drtf.info.NumChunks)
}

var ErrRemoteTableFileGet = errors.New("HTTP GET for remote table file failed")

func sanitizeSignedUrl(url string) string {
	si := strings.Index(url, "Signature=")
	if si == -1 {
		return url
	}
	ei := strings.Index(url[si:], "&")
	if ei == -1 {
		return url[:si+15] + "..."
	} else {
		return url[:si+15] + "..." + url[si:][ei:]
	}
}
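
// For example (illustrative): sanitizeSignedUrl("https://host/file?Signature=abcdefgh&Expires=99")
// returns "https://host/file?Signature=abcde...&Expires=99"; the first five
// characters of the signature value are kept and the remainder is elided.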

// Open returns an io.ReadCloser which can be used to read the bytes of a table file.
func (drtf DoltRemoteTableFile) Open(ctx context.Context) (io.ReadCloser, error) {
	// refresh the URL if we are past its refresh deadline (mirrors locationRefresh.GetURL)
	if drtf.info.RefreshAfter != nil && time.Now().After(drtf.info.RefreshAfter.AsTime()) {
		resp, err := drtf.dcs.csClient.RefreshTableFileUrl(ctx, drtf.info.RefreshRequest)
		if err == nil {
			drtf.info.Url = resp.Url
			drtf.info.RefreshAfter = resp.RefreshAfter
		}
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, drtf.info.Url, nil)
	if err != nil {
		return nil, err
	}

	resp, err := drtf.dcs.httpFetcher.Do(req)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode/100 != 2 {
		defer resp.Body.Close()
		body := make([]byte, 4096)
		n, _ := io.ReadFull(resp.Body, body)
		return nil, fmt.Errorf("%w: status code: %d;\nurl: %s\n\nbody:\n\n%s\n", ErrRemoteTableFileGet, resp.StatusCode, sanitizeSignedUrl(drtf.info.Url), string(body[0:n]))
	}

	return resp.Body, nil
}