github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/storageccl/export.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package storageccl
    10  
    11  import (
    12  	"bytes"
    13  	"context"
    14  	"crypto/sha512"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/base"
    18  	"github.com/cockroachdb/cockroach/pkg/keys"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
    21  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
    22  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    23  	"github.com/cockroachdb/cockroach/pkg/settings"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
    25  	"github.com/cockroachdb/cockroach/pkg/storage"
    26  	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
    27  	"github.com/cockroachdb/cockroach/pkg/util/log"
    28  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    29  	"github.com/cockroachdb/errors"
    30  )
    31  
    32  // ExportRequestTargetFileSize controls the target file size for SSTs created
    33  // during backups.
    34  var ExportRequestTargetFileSize = settings.RegisterByteSizeSetting(
    35  	"kv.bulk_sst.target_size",
    36  	"target size for SSTs emitted from export requests",
    37  	64<<20, /* 64 MiB */
    38  )
    39  
    40  // ExportRequestMaxAllowedFileSizeOverage controls the maximum size in excess of
    41  // the target file size which an exported SST may be. If this value is positive
    42  // and an SST would exceed this size (due to large rows or large numbers of
    43  // versions), then the export will fail.
    44  var ExportRequestMaxAllowedFileSizeOverage = settings.RegisterByteSizeSetting(
    45  	"kv.bulk_sst.max_allowed_overage",
    46  	"if positive, allowed size in excess of target size for SSTs from export requests",
    47  	64<<20, /* 64 MiB */
    48  )
    49  
    50  func init() {
    51  	batcheval.RegisterReadOnlyCommand(roachpb.Export, declareKeysExport, evalExport)
    52  	ExportRequestTargetFileSize.SetVisibility(settings.Reserved)
    53  	ExportRequestMaxAllowedFileSizeOverage.SetVisibility(settings.Reserved)
    54  }
    55  
    56  func declareKeysExport(
    57  	desc *roachpb.RangeDescriptor,
    58  	header roachpb.Header,
    59  	req roachpb.Request,
    60  	latchSpans, lockSpans *spanset.SpanSet,
    61  ) {
    62  	batcheval.DefaultDeclareIsolatedKeys(desc, header, req, latchSpans, lockSpans)
    63  	latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{Key: keys.RangeLastGCKey(header.RangeID)})
    64  }
    65  
    66  // evalExport dumps the requested keys into files of non-overlapping key ranges
    67  // in a format suitable for bulk ingest.
    68  func evalExport(
    69  	ctx context.Context, batch storage.Reader, cArgs batcheval.CommandArgs, resp roachpb.Response,
    70  ) (result.Result, error) {
    71  	args := cArgs.Args.(*roachpb.ExportRequest)
    72  	h := cArgs.Header
    73  	reply := resp.(*roachpb.ExportResponse)
    74  
    75  	ctx, span := tracing.ChildSpan(ctx, fmt.Sprintf("Export [%s,%s)", args.Key, args.EndKey))
    76  	defer tracing.FinishSpan(span)
    77  
    78  	// If the startTime is zero, then we're doing a full backup and the gc
    79  	// threshold is irrelevant for MVCC_Lastest backups. Otherwise, make sure
    80  	// startTime is after the gc threshold. If it's not, the mvcc tombstones could
    81  	// have been deleted and the resulting RocksDB tombstones compacted, which
    82  	// means we'd miss deletions in the incremental backup. For MVCC_All backups
    83  	// with no start time, they'll only be capturing the *revisions* since the
    84  	// gc threshold, so noting that in the reply allows the BACKUP to correctly
    85  	// note the supported time bounds for RESTORE AS OF SYSTEM TIME.
    86  	gcThreshold := cArgs.EvalCtx.GetGCThreshold()
    87  	if !args.StartTime.IsEmpty() {
    88  		if args.StartTime.LessEq(gcThreshold) {
    89  			return result.Result{}, errors.Errorf("start timestamp %v must be after replica GC threshold %v", args.StartTime, gcThreshold)
    90  		}
    91  	} else if args.MVCCFilter == roachpb.MVCCFilter_All {
    92  		reply.StartTime = gcThreshold
    93  	}
    94  
    95  	if err := cArgs.EvalCtx.GetLimiters().ConcurrentExportRequests.Begin(ctx); err != nil {
    96  		return result.Result{}, err
    97  	}
    98  	defer cArgs.EvalCtx.GetLimiters().ConcurrentExportRequests.Finish()
    99  
   100  	makeExternalStorage := !args.ReturnSST || args.Storage != roachpb.ExternalStorage{} ||
   101  		(args.StorageByLocalityKV != nil && len(args.StorageByLocalityKV) > 0)
   102  	if makeExternalStorage || log.V(1) {
   103  		log.Infof(ctx, "export [%s,%s)", args.Key, args.EndKey)
   104  	} else {
   105  		// Requests that don't write to export storage are expected to be small.
   106  		log.Eventf(ctx, "export [%s,%s)", args.Key, args.EndKey)
   107  	}
   108  
   109  	// To get the store to export to, first try to match the locality of this node
   110  	// to the locality KVs in args.StorageByLocalityKV (used for partitioned
   111  	// backups). If that map isn't set or there's no match, fall back to
   112  	// args.Storage.
   113  	var localityKV string
   114  	var exportStore cloud.ExternalStorage
   115  	if makeExternalStorage {
   116  		var storeConf roachpb.ExternalStorage
   117  		var err error
   118  		foundStoreByLocality := false
   119  		if args.StorageByLocalityKV != nil && len(args.StorageByLocalityKV) > 0 {
   120  			locality := cArgs.EvalCtx.GetNodeLocality()
   121  			localityKV, storeConf, foundStoreByLocality = getMatchingStore(&locality, args.StorageByLocalityKV)
   122  		}
   123  		if !foundStoreByLocality {
   124  			storeConf = args.Storage
   125  		}
   126  		exportStore, err = cArgs.EvalCtx.GetExternalStorage(ctx, storeConf)
   127  		if err != nil {
   128  			return result.Result{}, err
   129  		}
   130  		defer exportStore.Close()
   131  	}
   132  
   133  	var exportAllRevisions bool
   134  	switch args.MVCCFilter {
   135  	case roachpb.MVCCFilter_Latest:
   136  		exportAllRevisions = false
   137  	case roachpb.MVCCFilter_All:
   138  		exportAllRevisions = true
   139  	default:
   140  		return result.Result{}, errors.Errorf("unknown MVCC filter: %s", args.MVCCFilter)
   141  	}
   142  
   143  	io := storage.IterOptions{
   144  		UpperBound: args.EndKey,
   145  	}
   146  
   147  	// Time-bound iterators only make sense to use if the start time is set.
   148  	if args.EnableTimeBoundIteratorOptimization && !args.StartTime.IsEmpty() {
   149  		// The call to startTime.Next() converts our exclusive start bound into the
   150  		// inclusive start bound that MinTimestampHint expects. This is strictly a
   151  		// performance optimization; omitting the call would still return correct
   152  		// results.
   153  		io.MinTimestampHint = args.StartTime.Next()
   154  		io.MaxTimestampHint = h.Timestamp
   155  	}
   156  
   157  	e := spanset.GetDBEngine(batch, roachpb.Span{Key: args.Key, EndKey: args.EndKey})
   158  	targetSize := uint64(args.TargetFileSize)
   159  	var maxSize uint64
   160  	allowedOverage := ExportRequestMaxAllowedFileSizeOverage.Get(&cArgs.EvalCtx.ClusterSettings().SV)
   161  	if targetSize > 0 && allowedOverage > 0 {
   162  		maxSize = targetSize + uint64(allowedOverage)
   163  	}
   164  	for start := args.Key; start != nil; {
   165  		data, summary, resume, err := e.ExportToSst(start, args.EndKey, args.StartTime,
   166  			h.Timestamp, exportAllRevisions, targetSize, maxSize, io)
   167  		if err != nil {
   168  			return result.Result{}, err
   169  		}
   170  
   171  		// NB: This should only happen on the first page of results. If there were
   172  		// more data to be read that lead to pagination then we'd see it in this
   173  		// page. Break out of the loop because there must be no data to export.
   174  		if summary.DataSize == 0 {
   175  			break
   176  		}
   177  
   178  		var checksum []byte
   179  		if !args.OmitChecksum {
   180  			// Compute the checksum before we upload and remove the local file.
   181  			checksum, err = SHA512ChecksumData(data)
   182  			if err != nil {
   183  				return result.Result{}, err
   184  			}
   185  		}
   186  
   187  		if args.Encryption != nil {
   188  			data, err = EncryptFile(data, args.Encryption.Key)
   189  			if err != nil {
   190  				return result.Result{}, err
   191  			}
   192  		}
   193  
   194  		span := roachpb.Span{Key: start}
   195  		if resume != nil {
   196  			span.EndKey = resume
   197  		} else {
   198  			span.EndKey = args.EndKey
   199  		}
   200  		exported := roachpb.ExportResponse_File{
   201  			Span:       span,
   202  			Exported:   summary,
   203  			Sha512:     checksum,
   204  			LocalityKV: localityKV,
   205  		}
   206  
   207  		if exportStore != nil {
   208  			// TODO(dt): don't reach out into a SQL builtin here; this code lives in KV.
   209  			// Create a unique int differently.
   210  			nodeID := cArgs.EvalCtx.NodeID()
   211  			exported.Path = fmt.Sprintf("%d.sst", builtins.GenerateUniqueInt(base.SQLInstanceID(nodeID)))
   212  			if err := exportStore.WriteFile(ctx, exported.Path, bytes.NewReader(data)); err != nil {
   213  				return result.Result{}, err
   214  			}
   215  		}
   216  		if args.ReturnSST {
   217  			exported.SST = data
   218  		}
   219  		reply.Files = append(reply.Files, exported)
   220  		start = resume
   221  	}
   222  
   223  	return result.Result{}, nil
   224  }
   225  
   226  // SHA512ChecksumData returns the SHA512 checksum of data.
   227  func SHA512ChecksumData(data []byte) ([]byte, error) {
   228  	h := sha512.New()
   229  	if _, err := h.Write(data); err != nil {
   230  		panic(errors.Wrap(err, `"It never returns an error." -- https://golang.org/pkg/hash`))
   231  	}
   232  	return h.Sum(nil), nil
   233  }
   234  
   235  func getMatchingStore(
   236  	locality *roachpb.Locality, storageByLocalityKV map[string]*roachpb.ExternalStorage,
   237  ) (string, roachpb.ExternalStorage, bool) {
   238  	kvs := locality.Tiers
   239  	// When matching, more specific KVs in the node locality take precedence
   240  	// over less specific ones.
   241  	for i := len(kvs) - 1; i >= 0; i-- {
   242  		if store, ok := storageByLocalityKV[kvs[i].String()]; ok {
   243  			return kvs[i].String(), *store, true
   244  		}
   245  	}
   246  	return "", roachpb.ExternalStorage{}, false
   247  }