github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/storageccl/export.go

// Copyright 2016 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package storageccl

import (
	"bytes"
	"context"
	"crypto/sha512"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
)

// ExportRequestTargetFileSize controls the target file size for SSTs created
// during backups.
var ExportRequestTargetFileSize = settings.RegisterByteSizeSetting(
	"kv.bulk_sst.target_size",
	"target size for SSTs emitted from export requests",
	64<<20, /* 64 MiB */
)

// ExportRequestMaxAllowedFileSizeOverage controls the maximum size in excess of
// the target file size which an exported SST may be. If this value is positive
// and an SST would exceed this size (due to large rows or large numbers of
// versions), then the export will fail.
var ExportRequestMaxAllowedFileSizeOverage = settings.RegisterByteSizeSetting(
	"kv.bulk_sst.max_allowed_overage",
	"if positive, allowed size in excess of target size for SSTs from export requests",
	64<<20, /* 64 MiB */
)

func init() {
	batcheval.RegisterReadOnlyCommand(roachpb.Export, declareKeysExport, evalExport)
	ExportRequestTargetFileSize.SetVisibility(settings.Reserved)
	ExportRequestMaxAllowedFileSizeOverage.SetVisibility(settings.Reserved)
}
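
// maxAllowedSSTSize is a minimal illustrative sketch (this helper is not part
// of the original file) of how the two settings above combine in evalExport
// below: exports paginate at the target size, and a positive overage puts a
// hard cap of target+overage on any single SST.
func maxAllowedSSTSize(targetSize uint64, allowedOverage int64) uint64 {
	if targetSize > 0 && allowedOverage > 0 {
		// E.g. with the 64 MiB defaults for both settings, the hard cap on a
		// single exported SST is 128 MiB.
		return targetSize + uint64(allowedOverage)
	}
	// Zero means no hard cap; pagination on targetSize alone still applies
	// when targetSize > 0.
	return 0
}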
func declareKeysExport(
	desc *roachpb.RangeDescriptor,
	header roachpb.Header,
	req roachpb.Request,
	latchSpans, lockSpans *spanset.SpanSet,
) {
	batcheval.DefaultDeclareIsolatedKeys(desc, header, req, latchSpans, lockSpans)
	latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{Key: keys.RangeLastGCKey(header.RangeID)})
}

// evalExport dumps the requested keys into files of non-overlapping key ranges
// in a format suitable for bulk ingest.
func evalExport(
	ctx context.Context, batch storage.Reader, cArgs batcheval.CommandArgs, resp roachpb.Response,
) (result.Result, error) {
	args := cArgs.Args.(*roachpb.ExportRequest)
	h := cArgs.Header
	reply := resp.(*roachpb.ExportResponse)

	ctx, span := tracing.ChildSpan(ctx, fmt.Sprintf("Export [%s,%s)", args.Key, args.EndKey))
	defer tracing.FinishSpan(span)

	// If the startTime is zero, then we're doing a full backup and the gc
	// threshold is irrelevant for MVCC_Latest backups. Otherwise, make sure
	// startTime is after the gc threshold. If it's not, the mvcc tombstones could
	// have been deleted and the resulting RocksDB tombstones compacted, which
	// means we'd miss deletions in the incremental backup. For MVCC_All backups
	// with no start time, they'll only be capturing the *revisions* since the
	// gc threshold, so noting that in the reply allows the BACKUP to correctly
	// note the supported time bounds for RESTORE AS OF SYSTEM TIME.
	gcThreshold := cArgs.EvalCtx.GetGCThreshold()
	if !args.StartTime.IsEmpty() {
		if args.StartTime.LessEq(gcThreshold) {
			return result.Result{}, errors.Errorf("start timestamp %v must be after replica GC threshold %v", args.StartTime, gcThreshold)
		}
	} else if args.MVCCFilter == roachpb.MVCCFilter_All {
		reply.StartTime = gcThreshold
	}

	if err := cArgs.EvalCtx.GetLimiters().ConcurrentExportRequests.Begin(ctx); err != nil {
		return result.Result{}, err
	}
	defer cArgs.EvalCtx.GetLimiters().ConcurrentExportRequests.Finish()

	makeExternalStorage := !args.ReturnSST || args.Storage != roachpb.ExternalStorage{} ||
		len(args.StorageByLocalityKV) > 0
	if makeExternalStorage || log.V(1) {
		log.Infof(ctx, "export [%s,%s)", args.Key, args.EndKey)
	} else {
		// Requests that don't write to export storage are expected to be small.
		log.Eventf(ctx, "export [%s,%s)", args.Key, args.EndKey)
	}

	// To get the store to export to, first try to match the locality of this node
	// to the locality KVs in args.StorageByLocalityKV (used for partitioned
	// backups). If that map isn't set or there's no match, fall back to
	// args.Storage.
	var localityKV string
	var exportStore cloud.ExternalStorage
	if makeExternalStorage {
		var storeConf roachpb.ExternalStorage
		var err error
		foundStoreByLocality := false
		if len(args.StorageByLocalityKV) > 0 {
			locality := cArgs.EvalCtx.GetNodeLocality()
			localityKV, storeConf, foundStoreByLocality = getMatchingStore(&locality, args.StorageByLocalityKV)
		}
		if !foundStoreByLocality {
			storeConf = args.Storage
		}
		exportStore, err = cArgs.EvalCtx.GetExternalStorage(ctx, storeConf)
		if err != nil {
			return result.Result{}, err
		}
		defer exportStore.Close()
	}

	var exportAllRevisions bool
	switch args.MVCCFilter {
	case roachpb.MVCCFilter_Latest:
		exportAllRevisions = false
	case roachpb.MVCCFilter_All:
		exportAllRevisions = true
	default:
		return result.Result{}, errors.Errorf("unknown MVCC filter: %s", args.MVCCFilter)
	}

	io := storage.IterOptions{
		UpperBound: args.EndKey,
	}

	// Time-bound iterators only make sense to use if the start time is set.
	if args.EnableTimeBoundIteratorOptimization && !args.StartTime.IsEmpty() {
		// The call to startTime.Next() converts our exclusive start bound into the
		// inclusive start bound that MinTimestampHint expects. This is strictly a
		// performance optimization; omitting the call would still return correct
		// results.
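		// For example (hypothetical values): with an exclusive StartTime of
		// (WallTime: 5, Logical: 0), the oldest version an incremental export
		// can return is at (WallTime: 5, Logical: 1), which is exactly what
		// StartTime.Next() produces for the inclusive hint.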
		io.MinTimestampHint = args.StartTime.Next()
		io.MaxTimestampHint = h.Timestamp
	}

	e := spanset.GetDBEngine(batch, roachpb.Span{Key: args.Key, EndKey: args.EndKey})
	targetSize := uint64(args.TargetFileSize)
	var maxSize uint64
	allowedOverage := ExportRequestMaxAllowedFileSizeOverage.Get(&cArgs.EvalCtx.ClusterSettings().SV)
	if targetSize > 0 && allowedOverage > 0 {
		maxSize = targetSize + uint64(allowedOverage)
	}
	for start := args.Key; start != nil; {
		data, summary, resume, err := e.ExportToSst(start, args.EndKey, args.StartTime,
			h.Timestamp, exportAllRevisions, targetSize, maxSize, io)
		if err != nil {
			return result.Result{}, err
		}

		// NB: This should only happen on the first page of results. If there were
		// more data to be read that led to pagination then we'd see it in this
		// page. Break out of the loop because there must be no data to export.
		if summary.DataSize == 0 {
			break
		}

		var checksum []byte
		if !args.OmitChecksum {
			// Compute the checksum before we upload and remove the local file.
			checksum, err = SHA512ChecksumData(data)
			if err != nil {
				return result.Result{}, err
			}
		}

		if args.Encryption != nil {
			data, err = EncryptFile(data, args.Encryption.Key)
			if err != nil {
				return result.Result{}, err
			}
		}

		span := roachpb.Span{Key: start}
		if resume != nil {
			span.EndKey = resume
		} else {
			span.EndKey = args.EndKey
		}
		exported := roachpb.ExportResponse_File{
			Span:       span,
			Exported:   summary,
			Sha512:     checksum,
			LocalityKV: localityKV,
		}

		if exportStore != nil {
			// TODO(dt): don't reach out into a SQL builtin here; this code lives in KV.
			// Create a unique int differently.
			nodeID := cArgs.EvalCtx.NodeID()
			exported.Path = fmt.Sprintf("%d.sst", builtins.GenerateUniqueInt(base.SQLInstanceID(nodeID)))
			if err := exportStore.WriteFile(ctx, exported.Path, bytes.NewReader(data)); err != nil {
				return result.Result{}, err
			}
		}
		if args.ReturnSST {
			exported.SST = data
		}
		reply.Files = append(reply.Files, exported)
		start = resume
	}

	return result.Result{}, nil
}

// SHA512ChecksumData returns the SHA512 checksum of data.
func SHA512ChecksumData(data []byte) ([]byte, error) {
	h := sha512.New()
	if _, err := h.Write(data); err != nil {
		panic(errors.Wrap(err, `"It never returns an error." -- https://golang.org/pkg/hash`))
	}
	return h.Sum(nil), nil
}

func getMatchingStore(
	locality *roachpb.Locality, storageByLocalityKV map[string]*roachpb.ExternalStorage,
) (string, roachpb.ExternalStorage, bool) {
	kvs := locality.Tiers
	// When matching, more specific KVs in the node locality take precedence
	// over less specific ones.
	for i := len(kvs) - 1; i >= 0; i-- {
		if store, ok := storageByLocalityKV[kvs[i].String()]; ok {
			return kvs[i].String(), *store, true
		}
	}
	return "", roachpb.ExternalStorage{}, false
}
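
// exampleGetMatchingStore is a minimal usage sketch for getMatchingStore
// above; the locality tiers and map keys are hypothetical values, and the
// function exists purely for illustration, not as part of the original file.
// Because tiers are scanned from most specific to least specific, a node with
// locality region=us-east1,az=b picks the "az=b" entry even though
// "region=us-east1" also matches.
func exampleGetMatchingStore() {
	locality := roachpb.Locality{Tiers: []roachpb.Tier{
		{Key: "region", Value: "us-east1"},
		{Key: "az", Value: "b"},
	}}
	storageByLocalityKV := map[string]*roachpb.ExternalStorage{
		"region=us-east1": {},
		"az=b":            {},
	}
	localityKV, _, found := getMatchingStore(&locality, storageByLocalityKV)
	fmt.Println(localityKV, found) // prints: az=b true
}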