github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/storageccl/key_rewriter.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package storageccl
    10  
    11  import (
    12  	"bytes"
    13  
    14  	"github.com/cockroachdb/cockroach/pkg/keys"
    15  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    16  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    17  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    18  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    19  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    20  	"github.com/cockroachdb/errors"
    21  )
    22  
    23  // prefixRewrite holds information for a single []byte replacement of a prefix.
    24  type prefixRewrite struct {
    25  	OldPrefix []byte
    26  	NewPrefix []byte
    27  }
    28  
    29  // prefixRewriter is a matcher for an ordered list of pairs of byte prefix
    30  // rewrite rules.
    31  type prefixRewriter []prefixRewrite
    32  
    33  // RewriteKey modifies key using the first matching rule and returns
    34  // it. If no rules matched, returns false and the original input key.
    35  func (p prefixRewriter) rewriteKey(key []byte) ([]byte, bool) {
    36  	for _, rewrite := range p {
    37  		if bytes.HasPrefix(key, rewrite.OldPrefix) {
    38  			if len(rewrite.OldPrefix) == len(rewrite.NewPrefix) {
    39  				copy(key[:len(rewrite.OldPrefix)], rewrite.NewPrefix)
    40  				return key, true
    41  			}
    42  			// TODO(dan): Special case when key's cap() is enough.
    43  			newKey := make([]byte, 0, len(rewrite.NewPrefix)+len(key)-len(rewrite.OldPrefix))
    44  			newKey = append(newKey, rewrite.NewPrefix...)
    45  			newKey = append(newKey, key[len(rewrite.OldPrefix):]...)
    46  			return newKey, true
    47  		}
    48  	}
    49  	return key, false
    50  }
    51  
    52  // KeyRewriter rewrites old table IDs to new table IDs. It is able to descend
    53  // into interleaved keys, and is able to function on partial keys for spans
    54  // and splits.
    55  type KeyRewriter struct {
    56  	prefixes prefixRewriter
    57  	descs    map[sqlbase.ID]*sqlbase.TableDescriptor
    58  }
    59  
    60  // MakeKeyRewriterFromRekeys makes a KeyRewriter from Rekey protos.
    61  func MakeKeyRewriterFromRekeys(rekeys []roachpb.ImportRequest_TableRekey) (*KeyRewriter, error) {
    62  	descs := make(map[sqlbase.ID]*sqlbase.TableDescriptor)
    63  	for _, rekey := range rekeys {
    64  		var desc sqlbase.Descriptor
    65  		if err := protoutil.Unmarshal(rekey.NewDesc, &desc); err != nil {
    66  			return nil, errors.Wrapf(err, "unmarshalling rekey descriptor for old table id %d", rekey.OldID)
    67  		}
    68  		table := desc.Table(hlc.Timestamp{})
    69  		if table == nil {
    70  			return nil, errors.New("expected a table descriptor")
    71  		}
    72  		descs[sqlbase.ID(rekey.OldID)] = table
    73  	}
    74  	return MakeKeyRewriter(descs)
    75  }
    76  
    77  // MakeKeyRewriter makes a KeyRewriter from a map of descs keyed by original ID.
    78  func MakeKeyRewriter(descs map[sqlbase.ID]*sqlbase.TableDescriptor) (*KeyRewriter, error) {
    79  	var prefixes prefixRewriter
    80  	seenPrefixes := make(map[string]bool)
    81  	for oldID, desc := range descs {
    82  		// The PrefixEnd() of index 1 is the same as the prefix of index 2, so use a
    83  		// map to avoid duplicating entries.
    84  
    85  		for _, index := range desc.AllNonDropIndexes() {
    86  			oldPrefix := roachpb.Key(makeKeyRewriterPrefixIgnoringInterleaved(oldID, index.ID))
    87  			newPrefix := roachpb.Key(makeKeyRewriterPrefixIgnoringInterleaved(desc.ID, index.ID))
    88  			if !seenPrefixes[string(oldPrefix)] {
    89  				seenPrefixes[string(oldPrefix)] = true
    90  				prefixes = append(prefixes, prefixRewrite{
    91  					OldPrefix: oldPrefix,
    92  					NewPrefix: newPrefix,
    93  				})
    94  			}
    95  			// All the encoded data for a index will have the prefix just added, but
    96  			// if you need to translate a half-open range describing that prefix
    97  			// (and we do), the prefix end needs to be in the map too.
    98  			oldPrefix = oldPrefix.PrefixEnd()
    99  			newPrefix = newPrefix.PrefixEnd()
   100  			if !seenPrefixes[string(oldPrefix)] {
   101  				seenPrefixes[string(oldPrefix)] = true
   102  				prefixes = append(prefixes, prefixRewrite{
   103  					OldPrefix: oldPrefix,
   104  					NewPrefix: newPrefix,
   105  				})
   106  			}
   107  		}
   108  	}
   109  	return &KeyRewriter{
   110  		prefixes: prefixes,
   111  		descs:    descs,
   112  	}, nil
   113  }
   114  
   115  // makeKeyRewriterPrefixIgnoringInterleaved creates a table/index prefix for
   116  // the given table and index IDs. sqlbase.MakeIndexKeyPrefix is a similar
   117  // function, but it takes into account interleaved ancestors, which we don't
   118  // want here.
   119  func makeKeyRewriterPrefixIgnoringInterleaved(tableID sqlbase.ID, indexID sqlbase.IndexID) []byte {
   120  	return keys.TODOSQLCodec.IndexPrefix(uint32(tableID), uint32(indexID))
   121  }
   122  
   123  // RewriteKey modifies key (possibly in place), changing all table IDs to their
   124  // new value, including any interleaved table children and prefix ends. This
   125  // function works by inspecting the key for table and index IDs, then uses the
   126  // corresponding table and index descriptors to determine if interleaved data is
   127  // present and if it is, to find the next prefix of an interleaved child, then
   128  // calls itself recursively until all interleaved children have been rekeyed. If
   129  // it encounters a table ID for which it does not have a configured rewrite, it
   130  // returns the prefix of the key that was rewritten key. The returned boolean
   131  // is true if and only if all of the table IDs found in the key were rewritten.
   132  // If isFromSpan is true, failures in value decoding are assumed to be due to
   133  // valid span manipulations, like PrefixEnd or Next having altered the trailing
   134  // byte(s) to corrupt the value encoding -- in such a case we will not be able
   135  // to decode the value (to determine how much further to scan for table IDs) but
   136  // we can assume that since these manipulations are only done to the trailing
   137  // byte that we're likely at the end anyway and do not need to search for any
   138  // further table IDs to replace.
   139  func (kr *KeyRewriter) RewriteKey(key []byte, isFromSpan bool) ([]byte, bool, error) {
   140  	// Fetch the original table ID for descriptor lookup. Ignore errors because
   141  	// they will be caught later on if tableID isn't in descs or kr doesn't
   142  	// perform a rewrite.
   143  	_, tableID, _ := keys.TODOSQLCodec.DecodeTablePrefix(key)
   144  	// Rewrite the first table ID.
   145  	key, ok := kr.prefixes.rewriteKey(key)
   146  	if !ok {
   147  		return nil, false, nil
   148  	}
   149  	desc := kr.descs[sqlbase.ID(tableID)]
   150  	if desc == nil {
   151  		return nil, false, errors.Errorf("missing descriptor for table %d", tableID)
   152  	}
   153  	// Check if this key may have interleaved children.
   154  	k, _, indexID, err := keys.TODOSQLCodec.DecodeIndexPrefix(key)
   155  	if err != nil {
   156  		return nil, false, err
   157  	}
   158  	if len(k) == 0 {
   159  		// If there isn't any more data, we are at some split boundary.
   160  		return key, true, nil
   161  	}
   162  	idx, err := desc.FindIndexByID(sqlbase.IndexID(indexID))
   163  	if err != nil {
   164  		return nil, false, err
   165  	}
   166  	if len(idx.InterleavedBy) == 0 {
   167  		// Not interleaved.
   168  		return key, true, nil
   169  	}
   170  	// We do not support interleaved secondary indexes.
   171  	if idx.ID != desc.PrimaryIndex.ID {
   172  		return nil, false, errors.New("restoring interleaved secondary indexes not supported")
   173  	}
   174  	colIDs, _ := idx.FullColumnIDs()
   175  	var skipCols int
   176  	for _, ancestor := range idx.Interleave.Ancestors {
   177  		skipCols += int(ancestor.SharedPrefixLen)
   178  	}
   179  	for i := 0; i < len(colIDs)-skipCols; i++ {
   180  		n, err := encoding.PeekLength(k)
   181  		if err != nil {
   182  			// PeekLength, and key decoding in general, can fail when reading the last
   183  			// value from a key that is coming from a span. Keys in spans are often
   184  			// altered e.g. by calling Next() or PrefixEnd() to ensure a given span is
   185  			// inclusive or for other reasons, but the manipulations sometimes change
   186  			// the encoded bytes, meaning they can no longer successfully decode as
   187  			// back to the original values. This is OK when span boundaries mostly are
   188  			// only required to be even divisions of keyspace, but when we try to go
   189  			// back to interpreting them as keys, it can fall apart. Partitioning a
   190  			// table (and applying zone configs) eagerly creates splits at the defined
   191  			// partition boundaries, using PrefixEnd for their ends, resulting in such
   192  			// spans.
   193  			//
   194  			// Fortunately, the only common span manipulations are to the trailing
   195  			// byte of a key (e.g. incrementing or appending a null) so for our needs
   196  			// here, if we fail to decode because of one of those manipulations, we
   197  			// can assume that we are at the end of the key as far as fields where a
   198  			// table ID which needs to be replaced can appear and consider the rewrite
   199  			// of this key as being compelted successfully.
   200  			//
   201  			// Finally unlike key rewrites of actual row-data, span rewrites do not
   202  			// need to be perfect: spans are only rewritten for use in pre-splitting
   203  			// and work distribution, so even if it turned out that this assumption
   204  			// was incorrect, it could cause a performance degradation but does not
   205  			// pose a correctness risk.
   206  			if isFromSpan {
   207  				return key, true, nil
   208  			}
   209  			return nil, false, err
   210  		}
   211  		k = k[n:]
   212  		// Check if we ran out of key before getting to an interleave child?
   213  		if len(k) == 0 {
   214  			return key, true, nil
   215  		}
   216  	}
   217  	// We might have an interleaved key.
   218  	k, ok = encoding.DecodeIfInterleavedSentinel(k)
   219  	if !ok {
   220  		return key, true, nil
   221  	}
   222  	prefix := key[:len(key)-len(k)]
   223  	k, ok, err = kr.RewriteKey(k, isFromSpan)
   224  	if err != nil {
   225  		return nil, false, err
   226  	}
   227  	if !ok {
   228  		// The interleaved child was not rewritten, skip this row.
   229  		return prefix, false, nil
   230  	}
   231  	key = append(prefix, k...)
   232  	return key, true, nil
   233  }