github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/c-deps/libroach/row_counter.cc (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 #include "row_counter.h" 12 #include <iostream> 13 #include "encoding.h" 14 15 using namespace cockroach; 16 17 int RowCounter::GetRowPrefixLength(rocksdb::Slice* key) { 18 size_t n = key->size(); 19 20 // Strip tenant ID prefix to get a "SQL key" starting with a table ID. 21 rocksdb::Slice buf = rocksdb::Slice(*key); 22 if (!StripTenantPrefix(&buf)) { 23 return 0; 24 } 25 size_t sql_n = key->size(); 26 27 if (!IsInt(&buf)) { 28 // Not a table key, so the row prefix is the entire key. 29 return n; 30 } 31 32 // The column family ID length is encoded as a varint and we take advantage of 33 // the fact that the column family ID itself will be encoded in 0-9 bytes and 34 // thus the length of the column family ID data will fit in a single byte. 35 buf.remove_prefix(sql_n - 1); 36 37 if (!IsInt(&buf)) { 38 // The last byte is not a valid column family ID suffix. 39 return 0; 40 } 41 42 uint64_t col_fam_id_len; 43 if (!DecodeUvarint64(&buf, &col_fam_id_len)) { 44 return 0; 45 } 46 47 if (col_fam_id_len > uint64_t(sql_n - 1)) { 48 // The column family ID length was impossible. colFamIDLen is the length of 49 // the encoded column family ID suffix. We add 1 to account for the byte 50 // holding the length of the encoded column family ID and if that total 51 // (colFamIDLen+1) is greater than the key suffix (sqlN == len(sqlKey)) then 52 // we bail. Note that we don't consider this an error because 53 // EnsureSafeSplitKey can be called on keys that look like table keys but 54 // which do not have a column family ID length suffix (e.g by 55 // SystemConfig.ComputeSplitKey). 56 return 0; 57 } 58 59 return n - int(col_fam_id_len) - 1; 60 } 61 62 // EnsureSafeSplitKey transforms the SQL table key argumnet such that it is a 63 // valid split key (i.e. does not occur in the middle of a row). 64 void RowCounter::EnsureSafeSplitKey(rocksdb::Slice* key) { 65 // The row prefix for a key is unique to keys in its row - no key without the 66 // row prefix will be in the key's row. Therefore, we can be certain that 67 // using the row prefix for a key as a split key is safe: it doesn't occur in 68 // the middle of a row. 69 int idx = GetRowPrefixLength(key); 70 key->remove_suffix(key->size() - idx); 71 } 72 73 // Count examines each key passed to it and increments the running count when it 74 // sees a key that belongs to a new row. 75 bool RowCounter::Count(const rocksdb::Slice& key) { 76 // EnsureSafeSplitKey is usually used to avoid splitting a row across ranges, 77 // by returning the row's key prefix. 78 // We reuse it here to count "rows" by counting when it changes. 79 // Non-SQL keys are returned unchanged or may error -- we ignore them, since 80 // non-SQL keys are obviously thus not SQL rows. 81 82 rocksdb::Slice decoded_key; 83 int64_t wall_time = 0; 84 int32_t logical = 0; 85 if (!DecodeKey(key, &decoded_key, &wall_time, &logical)) { 86 return false; 87 } 88 89 size_t key_size = decoded_key.size(); 90 EnsureSafeSplitKey(&decoded_key); 91 92 if (decoded_key.empty() || key_size == decoded_key.size()) { 93 return true; 94 } 95 96 // no change key prefix => no new row. 97 if (decoded_key == prev_key) { 98 return true; 99 } 100 101 prev_key.assign(decoded_key.data(), decoded_key.size()); 102 103 uint64_t tbl; 104 if (!DecodeTenantAndTablePrefix(&decoded_key, &tbl)) { 105 return false; 106 } 107 108 uint64_t index_id; 109 if (!DecodeUvarint64(&decoded_key, &index_id)) { 110 return false; 111 } 112 113 // This mirrors logic of the go function roachpb.BulkOpSummaryID. 114 uint64_t bulk_op_summary_id = (tbl << 32) | index_id; 115 (*summary->mutable_entry_counts())[bulk_op_summary_id]++; 116 117 if (index_id == 1) { 118 summary->set_deprecated_rows(summary->deprecated_rows() + 1); 119 } else { 120 summary->set_deprecated_index_entries(summary->deprecated_index_entries() + 1); 121 } 122 123 return true; 124 }