github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/c-deps/libroach/row_counter.cc (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  #include "row_counter.h"
    12  #include <iostream>
    13  #include "encoding.h"
    14  
    15  using namespace cockroach;
    16  
    17  int RowCounter::GetRowPrefixLength(rocksdb::Slice* key) {
    18    size_t n = key->size();
    19  
    20    // Strip tenant ID prefix to get a "SQL key" starting with a table ID.
    21    rocksdb::Slice buf = rocksdb::Slice(*key);
    22    if (!StripTenantPrefix(&buf)) {
    23      return 0;
    24    }
    25    size_t sql_n = key->size();
    26  
    27    if (!IsInt(&buf)) {
    28      // Not a table key, so the row prefix is the entire key.
    29      return n;
    30    }
    31  
    32    // The column family ID length is encoded as a varint and we take advantage of
    33    // the fact that the column family ID itself will be encoded in 0-9 bytes and
    34    // thus the length of the column family ID data will fit in a single byte.
    35    buf.remove_prefix(sql_n - 1);
    36  
    37    if (!IsInt(&buf)) {
    38      // The last byte is not a valid column family ID suffix.
    39      return 0;
    40    }
    41  
    42    uint64_t col_fam_id_len;
    43    if (!DecodeUvarint64(&buf, &col_fam_id_len)) {
    44      return 0;
    45    }
    46  
    47    if (col_fam_id_len > uint64_t(sql_n - 1)) {
    48      // The column family ID length was impossible. colFamIDLen is the length of
    49      // the encoded column family ID suffix. We add 1 to account for the byte
    50      // holding the length of the encoded column family ID and if that total
    51      // (colFamIDLen+1) is greater than the key suffix (sqlN == len(sqlKey)) then
    52      // we bail. Note that we don't consider this an error because
    53      // EnsureSafeSplitKey can be called on keys that look like table keys but
    54      // which do not have a column family ID length suffix (e.g by
    55      // SystemConfig.ComputeSplitKey).
    56      return 0;
    57    }
    58  
    59    return n - int(col_fam_id_len) - 1;
    60  }
    61  
    62  // EnsureSafeSplitKey transforms the SQL table key argumnet such that it is a
    63  // valid split key (i.e. does not occur in the middle of a row).
    64  void RowCounter::EnsureSafeSplitKey(rocksdb::Slice* key) {
    65    // The row prefix for a key is unique to keys in its row - no key without the
    66    // row prefix will be in the key's row. Therefore, we can be certain that
    67    // using the row prefix for a key as a split key is safe: it doesn't occur in
    68    // the middle of a row.
    69    int idx = GetRowPrefixLength(key);
    70    key->remove_suffix(key->size() - idx);
    71  }
    72  
    73  // Count examines each key passed to it and increments the running count when it
    74  // sees a key that belongs to a new row.
    75  bool RowCounter::Count(const rocksdb::Slice& key) {
    76    // EnsureSafeSplitKey is usually used to avoid splitting a row across ranges,
    77    // by returning the row's key prefix.
    78    // We reuse it here to count "rows" by counting when it changes.
    79    // Non-SQL keys are returned unchanged or may error -- we ignore them, since
    80    // non-SQL keys are obviously thus not SQL rows.
    81  
    82    rocksdb::Slice decoded_key;
    83    int64_t wall_time = 0;
    84    int32_t logical = 0;
    85    if (!DecodeKey(key, &decoded_key, &wall_time, &logical)) {
    86      return false;
    87    }
    88  
    89    size_t key_size = decoded_key.size();
    90    EnsureSafeSplitKey(&decoded_key);
    91  
    92    if (decoded_key.empty() || key_size == decoded_key.size()) {
    93      return true;
    94    }
    95  
    96    // no change key prefix => no new row.
    97    if (decoded_key == prev_key) {
    98      return true;
    99    }
   100  
   101    prev_key.assign(decoded_key.data(), decoded_key.size());
   102  
   103    uint64_t tbl;
   104    if (!DecodeTenantAndTablePrefix(&decoded_key, &tbl)) {
   105      return false;
   106    }
   107  
   108    uint64_t index_id;
   109    if (!DecodeUvarint64(&decoded_key, &index_id)) {
   110      return false;
   111    }
   112  
   113    // This mirrors logic of the go function roachpb.BulkOpSummaryID.
   114    uint64_t bulk_op_summary_id = (tbl << 32) | index_id;
   115    (*summary->mutable_entry_counts())[bulk_op_summary_id]++;
   116  
   117    if (index_id == 1) {
   118      summary->set_deprecated_rows(summary->deprecated_rows() + 1);
   119    } else {
   120      summary->set_deprecated_index_entries(summary->deprecated_index_entries() + 1);
   121    }
   122  
   123    return true;
   124  }