github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/show_fingerprints.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 18 "github.com/cockroachdb/cockroach/pkg/security" 19 "github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver" 20 "github.com/cockroachdb/cockroach/pkg/sql/privilege" 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 "github.com/cockroachdb/errors" 25 ) 26 27 type showFingerprintsNode struct { 28 optColumnsSlot 29 30 tableDesc *sqlbase.ImmutableTableDescriptor 31 indexes []*sqlbase.IndexDescriptor 32 33 run showFingerprintsRun 34 } 35 36 // ShowFingerprints statement fingerprints the data in each index of a table. 37 // For each index, a full index scan is run to hash every row with the fnv64 38 // hash. For the primary index, all table columns are included in the hash, 39 // whereas for secondary indexes, the index cols + the primary index cols + the 40 // STORING cols are included. The hashed rows are all combined with XOR using 41 // distsql. 42 // 43 // Our hash functions expect input of type BYTES (or string but we use bytes 44 // here), so we have to convert any datums that are not BYTES. This is currently 45 // done by round tripping through the string representation of the column 46 // (`::string::bytes`) and is an obvious area for improvement in the next 47 // version. 48 // 49 // To extract the fingerprints at some point in the past, the following 50 // query can be used: 51 // SELECT * FROM [SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE foo] AS OF SYSTEM TIME xxx 52 func (p *planner) ShowFingerprints( 53 ctx context.Context, n *tree.ShowFingerprints, 54 ) (planNode, error) { 55 // We avoid the cache so that we can observe the fingerprints without 56 // taking a lease, like other SHOW commands. 57 tableDesc, err := p.ResolveUncachedTableDescriptorEx( 58 ctx, n.Table, true /*required*/, resolver.ResolveRequireTableDesc) 59 if err != nil { 60 return nil, err 61 } 62 63 if err := p.CheckPrivilege(ctx, tableDesc, privilege.SELECT); err != nil { 64 return nil, err 65 } 66 67 return &showFingerprintsNode{ 68 tableDesc: tableDesc, 69 indexes: tableDesc.AllNonDropIndexes(), 70 }, nil 71 } 72 73 // showFingerprintsRun contains the run-time state of 74 // showFingerprintsNode during local execution. 75 type showFingerprintsRun struct { 76 rowIdx int 77 // values stores the current row, updated by Next(). 78 values []tree.Datum 79 } 80 81 func (n *showFingerprintsNode) startExec(params runParams) error { 82 n.run.values = []tree.Datum{tree.DNull, tree.DNull} 83 return nil 84 } 85 86 func (n *showFingerprintsNode) Next(params runParams) (bool, error) { 87 if n.run.rowIdx >= len(n.indexes) { 88 return false, nil 89 } 90 index := n.indexes[n.run.rowIdx] 91 92 cols := make([]string, 0, len(n.tableDesc.Columns)) 93 addColumn := func(col *sqlbase.ColumnDescriptor) { 94 // TODO(dan): This is known to be a flawed way to fingerprint. Any datum 95 // with the same string representation is fingerprinted the same, even 96 // if they're different types. 97 switch col.Type.Family() { 98 case types.BytesFamily: 99 cols = append(cols, fmt.Sprintf("%s:::bytes", tree.NameStringP(&col.Name))) 100 default: 101 cols = append(cols, fmt.Sprintf("%s::string::bytes", tree.NameStringP(&col.Name))) 102 } 103 } 104 105 if index.ID == n.tableDesc.PrimaryIndex.ID { 106 for i := range n.tableDesc.Columns { 107 addColumn(&n.tableDesc.Columns[i]) 108 } 109 } else { 110 colsByID := make(map[sqlbase.ColumnID]*sqlbase.ColumnDescriptor) 111 for i := range n.tableDesc.Columns { 112 col := &n.tableDesc.Columns[i] 113 colsByID[col.ID] = col 114 } 115 colIDs := append(append(index.ColumnIDs, index.ExtraColumnIDs...), index.StoreColumnIDs...) 116 for _, colID := range colIDs { 117 addColumn(colsByID[colID]) 118 } 119 } 120 121 // The fnv64 hash was chosen mostly due to speed. I did an AS OF SYSTEM TIME 122 // fingerprint over 31GiB on a 4 node production cluster (with no other 123 // traffic to try and keep things comparable). The cluster was restarted in 124 // between each run. Resulting times: 125 // 126 // fnv => 17m 127 // sha512 => 1h6m 128 // sha265 => 1h6m 129 // fnv64 (again) => 17m 130 // 131 // TODO(dan): If/when this ever loses its EXPERIMENTAL prefix and gets 132 // exposed to users, consider adding a version to the fingerprint output. 133 sql := fmt.Sprintf(`SELECT 134 xor_agg(fnv64(%s))::string AS fingerprint 135 FROM [%d AS t]@{FORCE_INDEX=[%d]} 136 `, strings.Join(cols, `,`), n.tableDesc.ID, index.ID) 137 // If were'in in an AOST context, propagate it to the inner statement so that 138 // the inner statement gets planned with planner.avoidCachedDescriptors set, 139 // like the outter one. 140 if params.p.semaCtx.AsOfTimestamp != nil { 141 ts := params.p.txn.ReadTimestamp() 142 sql = sql + " AS OF SYSTEM TIME " + ts.AsOfSystemTime() 143 } 144 145 fingerprintCols, err := params.extendedEvalCtx.ExecCfg.InternalExecutor.QueryRowEx( 146 params.ctx, "hash-fingerprint", 147 params.p.txn, 148 sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser}, 149 sql, 150 ) 151 if err != nil { 152 return false, err 153 } 154 155 if len(fingerprintCols) != 1 { 156 return false, errors.AssertionFailedf( 157 "unexpected number of columns returned: 1 vs %d", 158 len(fingerprintCols)) 159 } 160 fingerprint := fingerprintCols[0] 161 162 n.run.values[0] = tree.NewDString(index.Name) 163 n.run.values[1] = fingerprint 164 n.run.rowIdx++ 165 return true, nil 166 } 167 168 func (n *showFingerprintsNode) Values() tree.Datums { return n.run.values } 169 func (n *showFingerprintsNode) Close(_ context.Context) {}