vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/report.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vdiff 18 19 import ( 20 "fmt" 21 "sort" 22 "strings" 23 24 "vitess.io/vitess/go/sqltypes" 25 "vitess.io/vitess/go/vt/sqlparser" 26 ) 27 28 const ( 29 // At most how many samples we should show for row differences in the final report 30 maxVDiffReportSampleRows = 10 31 truncatedNotation = "...[TRUNCATED]" 32 ) 33 34 // DiffReport is the summary of differences for one table. 35 type DiffReport struct { 36 TableName string 37 38 // counts 39 ProcessedRows int64 40 MatchingRows int64 41 MismatchedRows int64 42 ExtraRowsSource int64 43 ExtraRowsTarget int64 44 45 // actual data for a few sample rows 46 ExtraRowsSourceDiffs []*RowDiff `json:"ExtraRowsSourceSample,omitempty"` 47 ExtraRowsTargetDiffs []*RowDiff `json:"ExtraRowsTargetSample,omitempty"` 48 MismatchedRowsDiffs []*DiffMismatch `json:"MismatchedRowsSample,omitempty"` 49 } 50 51 type ProgressReport struct { 52 Percentage float64 53 ETA string `json:"ETA,omitempty"` // a formatted date 54 } 55 56 // DiffMismatch is a sample of row diffs between source and target. 57 type DiffMismatch struct { 58 Source *RowDiff `json:"Source,omitempty"` 59 Target *RowDiff `json:"Target,omitempty"` 60 } 61 62 // RowDiff is a row that didn't match as part of the comparison. 63 type RowDiff struct { 64 Row map[string]string `json:"Row,omitempty"` 65 Query string `json:"Query,omitempty"` 66 } 67 68 func (td *tableDiffer) genRowDiff(queryStmt string, row []sqltypes.Value, debug, onlyPks bool) (*RowDiff, error) { 69 drp := &RowDiff{} 70 drp.Row = make(map[string]string) 71 statement, err := sqlparser.Parse(queryStmt) 72 if err != nil { 73 return nil, err 74 } 75 sel, ok := statement.(*sqlparser.Select) 76 if !ok { 77 return nil, fmt.Errorf("unexpected: %+v", sqlparser.String(statement)) 78 } 79 80 if debug { 81 drp.Query = td.genDebugQueryDiff(sel, row, onlyPks) 82 } 83 84 setVal := func(index int) { 85 buf := sqlparser.NewTrackedBuffer(nil) 86 sel.SelectExprs[index].Format(buf) 87 col := buf.String() 88 drp.Row[col] = row[index].ToString() 89 } 90 91 if onlyPks { 92 for _, pkI := range td.tablePlan.selectPks { 93 setVal(pkI) 94 } 95 return drp, nil 96 } 97 98 for i := range sel.SelectExprs { 99 setVal(i) 100 } 101 formatSampleRow(drp) 102 103 return drp, nil 104 } 105 106 func (td *tableDiffer) genDebugQueryDiff(sel *sqlparser.Select, row []sqltypes.Value, onlyPks bool) string { 107 buf := sqlparser.NewTrackedBuffer(nil) 108 buf.Myprintf("select ") 109 110 if onlyPks { 111 for i, pkI := range td.tablePlan.selectPks { 112 pk := sel.SelectExprs[pkI] 113 pk.Format(buf) 114 if i != len(td.tablePlan.selectPks)-1 { 115 buf.Myprintf(", ") 116 } 117 } 118 } else { 119 sel.SelectExprs.Format(buf) 120 } 121 buf.Myprintf(" from ") 122 buf.Myprintf(sqlparser.ToString(sel.From)) 123 buf.Myprintf(" where ") 124 for i, pkI := range td.tablePlan.selectPks { 125 sel.SelectExprs[pkI].Format(buf) 126 buf.Myprintf("=") 127 row[pkI].EncodeSQL(buf) 128 if i != len(td.tablePlan.selectPks)-1 { 129 buf.Myprintf(" AND ") 130 } 131 } 132 buf.Myprintf(";") 133 return buf.String() 134 } 135 136 // formatSampleRow returns a formatted string representing a sample 137 // extra/mismatched row 138 func formatSampleRow(rd *RowDiff) { 139 keys := make([]string, 0, len(rd.Row)) 140 rowString := strings.Builder{} 141 for k := range rd.Row { 142 keys = append(keys, k) 143 } 144 145 sort.Strings(keys) 146 for _, k := range keys { 147 // Let's truncate if it's really worth it to avoid losing value for a few chars 148 if len(rd.Row[k]) >= 30+len(truncatedNotation)+20 { 149 rd.Row[k] = rd.Row[k][:30] + truncatedNotation 150 } 151 rowString.WriteString(fmt.Sprintf("%s: %s\n", k, rd.Row[k])) 152 } 153 }