vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/report.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vdiff
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  	"strings"
    23  
    24  	"vitess.io/vitess/go/sqltypes"
    25  	"vitess.io/vitess/go/vt/sqlparser"
    26  )
    27  
const (
	// At most how many samples we should show for row differences in the final report
	maxVDiffReportSampleRows = 10
	// truncatedNotation is the marker that formatSampleRow appends to a sample
	// column value after cutting it short.
	truncatedNotation = "...[TRUNCATED]"
)
    33  
// DiffReport is the summary of differences for one table.
//
// It holds row counters plus, for each kind of difference, a slice of sample
// rows. The sample slices are encoded to JSON under the keys
// "ExtraRowsSourceSample", "ExtraRowsTargetSample" and "MismatchedRowsSample"
// and are omitted from the output when empty.
type DiffReport struct {
	// TableName is the name of the table the report covers.
	TableName string

	// counts
	// NOTE(review): the counters are populated outside this file; the
	// per-field descriptions below are inferred from the field names —
	// confirm against the code that fills them in.
	ProcessedRows   int64 // presumably the number of rows compared
	MatchingRows    int64 // presumably rows identical on source and target
	MismatchedRows  int64 // presumably rows present on both sides with differing values
	ExtraRowsSource int64 // presumably rows found only on the source
	ExtraRowsTarget int64 // presumably rows found only on the target

	// actual data for a few sample rows
	ExtraRowsSourceDiffs []*RowDiff      `json:"ExtraRowsSourceSample,omitempty"`
	ExtraRowsTargetDiffs []*RowDiff      `json:"ExtraRowsTargetSample,omitempty"`
	MismatchedRowsDiffs  []*DiffMismatch `json:"MismatchedRowsSample,omitempty"`
}
    50  
// ProgressReport holds a completion percentage and an estimated completion
// time. ETA is left out of the JSON output when it is the empty string.
//
// NOTE(review): the producer of these values is not in this file, so the scale
// of Percentage (0-1 vs. 0-100) and the exact ETA layout should be confirmed
// there.
type ProgressReport struct {
	Percentage float64
	ETA        string `json:"ETA,omitempty"` // a formatted date
}
    55  
// DiffMismatch is a sample of row diffs between source and target.
// It pairs the source-side and target-side versions of one mismatched row;
// either side is omitted from the JSON output when its pointer is nil.
type DiffMismatch struct {
	Source *RowDiff `json:"Source,omitempty"`
	Target *RowDiff `json:"Target,omitempty"`
}
    61  
// RowDiff is a row that didn't match as part of the comparison.
type RowDiff struct {
	// Row maps each reported select expression, rendered as SQL text, to the
	// string form of the row's value for that expression (see genRowDiff).
	Row map[string]string `json:"Row,omitempty"`
	// Query is a SELECT that fetches this row by primary key. genRowDiff only
	// fills it in when called with debug enabled; otherwise it stays empty and
	// is omitted from the JSON output.
	Query string `json:"Query,omitempty"`
}
    67  
    68  func (td *tableDiffer) genRowDiff(queryStmt string, row []sqltypes.Value, debug, onlyPks bool) (*RowDiff, error) {
    69  	drp := &RowDiff{}
    70  	drp.Row = make(map[string]string)
    71  	statement, err := sqlparser.Parse(queryStmt)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	sel, ok := statement.(*sqlparser.Select)
    76  	if !ok {
    77  		return nil, fmt.Errorf("unexpected: %+v", sqlparser.String(statement))
    78  	}
    79  
    80  	if debug {
    81  		drp.Query = td.genDebugQueryDiff(sel, row, onlyPks)
    82  	}
    83  
    84  	setVal := func(index int) {
    85  		buf := sqlparser.NewTrackedBuffer(nil)
    86  		sel.SelectExprs[index].Format(buf)
    87  		col := buf.String()
    88  		drp.Row[col] = row[index].ToString()
    89  	}
    90  
    91  	if onlyPks {
    92  		for _, pkI := range td.tablePlan.selectPks {
    93  			setVal(pkI)
    94  		}
    95  		return drp, nil
    96  	}
    97  
    98  	for i := range sel.SelectExprs {
    99  		setVal(i)
   100  	}
   101  	formatSampleRow(drp)
   102  
   103  	return drp, nil
   104  }
   105  
   106  func (td *tableDiffer) genDebugQueryDiff(sel *sqlparser.Select, row []sqltypes.Value, onlyPks bool) string {
   107  	buf := sqlparser.NewTrackedBuffer(nil)
   108  	buf.Myprintf("select ")
   109  
   110  	if onlyPks {
   111  		for i, pkI := range td.tablePlan.selectPks {
   112  			pk := sel.SelectExprs[pkI]
   113  			pk.Format(buf)
   114  			if i != len(td.tablePlan.selectPks)-1 {
   115  				buf.Myprintf(", ")
   116  			}
   117  		}
   118  	} else {
   119  		sel.SelectExprs.Format(buf)
   120  	}
   121  	buf.Myprintf(" from ")
   122  	buf.Myprintf(sqlparser.ToString(sel.From))
   123  	buf.Myprintf(" where ")
   124  	for i, pkI := range td.tablePlan.selectPks {
   125  		sel.SelectExprs[pkI].Format(buf)
   126  		buf.Myprintf("=")
   127  		row[pkI].EncodeSQL(buf)
   128  		if i != len(td.tablePlan.selectPks)-1 {
   129  			buf.Myprintf(" AND ")
   130  		}
   131  	}
   132  	buf.Myprintf(";")
   133  	return buf.String()
   134  }
   135  
   136  // formatSampleRow returns a formatted string representing a sample
   137  // extra/mismatched row
   138  func formatSampleRow(rd *RowDiff) {
   139  	keys := make([]string, 0, len(rd.Row))
   140  	rowString := strings.Builder{}
   141  	for k := range rd.Row {
   142  		keys = append(keys, k)
   143  	}
   144  
   145  	sort.Strings(keys)
   146  	for _, k := range keys {
   147  		// Let's truncate if it's really worth it to avoid losing value for a few chars
   148  		if len(rd.Row[k]) >= 30+len(truncatedNotation)+20 {
   149  			rd.Row[k] = rd.Row[k][:30] + truncatedNotation
   150  		}
   151  		rowString.WriteString(fmt.Sprintf("%s: %s\n", k, rd.Row[k]))
   152  	}
   153  }