github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/diff/diffsplitter.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package diff
    16  
    17  import (
    18  	"errors"
    19  	"strings"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql"
    22  )
    23  
    24  const (
    25  	fromPrefix = "from_"
    26  	toPrefix   = "to_"
    27  
    28  	addedStr    = "added"
    29  	modifiedStr = "modified"
    30  	removedStr  = "removed"
    31  )
    32  
    33  type DiffSplitter struct {
    34  	// results schema of dolt_diff(...)
    35  	// sql table function
    36  	querySch sql.Schema
    37  	// output schema for CLI diff
    38  	targetSch sql.Schema
    39  	// maps querySch fields to targetSch
    40  	queryToTarget []int
    41  	// divides "from_..." and "to_..." cols
    42  	splitIdx int
    43  }
    44  
    45  type RowDiff struct {
    46  	Row      sql.Row
    47  	RowDiff  ChangeType
    48  	ColDiffs []ChangeType
    49  }
    50  
    51  // NewDiffSplitter returns a splitter that knows how to split unified diff query rows.
    52  // |querySch| is the result schema from the dolt_dif(...) table function
    53  // it contains "from_..." and "to..." columns corresponding to the "from"
    54  // and "to" schemas used to generate the diff.
    55  // |targetSch| is the output schema used to print the diff and is computed
    56  // as the union schema of the "from" and "to" schemas.
    57  
    58  func NewDiffSplitter(querySch sql.Schema, targetSch sql.Schema) (*DiffSplitter, error) {
    59  	split, err := findDiffSchemaSplit(querySch)
    60  	if err != nil {
    61  		return nil, err
    62  	}
    63  
    64  	qtt, err := mapQuerySchemaToTargetSchema(querySch, targetSch)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	return &DiffSplitter{
    70  		querySch:      querySch,
    71  		targetSch:     targetSch,
    72  		queryToTarget: qtt,
    73  		splitIdx:      split,
    74  	}, nil
    75  }
    76  
    77  func findDiffSchemaSplit(querySch sql.Schema) (int, error) {
    78  	split := -1
    79  	for i, col := range querySch {
    80  		if strings.HasPrefix(col.Name, fromPrefix) {
    81  			if split >= 0 { // seen first "to_..." col
    82  				return 0, errors.New("interleaved 'from' and 'to' cols")
    83  			}
    84  		} else if strings.HasPrefix(col.Name, toPrefix) {
    85  			if split < 0 { // |i| is first "to_..." col
    86  				split = i
    87  			}
    88  		} else if col.Name == "diff_type" {
    89  			if split < 0 {
    90  				split = i
    91  			}
    92  		} else {
    93  			return 0, errors.New("expected column prefix of 'to_' or 'from_' (" + col.Name + ")")
    94  		}
    95  	}
    96  	return split, nil
    97  }
    98  
    99  func mapQuerySchemaToTargetSchema(query, target sql.Schema) (mapping []int, err error) {
   100  	last := query[len(query)-1]
   101  	if last.Name != "diff_type" {
   102  		return nil, errors.New("expected last diff column to be 'diff_type'")
   103  	}
   104  	query = query[:len(query)-1]
   105  
   106  	mapping = make([]int, len(query))
   107  	for i, col := range query {
   108  		if strings.HasPrefix(col.Name, fromPrefix) {
   109  			base := col.Name[len(fromPrefix):]
   110  			mapping[i] = target.IndexOfColName(base)
   111  		} else if strings.HasPrefix(col.Name, toPrefix) {
   112  			base := col.Name[len(toPrefix):]
   113  			mapping[i] = target.IndexOfColName(base)
   114  		} else {
   115  			return nil, errors.New("expected column prefix of 'to_' or 'from_' (" + col.Name + ")")
   116  		}
   117  	}
   118  	return
   119  }
   120  
   121  func mapToAndFromColumns(query sql.Schema) (mapping []int, err error) {
   122  	last := query[len(query)-1]
   123  	if last.Name != "diff_type" {
   124  		return nil, errors.New("expected last diff column to be 'diff_type'")
   125  	}
   126  	query = query[:len(query)-1]
   127  
   128  	mapping = make([]int, len(query))
   129  	for i, col := range query {
   130  		if strings.HasPrefix(col.Name, fromPrefix) {
   131  			// map "from_..." column to "to_..." column
   132  			base := col.Name[len(fromPrefix):]
   133  			mapping[i] = query.IndexOfColName(toPrefix + base)
   134  		} else if strings.HasPrefix(col.Name, toPrefix) {
   135  			// map "to_..." column to "from_..." column
   136  			base := col.Name[len(toPrefix):]
   137  			mapping[i] = query.IndexOfColName(fromPrefix + base)
   138  		} else {
   139  			return nil, errors.New("expected column prefix of 'to_' or 'from_' (" + col.Name + ")")
   140  		}
   141  	}
   142  	// |mapping| will contain -1 for unmapped columns
   143  	return
   144  }
   145  
   146  func (ds DiffSplitter) SplitDiffResultRow(row sql.Row) (from, to RowDiff, err error) {
   147  	from = RowDiff{ColDiffs: make([]ChangeType, len(ds.targetSch))}
   148  	to = RowDiff{ColDiffs: make([]ChangeType, len(ds.targetSch))}
   149  
   150  	diffType := row[len(row)-1]
   151  	row = row[:len(row)-1]
   152  
   153  	switch diffType.(string) {
   154  	case removedStr:
   155  		from.Row = make(sql.Row, len(ds.targetSch))
   156  		from.RowDiff = Removed
   157  		for i := 0; i < ds.splitIdx; i++ {
   158  			j := ds.queryToTarget[i]
   159  			// skip any columns that aren't mapped
   160  			if j < 0 {
   161  				continue
   162  			}
   163  			from.Row[j] = row[i]
   164  			from.ColDiffs[j] = Removed
   165  		}
   166  
   167  	case addedStr:
   168  		to.Row = make(sql.Row, len(ds.targetSch))
   169  		to.RowDiff = Added
   170  		for i := ds.splitIdx; i < len(row); i++ {
   171  			j := ds.queryToTarget[i]
   172  			// skip any columns that aren't mapped
   173  			if j < 0 {
   174  				continue
   175  			}
   176  			to.Row[j] = row[i]
   177  			to.ColDiffs[j] = Added
   178  		}
   179  
   180  	case modifiedStr:
   181  		from.Row = make(sql.Row, len(ds.targetSch))
   182  		from.RowDiff = ModifiedOld
   183  		for i := 0; i < ds.splitIdx; i++ {
   184  			j := ds.queryToTarget[i]
   185  			// skip any columns that aren't mapped
   186  			if j < 0 {
   187  				continue
   188  			}
   189  			from.Row[j] = row[i]
   190  		}
   191  		to.Row = make(sql.Row, len(ds.targetSch))
   192  		to.RowDiff = ModifiedNew
   193  		for i := ds.splitIdx; i < len(row); i++ {
   194  			j := ds.queryToTarget[i]
   195  			to.Row[j] = row[i]
   196  		}
   197  		// now do field-wise comparison
   198  		var cmp int
   199  		for i, col := range ds.targetSch {
   200  			cmp, err = col.Type.Compare(from.Row[i], to.Row[i])
   201  			if err != nil {
   202  				return RowDiff{}, RowDiff{}, err
   203  			} else if cmp != 0 {
   204  				from.ColDiffs[i] = ModifiedOld
   205  				to.ColDiffs[i] = ModifiedNew
   206  			} else {
   207  				from.ColDiffs[i] = None
   208  				to.ColDiffs[i] = None
   209  			}
   210  		}
   211  
   212  	default:
   213  		panic("unknown diff type " + diffType.(string))
   214  	}
   215  	return
   216  }