code.gitea.io/gitea@v1.21.7/services/gitdiff/csv.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package gitdiff
     5  
     6  import (
     7  	"encoding/csv"
     8  	"errors"
     9  	"io"
    10  )
    11  
    12  const (
    13  	unmappedColumn           = -1
    14  	maxRowsToInspect int     = 10
    15  	minRatioToMatch  float32 = 0.8
    16  )
    17  
    18  // TableDiffCellType represents the type of a TableDiffCell.
    19  type TableDiffCellType uint8
    20  
    21  // TableDiffCellType possible values.
    22  const (
    23  	TableDiffCellUnchanged TableDiffCellType = iota + 1
    24  	TableDiffCellChanged
    25  	TableDiffCellAdd
    26  	TableDiffCellDel
    27  	TableDiffCellMovedUnchanged
    28  	TableDiffCellMovedChanged
    29  )
    30  
    31  // TableDiffCell represents a cell of a TableDiffRow
    32  type TableDiffCell struct {
    33  	LeftCell  string
    34  	RightCell string
    35  	Type      TableDiffCellType
    36  }
    37  
    38  // TableDiffRow represents a row of a TableDiffSection.
    39  type TableDiffRow struct {
    40  	RowIdx int
    41  	Cells  []*TableDiffCell
    42  }
    43  
    44  // TableDiffSection represents a section of a DiffFile.
    45  type TableDiffSection struct {
    46  	Rows []*TableDiffRow
    47  }
    48  
    49  // csvReader wraps a csv.Reader which buffers the first rows.
    50  type csvReader struct {
    51  	reader *csv.Reader
    52  	buffer [][]string
    53  	line   int
    54  	eof    bool
    55  }
    56  
    57  // ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
    58  var ErrorUndefinedCell = errors.New("undefined cell")
    59  
    60  // createCsvReader creates a csvReader and fills the buffer
    61  func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
    62  	csv := &csvReader{reader: reader}
    63  	csv.buffer = make([][]string, bufferRowCount)
    64  	for i := 0; i < bufferRowCount && !csv.eof; i++ {
    65  		row, err := csv.readNextRow()
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  		csv.buffer[i] = row
    70  	}
    71  	csv.line = bufferRowCount
    72  	return csv, nil
    73  }
    74  
    75  // GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
    76  func (csv *csvReader) GetRow(row int) ([]string, error) {
    77  	if row < len(csv.buffer) && row >= 0 {
    78  		return csv.buffer[row], nil
    79  	}
    80  	if csv.eof {
    81  		return nil, nil
    82  	}
    83  	for {
    84  		fields, err := csv.readNextRow()
    85  		if err != nil {
    86  			return nil, err
    87  		}
    88  		if csv.eof {
    89  			return nil, nil
    90  		}
    91  		csv.line++
    92  		if csv.line-1 == row {
    93  			return fields, nil
    94  		}
    95  	}
    96  }
    97  
    98  func (csv *csvReader) readNextRow() ([]string, error) {
    99  	if csv.eof {
   100  		return nil, nil
   101  	}
   102  	row, err := csv.reader.Read()
   103  	if err != nil {
   104  		if err != io.EOF {
   105  			return nil, err
   106  		}
   107  		csv.eof = true
   108  	}
   109  	return row, nil
   110  }
   111  
   112  // CreateCsvDiff creates a tabular diff based on two CSV readers.
   113  func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
   114  	if baseReader != nil && headReader != nil {
   115  		return createCsvDiff(diffFile, baseReader, headReader)
   116  	}
   117  
   118  	if baseReader != nil {
   119  		return createCsvDiffSingle(baseReader, TableDiffCellDel)
   120  	}
   121  	return createCsvDiffSingle(headReader, TableDiffCellAdd)
   122  }
   123  
   124  // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
   125  func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
   126  	var rows []*TableDiffRow
   127  	i := 1
   128  	for {
   129  		row, err := reader.Read()
   130  		if err != nil {
   131  			if err == io.EOF {
   132  				break
   133  			}
   134  			return nil, err
   135  		}
   136  		cells := make([]*TableDiffCell, len(row))
   137  		for j := 0; j < len(row); j++ {
   138  			if celltype == TableDiffCellDel {
   139  				cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
   140  			} else {
   141  				cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
   142  			}
   143  		}
   144  		rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
   145  		i++
   146  	}
   147  
   148  	return []*TableDiffSection{{Rows: rows}}, nil
   149  }
   150  
   151  func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
   152  	// Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
   153  	baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  	headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  
   162  	// Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
   163  	a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
   164  
   165  	// Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
   166  	numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
   167  	if len(a2bColMap) < len(b2aColMap) {
   168  		numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
   169  	}
   170  
   171  	// createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
   172  	// in the base or head respectively.
   173  	// Returns a TableDiffRow which has the row index
   174  	createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
   175  		// diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
   176  		// the same size as the head table or bigger
   177  		diffTableCells := make([]*TableDiffCell, numDiffTableCols)
   178  		var bRow *[]string
   179  		if bLineNum > 0 {
   180  			row, err := headCSVReader.GetRow(bLineNum - 1)
   181  			if err != nil {
   182  				return nil, err
   183  			}
   184  			bRow = &row
   185  		}
   186  		var aRow *[]string
   187  		if aLineNum > 0 {
   188  			row, err := baseCSVReader.GetRow(aLineNum - 1)
   189  			if err != nil {
   190  				return nil, err
   191  			}
   192  			aRow = &row
   193  		}
   194  		if aRow == nil && bRow == nil {
   195  			// No content
   196  			return nil, nil
   197  		}
   198  
   199  		aIndex := 0      // tracks where we are in the a2bColMap
   200  		bIndex := 0      // tracks where we are in the b2aColMap
   201  		colsAdded := 0   // incremented whenever we found a column was added
   202  		colsDeleted := 0 // incrememted whenever a column was deleted
   203  
   204  		// We loop until both the aIndex and bIndex are greater than their col map, which then we are done
   205  		for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
   206  			// Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
   207  			for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
   208  				var aCell string
   209  				if aRow != nil {
   210  					if cell, err := getCell(*aRow, aIndex); err != nil {
   211  						if err != ErrorUndefinedCell {
   212  							return nil, err
   213  						}
   214  					} else {
   215  						aCell = cell
   216  					}
   217  				}
   218  				diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
   219  				aIndex++
   220  				colsDeleted++
   221  			}
   222  
   223  			// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
   224  			// we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
   225  			for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
   226  				aIndex++
   227  			}
   228  
   229  			// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
   230  			for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
   231  				var bCell string
   232  				cellType := TableDiffCellAdd
   233  				if bRow != nil {
   234  					if cell, err := getCell(*bRow, bIndex); err != nil {
   235  						if err != ErrorUndefinedCell {
   236  							return nil, err
   237  						}
   238  					} else {
   239  						bCell = cell
   240  					}
   241  				} else {
   242  					cellType = TableDiffCellDel
   243  				}
   244  				diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
   245  				bIndex++
   246  				colsAdded++
   247  			}
   248  
   249  			// aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
   250  			// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
   251  			for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
   252  				var diffTableCell TableDiffCell
   253  
   254  				var aCell *string
   255  				// get the aCell value if the aRow exists
   256  				if aRow != nil {
   257  					if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
   258  						if err != ErrorUndefinedCell {
   259  							return nil, err
   260  						}
   261  					} else {
   262  						aCell = &cell
   263  						diffTableCell.LeftCell = cell
   264  					}
   265  				} else {
   266  					diffTableCell.Type = TableDiffCellAdd
   267  				}
   268  
   269  				var bCell *string
   270  				// get the bCell value if the bRow exists
   271  				if bRow != nil {
   272  					if cell, err := getCell(*bRow, bIndex); err != nil {
   273  						if err != ErrorUndefinedCell {
   274  							return nil, err
   275  						}
   276  					} else {
   277  						bCell = &cell
   278  						diffTableCell.RightCell = cell
   279  					}
   280  				} else {
   281  					diffTableCell.Type = TableDiffCellDel
   282  				}
   283  
   284  				// if both a and b have a row that exists, compare the value and determine if the row has moved
   285  				if aCell != nil && bCell != nil {
   286  					moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
   287  					if *aCell != *bCell {
   288  						if moved {
   289  							diffTableCell.Type = TableDiffCellMovedChanged
   290  						} else {
   291  							diffTableCell.Type = TableDiffCellChanged
   292  						}
   293  					} else {
   294  						if moved {
   295  							diffTableCell.Type = TableDiffCellMovedUnchanged
   296  						} else {
   297  							diffTableCell.Type = TableDiffCellUnchanged
   298  						}
   299  						diffTableCell.LeftCell = ""
   300  					}
   301  				}
   302  
   303  				// Add the diff column to the diff row
   304  				diffTableCells[bIndex+colsDeleted] = &diffTableCell
   305  				bIndex++
   306  			}
   307  		}
   308  
   309  		return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
   310  	}
   311  
   312  	// diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
   313  	var diffTableSections []*TableDiffSection
   314  
   315  	for i, section := range diffFile.Sections {
   316  		// Each section has multiple diffTableRows
   317  		var diffTableRows []*TableDiffRow
   318  		lines := tryMergeLines(section.Lines)
   319  		// Loop through the merged lines to get each row of the CSV diff table for this section
   320  		for j, line := range lines {
   321  			if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
   322  				diffTableRow, err := createDiffTableRow(1, 1)
   323  				if err != nil {
   324  					return nil, err
   325  				}
   326  				if diffTableRow != nil {
   327  					diffTableRows = append(diffTableRows, diffTableRow)
   328  				}
   329  			}
   330  			diffTableRow, err := createDiffTableRow(line[0], line[1])
   331  			if err != nil {
   332  				return nil, err
   333  			}
   334  			if diffTableRow != nil {
   335  				diffTableRows = append(diffTableRows, diffTableRow)
   336  			}
   337  		}
   338  
   339  		if len(diffTableRows) > 0 {
   340  			diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
   341  		}
   342  	}
   343  
   344  	return diffTableSections, nil
   345  }
   346  
   347  // getColumnMapping creates a mapping of columns between a and b
   348  func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
   349  	baseRow, _ := baseCSVReader.GetRow(0)
   350  	headRow, _ := headCSVReader.GetRow(0)
   351  
   352  	base2HeadColMap := []int{}
   353  	head2BaseColMap := []int{}
   354  
   355  	if baseRow != nil {
   356  		base2HeadColMap = make([]int, len(baseRow))
   357  	}
   358  	if headRow != nil {
   359  		head2BaseColMap = make([]int, len(headRow))
   360  	}
   361  
   362  	// Initializes all head2base mappings to be unmappedColumn (-1)
   363  	for i := 0; i < len(head2BaseColMap); i++ {
   364  		head2BaseColMap[i] = unmappedColumn
   365  	}
   366  
   367  	// Loops through the baseRow and see if there is a match in the head row
   368  	for i := 0; i < len(baseRow); i++ {
   369  		base2HeadColMap[i] = unmappedColumn
   370  		baseCell, err := getCell(baseRow, i)
   371  		if err == nil {
   372  			for j := 0; j < len(headRow); j++ {
   373  				if head2BaseColMap[j] == -1 {
   374  					headCell, err := getCell(headRow, j)
   375  					if err == nil && baseCell == headCell {
   376  						base2HeadColMap[i] = j
   377  						head2BaseColMap[j] = i
   378  						break
   379  					}
   380  				}
   381  			}
   382  		}
   383  	}
   384  
   385  	tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
   386  	tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)
   387  
   388  	return base2HeadColMap, head2BaseColMap
   389  }
   390  
   391  // tryMapColumnsByContent tries to map missing columns by the content of the first lines.
   392  func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
   393  	for i := 0; i < len(base2HeadColMap); i++ {
   394  		headStart := 0
   395  		for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) {
   396  			if head2BaseColMap[headStart] == unmappedColumn {
   397  				rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
   398  				same := 0
   399  				for j := 1; j <= rows; j++ {
   400  					baseCell, baseErr := getCell(baseCSVReader.buffer[j], i)
   401  					headCell, headErr := getCell(headCSVReader.buffer[j], headStart)
   402  					if baseErr == nil && headErr == nil && baseCell == headCell {
   403  						same++
   404  					}
   405  				}
   406  				if (float32(same) / float32(rows)) > minRatioToMatch {
   407  					base2HeadColMap[i] = headStart
   408  					head2BaseColMap[headStart] = i
   409  				}
   410  			}
   411  			headStart++
   412  		}
   413  	}
   414  }
   415  
   416  // getCell returns the specific cell or nil if not present.
   417  func getCell(row []string, column int) (string, error) {
   418  	if column < len(row) {
   419  		return row[column], nil
   420  	}
   421  	return "", ErrorUndefinedCell
   422  }
   423  
   424  // countUnmappedColumns returns the count of unmapped columns.
   425  func countUnmappedColumns(mapping []int) int {
   426  	count := 0
   427  	for i := 0; i < len(mapping); i++ {
   428  		if mapping[i] == unmappedColumn {
   429  			count++
   430  		}
   431  	}
   432  	return count
   433  }
   434  
   435  // tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
   436  func tryMergeLines(lines []*DiffLine) [][2]int {
   437  	ids := make([][2]int, len(lines))
   438  
   439  	i := 0
   440  	for _, line := range lines {
   441  		if line.Type != DiffLineSection {
   442  			ids[i][0] = line.LeftIdx
   443  			ids[i][1] = line.RightIdx
   444  			i++
   445  		}
   446  	}
   447  
   448  	ids = ids[:i]
   449  
   450  	result := make([][2]int, len(ids))
   451  
   452  	j := 0
   453  	for i = 0; i < len(ids); i++ {
   454  		if ids[i][0] == 0 {
   455  			if j > 0 && result[j-1][1] == 0 {
   456  				temp := j
   457  				for temp > 0 && result[temp-1][1] == 0 {
   458  					temp--
   459  				}
   460  				result[temp][1] = ids[i][1]
   461  				continue
   462  			}
   463  		}
   464  		result[j] = ids[i]
   465  		j++
   466  	}
   467  
   468  	return result[:j]
   469  }