// Source: code.gitea.io/gitea@v1.21.7/services/gitdiff/csv.go

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package gitdiff

import (
	"encoding/csv"
	"errors"
	"io"
)

const (
	// unmappedColumn marks a column that has no counterpart in the other file.
	unmappedColumn = -1
	// maxRowsToInspect is the number of rows buffered per file; the buffered
	// rows are the ones compared when columns are matched by content.
	maxRowsToInspect int = 10
	// minRatioToMatch is the ratio of equal cells that must be exceeded for
	// two columns to be matched by content.
	minRatioToMatch float32 = 0.8
)

// TableDiffCellType represents the type of a TableDiffCell.
type TableDiffCellType uint8

// TableDiffCellType possible values.
const (
	TableDiffCellUnchanged TableDiffCellType = iota + 1
	TableDiffCellChanged
	TableDiffCellAdd
	TableDiffCellDel
	TableDiffCellMovedUnchanged
	TableDiffCellMovedChanged
)

// TableDiffCell represents a cell of a TableDiffRow
type TableDiffCell struct {
	LeftCell  string // value taken from the base (left) file
	RightCell string // value taken from the head (right) file
	Type      TableDiffCellType
}

// TableDiffRow represents a row of a TableDiffSection.
type TableDiffRow struct {
	RowIdx int
	Cells  []*TableDiffCell
}

// TableDiffSection represents a section of a DiffFile.
type TableDiffSection struct {
	Rows []*TableDiffRow
}

// csvReader wraps a csv.Reader which buffers the first rows.
type csvReader struct {
	reader *csv.Reader
	buffer [][]string // the first rows of the file, filled by createCsvReader
	line   int        // number of rows consumed so far (buffered rows included)
	eof    bool       // set once the underlying reader has reported io.EOF
}

// ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
var ErrorUndefinedCell = errors.New("undefined cell")

// createCsvReader creates a csvReader and fills the buffer
//
// The buffer always has bufferRowCount entries. When the input has fewer rows,
// reading stops at the end of the file and the remaining entries stay nil.
// A read error other than io.EOF aborts the construction and is returned.
func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
	csv := &csvReader{reader: reader}
	csv.buffer = make([][]string, bufferRowCount)
	for i := 0; i < bufferRowCount && !csv.eof; i++ {
		row, err := csv.readNextRow()
		if err != nil {
			return nil, err
		}
		// At the end of the file row is nil, so the slot simply stays empty.
		csv.buffer[i] = row
	}
	// Rows past the buffer are counted from here on by GetRow.
	csv.line = bufferRowCount
	return csv, nil
}

// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
76 func (csv *csvReader) GetRow(row int) ([]string, error) { 77 if row < len(csv.buffer) && row >= 0 { 78 return csv.buffer[row], nil 79 } 80 if csv.eof { 81 return nil, nil 82 } 83 for { 84 fields, err := csv.readNextRow() 85 if err != nil { 86 return nil, err 87 } 88 if csv.eof { 89 return nil, nil 90 } 91 csv.line++ 92 if csv.line-1 == row { 93 return fields, nil 94 } 95 } 96 } 97 98 func (csv *csvReader) readNextRow() ([]string, error) { 99 if csv.eof { 100 return nil, nil 101 } 102 row, err := csv.reader.Read() 103 if err != nil { 104 if err != io.EOF { 105 return nil, err 106 } 107 csv.eof = true 108 } 109 return row, nil 110 } 111 112 // CreateCsvDiff creates a tabular diff based on two CSV readers. 113 func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) { 114 if baseReader != nil && headReader != nil { 115 return createCsvDiff(diffFile, baseReader, headReader) 116 } 117 118 if baseReader != nil { 119 return createCsvDiffSingle(baseReader, TableDiffCellDel) 120 } 121 return createCsvDiffSingle(headReader, TableDiffCellAdd) 122 } 123 124 // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. 
func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
	var rows []*TableDiffRow
	// Row indexes in the resulting table start at 1.
	i := 1
	for {
		row, err := reader.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			return nil, err
		}
		cells := make([]*TableDiffCell, len(row))
		for j := 0; j < len(row); j++ {
			// Deleted content belongs to the left side, everything else to the right side.
			if celltype == TableDiffCellDel {
				cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
			} else {
				cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
			}
		}
		rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
		i++
	}

	// The whole file is returned as one section, even when it has no rows.
	return []*TableDiffSection{{Rows: rows}}, nil
}

// createCsvDiff creates a tabular diff of the base and head CSV files for the lines listed in diffFile.Sections.
// Columns are mapped between both files first (see getColumnMapping); each merged diff line then becomes one
// TableDiffRow. Sections that produce no rows are left out of the result.
func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
	// Given the baseReader and headReader, we create a buffering csvReader for each: baseCSVReader and headCSVReader respectively
	baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
	if err != nil {
		return nil, err
	}
	headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
	if err != nil {
		return nil, err
	}

	// Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
	a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)

	// Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
	numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
	if len(a2bColMap) < len(b2aColMap) {
		numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
	}

	// createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
	// in the base or head respectively.
	// Returns a TableDiffRow whose RowIdx is the `b` line number, or nil if both line numbers are 0.
	createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
		// diffTableCells is a row of the diff table. It will have cells for added, deleted, changed, and unchanged content, thus either
		// the same size as the head table or bigger
		diffTableCells := make([]*TableDiffCell, numDiffTableCols)
		// bRow/aRow are nil only when the line number is 0. The row they point to may itself be nil
		// (GetRow returns nil past the end of the file); getCell then reports every cell as undefined.
		var bRow *[]string
		if bLineNum > 0 {
			row, err := headCSVReader.GetRow(bLineNum - 1)
			if err != nil {
				return nil, err
			}
			bRow = &row
		}
		var aRow *[]string
		if aLineNum > 0 {
			row, err := baseCSVReader.GetRow(aLineNum - 1)
			if err != nil {
				return nil, err
			}
			aRow = &row
		}
		if aRow == nil && bRow == nil {
			// No content
			return nil, nil
		}

		aIndex := 0      // tracks where we are in the a2bColMap
		bIndex := 0      // tracks where we are in the b2aColMap
		colsAdded := 0   // incremented whenever we found a column was added
		colsDeleted := 0 // incremented whenever a column was deleted

		// We loop until both the aIndex and bIndex have reached the end of their col map, at which point we are done
		for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
			// Starting from where aIndex is currently pointing, we see if the map is -1 (deleted) and if it is, create a column to note that, increment, and look at the next aIndex
			for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
				var aCell string
				if aRow != nil {
					// An undefined cell (short or missing row) is not an error; the cell just stays empty.
					if cell, err := getCell(*aRow, aIndex); err != nil {
						if err != ErrorUndefinedCell {
							return nil, err
						}
					} else {
						aCell = cell
					}
				}
				diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
				aIndex++
				colsDeleted++
			}

			// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
			// we just increment aIndex until it points to a -1 column or reaches the end of a2bColMap
			for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
				aIndex++
			}

			// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if it is, create a column to note that, increment, and look at the next bIndex
			for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
				var bCell string
				cellType := TableDiffCellAdd
				if bRow != nil {
					if cell, err := getCell(*bRow, bIndex); err != nil {
						if err != ErrorUndefinedCell {
							return nil, err
						}
					} else {
						bCell = cell
					}
				} else {
					// The line does not exist in head, so the cell of this (added) column is marked as deleted.
					cellType = TableDiffCellDel
				}
				diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
				bIndex++
				colsAdded++
			}

			// bIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
			// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
			for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
				var diffTableCell TableDiffCell

				var aCell *string
				// get the aCell value if the aRow exists
				if aRow != nil {
					if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
						if err != ErrorUndefinedCell {
							return nil, err
						}
					} else {
						aCell = &cell
						diffTableCell.LeftCell = cell
					}
				} else {
					diffTableCell.Type = TableDiffCellAdd
				}

				var bCell *string
				// get the bCell value if the bRow exists
				if bRow != nil {
					if cell, err := getCell(*bRow, bIndex); err != nil {
						if err != ErrorUndefinedCell {
							return nil, err
						}
					} else {
						bCell = &cell
						diffTableCell.RightCell = cell
					}
				} else {
					diffTableCell.Type = TableDiffCellDel
				}

				// if both a and b have a cell that exists, compare the values and determine if the column has moved
				if aCell != nil && bCell != nil {
					moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
					if *aCell != *bCell {
						if moved {
							diffTableCell.Type = TableDiffCellMovedChanged
						} else {
							diffTableCell.Type = TableDiffCellChanged
						}
					} else {
						if moved {
							diffTableCell.Type = TableDiffCellMovedUnchanged
						} else {
							diffTableCell.Type = TableDiffCellUnchanged
						}
						// For unchanged content only the right value is kept.
						diffTableCell.LeftCell = ""
					}
				}

				// Add the diff column to the diff row
				diffTableCells[bIndex+colsDeleted] = &diffTableCell
				bIndex++
			}
		}

		return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
	}

	// diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
	var diffTableSections []*TableDiffSection

	for i, section := range diffFile.Sections {
		// Each section has multiple diffTableRows
		var diffTableRows []*TableDiffRow
		lines := tryMergeLines(section.Lines)
		// Loop through the merged lines to get each row of the CSV diff table for this section
		for j, line := range lines {
			// If the very first diff line is not line 1 on both sides, row 1 is emitted first
			// (presumably so that the header row is always part of the table).
			if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
				diffTableRow, err := createDiffTableRow(1, 1)
				if err != nil {
					return nil, err
				}
				if diffTableRow != nil {
					diffTableRows = append(diffTableRows, diffTableRow)
				}
			}
			diffTableRow, err := createDiffTableRow(line[0], line[1])
			if err != nil {
				return nil, err
			}
			if diffTableRow != nil {
				diffTableRows = append(diffTableRows, diffTableRow)
			}
		}

		if len(diffTableRows) > 0 {
			diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
		}
	}

	return diffTableSections, nil
}

// getColumnMapping creates a mapping of columns between a and b
//
// The first returned slice maps every base column to its head column index, the second maps every head
// column to its base column index; unmappedColumn (-1) marks a column without a counterpart.
// Columns are matched by the value of their first-row cell first, and the ones still unmapped are
// then matched by the content of the buffered rows.
func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
	// Row 0 is served from the reader's buffer whenever the buffer is not empty, in which case GetRow
	// returns no error; the error results are discarded here.
	baseRow, _ := baseCSVReader.GetRow(0)
	headRow, _ := headCSVReader.GetRow(0)

	base2HeadColMap := []int{}
	head2BaseColMap := []int{}

	if baseRow != nil {
		base2HeadColMap = make([]int, len(baseRow))
	}
	if headRow != nil {
		head2BaseColMap = make([]int, len(headRow))
	}

	// Initializes all head2base mappings to be unmappedColumn (-1)
	for i := 0; i < len(head2BaseColMap); i++ {
		head2BaseColMap[i] = unmappedColumn
	}

	// Loops through the baseRow and sees if there is a match in the head row
	for i := 0; i < len(baseRow); i++ {
		base2HeadColMap[i] = unmappedColumn
		baseCell, err := getCell(baseRow, i)
		if err == nil {
			for j := 0; j < len(headRow); j++ {
				// Only head columns that are still unmapped can be taken, so equal values are paired in order.
				if head2BaseColMap[j] == -1 {
					headCell, err := getCell(headRow, j)
					if err == nil && baseCell == headCell {
						base2HeadColMap[i] = j
						head2BaseColMap[j] = i
						break
					}
				}
			}
		}
	}

	// Try to map the remaining columns by content, once in each direction.
	tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
	tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)

	return base2HeadColMap, head2BaseColMap
}

// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
392 func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) { 393 for i := 0; i < len(base2HeadColMap); i++ { 394 headStart := 0 395 for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) { 396 if head2BaseColMap[headStart] == unmappedColumn { 397 rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1)) 398 same := 0 399 for j := 1; j <= rows; j++ { 400 baseCell, baseErr := getCell(baseCSVReader.buffer[j], i) 401 headCell, headErr := getCell(headCSVReader.buffer[j], headStart) 402 if baseErr == nil && headErr == nil && baseCell == headCell { 403 same++ 404 } 405 } 406 if (float32(same) / float32(rows)) > minRatioToMatch { 407 base2HeadColMap[i] = headStart 408 head2BaseColMap[headStart] = i 409 } 410 } 411 headStart++ 412 } 413 } 414 } 415 416 // getCell returns the specific cell or nil if not present. 417 func getCell(row []string, column int) (string, error) { 418 if column < len(row) { 419 return row[column], nil 420 } 421 return "", ErrorUndefinedCell 422 } 423 424 // countUnmappedColumns returns the count of unmapped columns. 425 func countUnmappedColumns(mapping []int) int { 426 count := 0 427 for i := 0; i < len(mapping); i++ { 428 if mapping[i] == unmappedColumn { 429 count++ 430 } 431 } 432 return count 433 } 434 435 // tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. 
436 func tryMergeLines(lines []*DiffLine) [][2]int { 437 ids := make([][2]int, len(lines)) 438 439 i := 0 440 for _, line := range lines { 441 if line.Type != DiffLineSection { 442 ids[i][0] = line.LeftIdx 443 ids[i][1] = line.RightIdx 444 i++ 445 } 446 } 447 448 ids = ids[:i] 449 450 result := make([][2]int, len(ids)) 451 452 j := 0 453 for i = 0; i < len(ids); i++ { 454 if ids[i][0] == 0 { 455 if j > 0 && result[j-1][1] == 0 { 456 temp := j 457 for temp > 0 && result[temp-1][1] == 0 { 458 temp-- 459 } 460 result[temp][1] = ids[i][1] 461 continue 462 } 463 } 464 result[j] = ids[i] 465 j++ 466 } 467 468 return result[:j] 469 }