github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/local/diff.go (about)

     1  package local
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net/http"
     7  	"net/url"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/go-openapi/swag"
    13  	"github.com/treeverse/lakefs/pkg/api/apigen"
    14  	"github.com/treeverse/lakefs/pkg/block"
    15  	"github.com/treeverse/lakefs/pkg/block/local"
    16  	"github.com/treeverse/lakefs/pkg/block/params"
    17  	"github.com/treeverse/lakefs/pkg/uri"
    18  )
    19  
// ChangeSource identifies the side a Change is attributed to: the remote or
// the local directory.
type ChangeSource int

const (
	// ChangeSourceRemote is the zero value; ChangeSourceString renders it as "remote".
	ChangeSourceRemote ChangeSource = iota
	// ChangeSourceLocal is rendered by ChangeSourceString as "local".
	ChangeSourceLocal
)
    26  
// ChangeType classifies what happened to the path named by a Change.
type ChangeType int

const (
	// ChangeTypeAdded is the zero value; ChangeTypeString renders it as "added".
	ChangeTypeAdded ChangeType = iota
	// ChangeTypeModified is rendered as "modified"; ChangeTypeFromString also
	// accepts "changed" for it.
	ChangeTypeModified
	// ChangeTypeRemoved is rendered as "removed".
	ChangeTypeRemoved
	// ChangeTypeConflict is rendered as "conflict". MergeWith emits it under
	// MergeStrategyNone when both change lists contain the same path.
	ChangeTypeConflict
)
    35  
// Change describes a single difference: the side it is attributed to, the
// path it affects and the kind of change.
type Change struct {
	Source ChangeSource // side the change is attributed to
	Path   string       // affected path; MergeWith orders and matches changes by this field
	Type   ChangeType   // kind of change
}
    41  
    42  func (c *Change) String() string {
    43  	return fmt.Sprintf("%s\t%s\t%s", ChangeSourceString(c.Source), ChangeTypeString(c.Type), c.Path)
    44  }
    45  
    46  func ChangeTypeFromString(changeType string) ChangeType {
    47  	switch changeType {
    48  	case "added":
    49  		return ChangeTypeAdded
    50  	case "removed":
    51  		return ChangeTypeRemoved
    52  	case "modified", "changed":
    53  		return ChangeTypeModified
    54  	case "conflict":
    55  		return ChangeTypeConflict
    56  	default:
    57  		panic("invalid change type")
    58  	}
    59  }
    60  
    61  func ChangeTypeString(changeType ChangeType) string {
    62  	switch changeType {
    63  	case ChangeTypeAdded:
    64  		return "added"
    65  	case ChangeTypeRemoved:
    66  		return "removed"
    67  	case ChangeTypeModified:
    68  		return "modified"
    69  	case ChangeTypeConflict:
    70  		return "conflict"
    71  	default:
    72  		panic("invalid change type")
    73  	}
    74  }
    75  
    76  func ChangeSourceString(changeSource ChangeSource) string {
    77  	switch changeSource {
    78  	case ChangeSourceLocal:
    79  		return "local"
    80  	case ChangeSourceRemote:
    81  		return "remote"
    82  	default:
    83  		panic("invalid change source")
    84  	}
    85  }
    86  
    87  type Changes []*Change
    88  
    89  func (c Changes) String() string {
    90  	strs := make([]string, len(c))
    91  	for i, cc := range c {
    92  		strs[i] = cc.String()
    93  	}
    94  	return strings.Join(strs, "\n")
    95  }
    96  
// MergeStrategy selects how MergeWith resolves a path that appears in both
// change lists.
type MergeStrategy int

const (
	// MergeStrategyNone is the zero value: the shared path is reported as a
	// ChangeTypeConflict entry.
	MergeStrategyNone MergeStrategy = iota
	// MergeStrategyThis keeps the entry from the receiver of MergeWith.
	MergeStrategyThis
	// MergeStrategyOther keeps the entry from the list passed to MergeWith.
	MergeStrategyOther
)
   104  
   105  // MergeWith combines changes from two diffs, sorting by lexicographic order.
   106  // If the same path appears in both diffs, it's marked as a conflict.
   107  func (c Changes) MergeWith(other Changes, strategy MergeStrategy) Changes {
   108  	cIdx := 0
   109  	oIdx := 0
   110  	result := make(Changes, 0)
   111  	for cIdx < len(c) && oIdx < len(other) {
   112  		switch {
   113  		case c[cIdx].Path > other[oIdx].Path:
   114  			// other is first
   115  			result = append(result, other[oIdx])
   116  			oIdx++
   117  		case c[cIdx].Path < other[oIdx].Path:
   118  			result = append(result, c[cIdx])
   119  			cIdx++
   120  		default: // both modified the same path!!
   121  			switch strategy {
   122  			case MergeStrategyNone:
   123  				result = append(result, &Change{
   124  					Source: c[cIdx].Source,
   125  					Path:   c[cIdx].Path,
   126  					Type:   ChangeTypeConflict,
   127  				})
   128  			case MergeStrategyOther:
   129  				result = append(result, other[oIdx])
   130  			case MergeStrategyThis:
   131  				result = append(result, c[cIdx])
   132  			default:
   133  				panic("invalid merge strategy")
   134  			}
   135  			cIdx++
   136  			oIdx++
   137  		}
   138  	}
   139  	if cIdx < len(c) {
   140  		result = append(result, c[cIdx:]...)
   141  	}
   142  	if oIdx < len(other) {
   143  		result = append(result, other[oIdx:]...)
   144  	}
   145  	return result
   146  }
   147  
   148  func switchSource(source ChangeSource) ChangeSource {
   149  	switch source {
   150  	case ChangeSourceRemote:
   151  		return ChangeSourceLocal
   152  	case ChangeSourceLocal:
   153  		return ChangeSourceRemote
   154  	default:
   155  		panic("invalid change source")
   156  	}
   157  }
   158  
   159  // Undo Creates a new list of changes that reverses the given changes list.
   160  func Undo(c Changes) Changes {
   161  	reversed := make(Changes, len(c))
   162  	for i, op := range c {
   163  		switch op.Type {
   164  		case ChangeTypeAdded:
   165  			reversed[i] = &Change{
   166  				Source: switchSource(op.Source),
   167  				Path:   op.Path,
   168  				Type:   ChangeTypeRemoved,
   169  			}
   170  		case ChangeTypeModified:
   171  			reversed[i] = &Change{
   172  				Source: switchSource(op.Source),
   173  				Path:   op.Path,
   174  				Type:   ChangeTypeModified,
   175  			}
   176  		case ChangeTypeRemoved:
   177  			reversed[i] = &Change{
   178  				Source: switchSource(op.Source),
   179  				Path:   op.Path,
   180  				Type:   ChangeTypeModified, // mark as modified so it will trigger download
   181  			}
   182  		case ChangeTypeConflict:
   183  		default:
   184  			// Should never reach
   185  			panic(fmt.Sprintf("got unsupported change type %d in undo", op.Type))
   186  		}
   187  	}
   188  	return reversed
   189  }
   190  
   191  // DiffLocalWithHead Checks changes between a local directory and the head it is pointing to. The diff check assumes the remote
   192  // is an immutable set so any changes found resulted from changes in the local directory
   193  // left is an object channel which contains results from a remote source. rightPath is the local directory to diff with
   194  func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string) (Changes, error) {
   195  	// left should be the base commit
   196  	changes := make([]*Change, 0)
   197  	var (
   198  		currentRemoteFile apigen.ObjectStats
   199  		hasMore           bool
   200  	)
   201  	absPath, err := filepath.Abs(rightPath)
   202  	if err != nil {
   203  		return nil, err
   204  	}
   205  	uri := url.URL{Scheme: "local", Path: absPath}
   206  	reader := local.NewLocalWalker(params.Local{
   207  		ImportEnabled:           false,
   208  		ImportHidden:            true,
   209  		AllowedExternalPrefixes: []string{absPath},
   210  	})
   211  	err = reader.Walk(context.Background(), &uri, block.WalkOptions{}, func(e block.ObjectStoreEntry) error {
   212  		info, err := os.Stat(e.FullKey)
   213  		if err != nil {
   214  			return err
   215  		}
   216  		if info.IsDir() || diffShouldIgnore(info.Name()) {
   217  			return nil
   218  		}
   219  		localPath := e.RelativeKey
   220  		localPath = strings.TrimPrefix(localPath, string(filepath.Separator))
   221  		localPath = filepath.ToSlash(localPath) // normalize to use "/" always
   222  
   223  		localBytes := info.Size()
   224  		localMtime := info.ModTime().Unix()
   225  		for {
   226  			if currentRemoteFile.Path == "" {
   227  				if currentRemoteFile, hasMore = <-left; !hasMore {
   228  					// nothing left on the left side, we definitely added stuff!
   229  					changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeAdded})
   230  					break
   231  				}
   232  			}
   233  			switch {
   234  			case currentRemoteFile.Path < localPath: // We removed a file locally
   235  				changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved})
   236  				currentRemoteFile.Path = ""
   237  			case currentRemoteFile.Path == localPath:
   238  				remoteMtime, err := getMtimeFromStats(currentRemoteFile)
   239  				if err != nil {
   240  					return err
   241  				}
   242  				if localBytes != swag.Int64Value(currentRemoteFile.SizeBytes) || localMtime != remoteMtime {
   243  					// we made a change!
   244  					changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeModified})
   245  				}
   246  				currentRemoteFile.Path = ""
   247  				return nil
   248  			default: // currentRemoteFile.Path > localPath  - we added a new file locally
   249  				changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeAdded})
   250  				return nil
   251  			}
   252  		}
   253  		return nil
   254  	})
   255  	if err != nil {
   256  		return nil, err
   257  	}
   258  
   259  	// remaining remote files
   260  	if currentRemoteFile.Path != "" {
   261  		changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved})
   262  	}
   263  	for currentRemoteFile = range left {
   264  		changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved})
   265  	}
   266  	return changes, nil
   267  }
   268  
   269  // ListRemote - Lists objects from a remote uri and inserts them into the objects channel
   270  func ListRemote(ctx context.Context, client apigen.ClientWithResponsesInterface, loc *uri.URI, objects chan<- apigen.ObjectStats) error {
   271  	hasMore := true
   272  	var after string
   273  	defer func() {
   274  		close(objects)
   275  	}()
   276  
   277  	for hasMore {
   278  		listResp, err := client.ListObjectsWithResponse(ctx, loc.Repository, loc.Ref, &apigen.ListObjectsParams{
   279  			After:        (*apigen.PaginationAfter)(swag.String(after)),
   280  			Prefix:       (*apigen.PaginationPrefix)(loc.Path),
   281  			UserMetadata: swag.Bool(true),
   282  		})
   283  		if err != nil {
   284  			return err
   285  		}
   286  
   287  		if listResp.HTTPResponse.StatusCode != http.StatusOK {
   288  			return fmt.Errorf("list remote failed. HTTP %d: %w", listResp.StatusCode(), ErrRemoteFailure)
   289  		}
   290  		for _, o := range listResp.JSON200.Results {
   291  			path := strings.TrimPrefix(o.Path, loc.GetPath())
   292  			// skip directory markers
   293  			if path == "" || (strings.HasSuffix(path, uri.PathSeparator) && swag.Int64Value(o.SizeBytes) == 0) {
   294  				continue
   295  			}
   296  			path = strings.TrimPrefix(path, uri.PathSeparator)
   297  			objects <- apigen.ObjectStats{
   298  				Checksum:        o.Checksum,
   299  				ContentType:     o.ContentType,
   300  				Metadata:        o.Metadata,
   301  				Mtime:           o.Mtime,
   302  				Path:            path,
   303  				PathType:        o.PathType,
   304  				PhysicalAddress: o.PhysicalAddress,
   305  				SizeBytes:       o.SizeBytes,
   306  			}
   307  		}
   308  		hasMore = listResp.JSON200.Pagination.HasMore
   309  		after = listResp.JSON200.Pagination.NextOffset
   310  	}
   311  	return nil
   312  }
   313  
   314  func diffShouldIgnore(name string) bool {
   315  	switch name {
   316  	case IndexFileName, ".DS_Store":
   317  		return true
   318  	default:
   319  		return false
   320  	}
   321  }