github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/local/diff.go (about) 1 package local 2 3 import ( 4 "context" 5 "fmt" 6 "net/http" 7 "net/url" 8 "os" 9 "path/filepath" 10 "strings" 11 12 "github.com/go-openapi/swag" 13 "github.com/treeverse/lakefs/pkg/api/apigen" 14 "github.com/treeverse/lakefs/pkg/block" 15 "github.com/treeverse/lakefs/pkg/block/local" 16 "github.com/treeverse/lakefs/pkg/block/params" 17 "github.com/treeverse/lakefs/pkg/uri" 18 ) 19 20 type ChangeSource int 21 22 const ( 23 ChangeSourceRemote ChangeSource = iota 24 ChangeSourceLocal 25 ) 26 27 type ChangeType int 28 29 const ( 30 ChangeTypeAdded ChangeType = iota 31 ChangeTypeModified 32 ChangeTypeRemoved 33 ChangeTypeConflict 34 ) 35 36 type Change struct { 37 Source ChangeSource 38 Path string 39 Type ChangeType 40 } 41 42 func (c *Change) String() string { 43 return fmt.Sprintf("%s\t%s\t%s", ChangeSourceString(c.Source), ChangeTypeString(c.Type), c.Path) 44 } 45 46 func ChangeTypeFromString(changeType string) ChangeType { 47 switch changeType { 48 case "added": 49 return ChangeTypeAdded 50 case "removed": 51 return ChangeTypeRemoved 52 case "modified", "changed": 53 return ChangeTypeModified 54 case "conflict": 55 return ChangeTypeConflict 56 default: 57 panic("invalid change type") 58 } 59 } 60 61 func ChangeTypeString(changeType ChangeType) string { 62 switch changeType { 63 case ChangeTypeAdded: 64 return "added" 65 case ChangeTypeRemoved: 66 return "removed" 67 case ChangeTypeModified: 68 return "modified" 69 case ChangeTypeConflict: 70 return "conflict" 71 default: 72 panic("invalid change type") 73 } 74 } 75 76 func ChangeSourceString(changeSource ChangeSource) string { 77 switch changeSource { 78 case ChangeSourceLocal: 79 return "local" 80 case ChangeSourceRemote: 81 return "remote" 82 default: 83 panic("invalid change source") 84 } 85 } 86 87 type Changes []*Change 88 89 func (c Changes) String() string { 90 strs := make([]string, len(c)) 91 for i, cc := range c { 92 strs[i] = cc.String() 93 } 94 return strings.Join(strs, "\n") 95 } 96 97 type MergeStrategy int 98 99 const ( 100 MergeStrategyNone MergeStrategy = iota 101 MergeStrategyThis 102 MergeStrategyOther 103 ) 104 105 // MergeWith combines changes from two diffs, sorting by lexicographic order. 106 // If the same path appears in both diffs, it's marked as a conflict. 107 func (c Changes) MergeWith(other Changes, strategy MergeStrategy) Changes { 108 cIdx := 0 109 oIdx := 0 110 result := make(Changes, 0) 111 for cIdx < len(c) && oIdx < len(other) { 112 switch { 113 case c[cIdx].Path > other[oIdx].Path: 114 // other is first 115 result = append(result, other[oIdx]) 116 oIdx++ 117 case c[cIdx].Path < other[oIdx].Path: 118 result = append(result, c[cIdx]) 119 cIdx++ 120 default: // both modified the same path!! 121 switch strategy { 122 case MergeStrategyNone: 123 result = append(result, &Change{ 124 Source: c[cIdx].Source, 125 Path: c[cIdx].Path, 126 Type: ChangeTypeConflict, 127 }) 128 case MergeStrategyOther: 129 result = append(result, other[oIdx]) 130 case MergeStrategyThis: 131 result = append(result, c[cIdx]) 132 default: 133 panic("invalid merge strategy") 134 } 135 cIdx++ 136 oIdx++ 137 } 138 } 139 if cIdx < len(c) { 140 result = append(result, c[cIdx:]...) 141 } 142 if oIdx < len(other) { 143 result = append(result, other[oIdx:]...) 144 } 145 return result 146 } 147 148 func switchSource(source ChangeSource) ChangeSource { 149 switch source { 150 case ChangeSourceRemote: 151 return ChangeSourceLocal 152 case ChangeSourceLocal: 153 return ChangeSourceRemote 154 default: 155 panic("invalid change source") 156 } 157 } 158 159 // Undo Creates a new list of changes that reverses the given changes list. 160 func Undo(c Changes) Changes { 161 reversed := make(Changes, len(c)) 162 for i, op := range c { 163 switch op.Type { 164 case ChangeTypeAdded: 165 reversed[i] = &Change{ 166 Source: switchSource(op.Source), 167 Path: op.Path, 168 Type: ChangeTypeRemoved, 169 } 170 case ChangeTypeModified: 171 reversed[i] = &Change{ 172 Source: switchSource(op.Source), 173 Path: op.Path, 174 Type: ChangeTypeModified, 175 } 176 case ChangeTypeRemoved: 177 reversed[i] = &Change{ 178 Source: switchSource(op.Source), 179 Path: op.Path, 180 Type: ChangeTypeModified, // mark as modified so it will trigger download 181 } 182 case ChangeTypeConflict: 183 default: 184 // Should never reach 185 panic(fmt.Sprintf("got unsupported change type %d in undo", op.Type)) 186 } 187 } 188 return reversed 189 } 190 191 // DiffLocalWithHead Checks changes between a local directory and the head it is pointing to. The diff check assumes the remote 192 // is an immutable set so any changes found resulted from changes in the local directory 193 // left is an object channel which contains results from a remote source. rightPath is the local directory to diff with 194 func DiffLocalWithHead(left <-chan apigen.ObjectStats, rightPath string) (Changes, error) { 195 // left should be the base commit 196 changes := make([]*Change, 0) 197 var ( 198 currentRemoteFile apigen.ObjectStats 199 hasMore bool 200 ) 201 absPath, err := filepath.Abs(rightPath) 202 if err != nil { 203 return nil, err 204 } 205 uri := url.URL{Scheme: "local", Path: absPath} 206 reader := local.NewLocalWalker(params.Local{ 207 ImportEnabled: false, 208 ImportHidden: true, 209 AllowedExternalPrefixes: []string{absPath}, 210 }) 211 err = reader.Walk(context.Background(), &uri, block.WalkOptions{}, func(e block.ObjectStoreEntry) error { 212 info, err := os.Stat(e.FullKey) 213 if err != nil { 214 return err 215 } 216 if info.IsDir() || diffShouldIgnore(info.Name()) { 217 return nil 218 } 219 localPath := e.RelativeKey 220 localPath = strings.TrimPrefix(localPath, string(filepath.Separator)) 221 localPath = filepath.ToSlash(localPath) // normalize to use "/" always 222 223 localBytes := info.Size() 224 localMtime := info.ModTime().Unix() 225 for { 226 if currentRemoteFile.Path == "" { 227 if currentRemoteFile, hasMore = <-left; !hasMore { 228 // nothing left on the left side, we definitely added stuff! 229 changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeAdded}) 230 break 231 } 232 } 233 switch { 234 case currentRemoteFile.Path < localPath: // We removed a file locally 235 changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved}) 236 currentRemoteFile.Path = "" 237 case currentRemoteFile.Path == localPath: 238 remoteMtime, err := getMtimeFromStats(currentRemoteFile) 239 if err != nil { 240 return err 241 } 242 if localBytes != swag.Int64Value(currentRemoteFile.SizeBytes) || localMtime != remoteMtime { 243 // we made a change! 244 changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeModified}) 245 } 246 currentRemoteFile.Path = "" 247 return nil 248 default: // currentRemoteFile.Path > localPath - we added a new file locally 249 changes = append(changes, &Change{ChangeSourceLocal, localPath, ChangeTypeAdded}) 250 return nil 251 } 252 } 253 return nil 254 }) 255 if err != nil { 256 return nil, err 257 } 258 259 // remaining remote files 260 if currentRemoteFile.Path != "" { 261 changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved}) 262 } 263 for currentRemoteFile = range left { 264 changes = append(changes, &Change{ChangeSourceLocal, currentRemoteFile.Path, ChangeTypeRemoved}) 265 } 266 return changes, nil 267 } 268 269 // ListRemote - Lists objects from a remote uri and inserts them into the objects channel 270 func ListRemote(ctx context.Context, client apigen.ClientWithResponsesInterface, loc *uri.URI, objects chan<- apigen.ObjectStats) error { 271 hasMore := true 272 var after string 273 defer func() { 274 close(objects) 275 }() 276 277 for hasMore { 278 listResp, err := client.ListObjectsWithResponse(ctx, loc.Repository, loc.Ref, &apigen.ListObjectsParams{ 279 After: (*apigen.PaginationAfter)(swag.String(after)), 280 Prefix: (*apigen.PaginationPrefix)(loc.Path), 281 UserMetadata: swag.Bool(true), 282 }) 283 if err != nil { 284 return err 285 } 286 287 if listResp.HTTPResponse.StatusCode != http.StatusOK { 288 return fmt.Errorf("list remote failed. HTTP %d: %w", listResp.StatusCode(), ErrRemoteFailure) 289 } 290 for _, o := range listResp.JSON200.Results { 291 path := strings.TrimPrefix(o.Path, loc.GetPath()) 292 // skip directory markers 293 if path == "" || (strings.HasSuffix(path, uri.PathSeparator) && swag.Int64Value(o.SizeBytes) == 0) { 294 continue 295 } 296 path = strings.TrimPrefix(path, uri.PathSeparator) 297 objects <- apigen.ObjectStats{ 298 Checksum: o.Checksum, 299 ContentType: o.ContentType, 300 Metadata: o.Metadata, 301 Mtime: o.Mtime, 302 Path: path, 303 PathType: o.PathType, 304 PhysicalAddress: o.PhysicalAddress, 305 SizeBytes: o.SizeBytes, 306 } 307 } 308 hasMore = listResp.JSON200.Pagination.HasMore 309 after = listResp.JSON200.Pagination.NextOffset 310 } 311 return nil 312 } 313 314 func diffShouldIgnore(name string) bool { 315 switch name { 316 case IndexFileName, ".DS_Store": 317 return true 318 default: 319 return false 320 } 321 }