github.com/minio/mc@v0.0.0-20240503112107-b471de8d1882/cmd/difference.go (about) 1 // Copyright (c) 2015-2022 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "context" 22 "strings" 23 "time" 24 "unicode/utf8" 25 26 // golang does not support flat keys for path matching, find does 27 28 "github.com/minio/mc/pkg/probe" 29 "github.com/minio/minio-go/v7" 30 "golang.org/x/text/unicode/norm" 31 ) 32 33 // differType difference in type. 34 type differType int 35 36 const ( 37 differInUnknown differType = iota 38 differInNone // does not differ 39 differInSize // differs in size 40 differInMetadata // differs in metadata 41 differInType // differs in type, exfile/directory 42 differInFirst // only in source (FIRST) 43 differInSecond // only in target (SECOND) 44 differInAASourceMTime // differs in active-active source modtime 45 ) 46 47 func (d differType) String() string { 48 switch d { 49 case differInNone: 50 return "" 51 case differInSize: 52 return "size" 53 case differInMetadata: 54 return "metadata" 55 case differInAASourceMTime: 56 return "mm-source-mtime" 57 case differInType: 58 return "type" 59 case differInFirst: 60 return "only-in-first" 61 case differInSecond: 62 return "only-in-second" 63 } 64 return "unknown" 65 } 66 67 const activeActiveSourceModTimeKey = "X-Amz-Meta-Mm-Source-Mtime" 68 69 func getSourceModTimeKey(metadata map[string]string) string { 70 if metadata[activeActiveSourceModTimeKey] != "" { 71 return metadata[activeActiveSourceModTimeKey] 72 } 73 if metadata[strings.ToLower(activeActiveSourceModTimeKey)] != "" { 74 return metadata[strings.ToLower(activeActiveSourceModTimeKey)] 75 } 76 if metadata[strings.ToLower("Mm-Source-Mtime")] != "" { 77 return metadata[strings.ToLower("Mm-Source-Mtime")] 78 } 79 if metadata["Mm-Source-Mtime"] != "" { 80 return metadata["Mm-Source-Mtime"] 81 } 82 return "" 83 } 84 85 // activeActiveModTimeUpdated tries to calculate if the object copy in the target 86 // is older than the one in the source by comparing the modtime of the data. 87 func activeActiveModTimeUpdated(src, dst *ClientContent) bool { 88 if src == nil || dst == nil { 89 return false 90 } 91 92 if src.Time.IsZero() || dst.Time.IsZero() { 93 // This should only happen in a messy environment 94 // but we are returning false anyway so the caller 95 // function won't take any action. 96 return false 97 } 98 99 srcActualModTime := src.Time 100 dstActualModTime := dst.Time 101 102 srcModTime := getSourceModTimeKey(src.UserMetadata) 103 dstModTime := getSourceModTimeKey(dst.UserMetadata) 104 if srcModTime == "" && dstModTime == "" { 105 // No active-active mirror context found, fallback to modTimes presented 106 // by the client content 107 return srcActualModTime.After(dstActualModTime) 108 } 109 110 var srcOriginLastModified, dstOriginLastModified time.Time 111 var err error 112 if srcModTime != "" { 113 srcOriginLastModified, err = time.Parse(time.RFC3339Nano, srcModTime) 114 if err != nil { 115 // failure to parse source modTime, modTime tampered ignore the file 116 return false 117 } 118 } 119 if dstModTime != "" { 120 dstOriginLastModified, err = time.Parse(time.RFC3339Nano, dstModTime) 121 if err != nil { 122 // failure to parse source modTime, modTime tampered ignore the file 123 return false 124 } 125 } 126 127 if !srcOriginLastModified.IsZero() && srcOriginLastModified.After(src.Time) { 128 srcActualModTime = srcOriginLastModified 129 } 130 131 if !dstOriginLastModified.IsZero() && dstOriginLastModified.After(dst.Time) { 132 dstActualModTime = dstOriginLastModified 133 } 134 135 return srcActualModTime.After(dstActualModTime) 136 } 137 138 func metadataEqual(m1, m2 map[string]string) bool { 139 for k, v := range m1 { 140 if k == activeActiveSourceModTimeKey { 141 continue 142 } 143 if k == strings.ToLower(activeActiveSourceModTimeKey) { 144 continue 145 } 146 if m2[k] != v { 147 return false 148 } 149 } 150 for k, v := range m2 { 151 if k == activeActiveSourceModTimeKey { 152 continue 153 } 154 if k == strings.ToLower(activeActiveSourceModTimeKey) { 155 continue 156 } 157 if m1[k] != v { 158 return false 159 } 160 } 161 return true 162 } 163 164 func objectDifference(ctx context.Context, sourceClnt, targetClnt Client, isMetadata bool) (diffCh chan diffMessage) { 165 sourceURL := sourceClnt.GetURL().String() 166 sourceCh := sourceClnt.List(ctx, ListOptions{Recursive: true, WithMetadata: isMetadata, ShowDir: DirNone}) 167 168 targetURL := targetClnt.GetURL().String() 169 targetCh := targetClnt.List(ctx, ListOptions{Recursive: true, WithMetadata: isMetadata, ShowDir: DirNone}) 170 171 return difference(sourceURL, sourceCh, targetURL, targetCh, isMetadata, false) 172 } 173 174 func bucketDifference(ctx context.Context, sourceClnt, targetClnt Client) (diffCh chan diffMessage) { 175 sourceURL := sourceClnt.GetURL().String() 176 sourceCh := make(chan *ClientContent) 177 178 go func() { 179 defer close(sourceCh) 180 buckets, err := sourceClnt.ListBuckets(ctx) 181 if err != nil { 182 select { 183 case <-ctx.Done(): 184 case sourceCh <- &ClientContent{Err: err}: 185 } 186 return 187 } 188 for _, b := range buckets { 189 select { 190 case <-ctx.Done(): 191 return 192 case sourceCh <- b: 193 } 194 } 195 }() 196 197 targetURL := targetClnt.GetURL().String() 198 targetCh := make(chan *ClientContent) 199 go func() { 200 defer close(targetCh) 201 buckets, err := targetClnt.ListBuckets(ctx) 202 if err != nil { 203 select { 204 case <-ctx.Done(): 205 case targetCh <- &ClientContent{Err: err}: 206 } 207 return 208 } 209 for _, b := range buckets { 210 select { 211 case <-ctx.Done(): 212 return 213 case targetCh <- b: 214 } 215 } 216 }() 217 218 return difference(sourceURL, sourceCh, targetURL, targetCh, false, false) 219 } 220 221 func differenceInternal(sourceURL string, srcCh <-chan *ClientContent, targetURL string, tgtCh <-chan *ClientContent, 222 cmpMetadata, returnSimilar bool, diffCh chan<- diffMessage, 223 ) *probe.Error { 224 // Pop first entries from the source and targets 225 srcCtnt, srcOk := <-srcCh 226 tgtCtnt, tgtOk := <-tgtCh 227 228 var srcEOF, tgtEOF bool 229 230 for { 231 srcEOF = !srcOk 232 tgtEOF = !tgtOk 233 234 // No objects from source AND target: Finish 235 if srcEOF && tgtEOF { 236 break 237 } 238 239 if !srcEOF && srcCtnt.Err != nil { 240 return srcCtnt.Err.Trace(sourceURL, targetURL) 241 } 242 243 if !tgtEOF && tgtCtnt.Err != nil { 244 return tgtCtnt.Err.Trace(sourceURL, targetURL) 245 } 246 247 // If source doesn't have objects anymore, comparison becomes obvious 248 if srcEOF { 249 diffCh <- diffMessage{ 250 SecondURL: tgtCtnt.URL.String(), 251 Diff: differInSecond, 252 secondContent: tgtCtnt, 253 } 254 tgtCtnt, tgtOk = <-tgtCh 255 continue 256 } 257 258 // The same for target 259 if tgtEOF { 260 diffCh <- diffMessage{ 261 FirstURL: srcCtnt.URL.String(), 262 Diff: differInFirst, 263 firstContent: srcCtnt, 264 } 265 srcCtnt, srcOk = <-srcCh 266 continue 267 } 268 269 srcSuffix := strings.TrimPrefix(srcCtnt.URL.String(), sourceURL) 270 tgtSuffix := strings.TrimPrefix(tgtCtnt.URL.String(), targetURL) 271 272 current := urlJoinPath(targetURL, srcSuffix) 273 expected := urlJoinPath(targetURL, tgtSuffix) 274 275 if !utf8.ValidString(srcSuffix) { 276 // Error. Keys must be valid UTF-8. 277 diffCh <- diffMessage{Error: errInvalidSource(current).Trace()} 278 srcCtnt, srcOk = <-srcCh 279 continue 280 } 281 if !utf8.ValidString(tgtSuffix) { 282 // Error. Keys must be valid UTF-8. 283 diffCh <- diffMessage{Error: errInvalidTarget(expected).Trace()} 284 tgtCtnt, tgtOk = <-tgtCh 285 continue 286 } 287 288 // Normalize to avoid situations where multiple byte representations are possible. 289 // e.g. 'รค' can be represented as precomposed U+00E4 (UTF-8 0xc3a4) or decomposed 290 // U+0061 U+0308 (UTF-8 0x61cc88). 291 normalizedCurrent := norm.NFC.String(current) 292 normalizedExpected := norm.NFC.String(expected) 293 294 if normalizedExpected > normalizedCurrent { 295 diffCh <- diffMessage{ 296 FirstURL: srcCtnt.URL.String(), 297 Diff: differInFirst, 298 firstContent: srcCtnt, 299 } 300 srcCtnt, srcOk = <-srcCh 301 continue 302 } 303 if normalizedExpected == normalizedCurrent { 304 srcType, tgtType := srcCtnt.Type, tgtCtnt.Type 305 srcSize, tgtSize := srcCtnt.Size, tgtCtnt.Size 306 if srcType.IsRegular() && !tgtType.IsRegular() || 307 !srcType.IsRegular() && tgtType.IsRegular() { 308 // Type differs. Source is never a directory. 309 diffCh <- diffMessage{ 310 FirstURL: srcCtnt.URL.String(), 311 SecondURL: tgtCtnt.URL.String(), 312 Diff: differInType, 313 firstContent: srcCtnt, 314 secondContent: tgtCtnt, 315 } 316 continue 317 } 318 if srcSize != tgtSize { 319 // Regular files differing in size. 320 diffCh <- diffMessage{ 321 FirstURL: srcCtnt.URL.String(), 322 SecondURL: tgtCtnt.URL.String(), 323 Diff: differInSize, 324 firstContent: srcCtnt, 325 secondContent: tgtCtnt, 326 } 327 } else if activeActiveModTimeUpdated(srcCtnt, tgtCtnt) { 328 diffCh <- diffMessage{ 329 FirstURL: srcCtnt.URL.String(), 330 SecondURL: tgtCtnt.URL.String(), 331 Diff: differInAASourceMTime, 332 firstContent: srcCtnt, 333 secondContent: tgtCtnt, 334 } 335 } else if cmpMetadata && 336 !metadataEqual(srcCtnt.UserMetadata, tgtCtnt.UserMetadata) && 337 !metadataEqual(srcCtnt.Metadata, tgtCtnt.Metadata) { 338 339 // Regular files user requesting additional metadata to same file. 340 diffCh <- diffMessage{ 341 FirstURL: srcCtnt.URL.String(), 342 SecondURL: tgtCtnt.URL.String(), 343 Diff: differInMetadata, 344 firstContent: srcCtnt, 345 secondContent: tgtCtnt, 346 } 347 } 348 349 // No differ 350 if returnSimilar { 351 diffCh <- diffMessage{ 352 FirstURL: srcCtnt.URL.String(), 353 SecondURL: tgtCtnt.URL.String(), 354 Diff: differInNone, 355 firstContent: srcCtnt, 356 secondContent: tgtCtnt, 357 } 358 } 359 srcCtnt, srcOk = <-srcCh 360 tgtCtnt, tgtOk = <-tgtCh 361 continue 362 } 363 // Differ in second 364 diffCh <- diffMessage{ 365 SecondURL: tgtCtnt.URL.String(), 366 Diff: differInSecond, 367 secondContent: tgtCtnt, 368 } 369 tgtCtnt, tgtOk = <-tgtCh 370 continue 371 } 372 373 return nil 374 } 375 376 // objectDifference function finds the difference between all objects 377 // recursively in sorted order from source and target. 378 func difference(sourceURL string, sourceCh <-chan *ClientContent, targetURL string, targetCh <-chan *ClientContent, cmpMetadata, returnSimilar bool) (diffCh chan diffMessage) { 379 diffCh = make(chan diffMessage, 10000) 380 381 go func() { 382 defer close(diffCh) 383 384 err := differenceInternal(sourceURL, sourceCh, targetURL, targetCh, cmpMetadata, returnSimilar, diffCh) 385 if err != nil { 386 // handle this specifically for filesystem related errors. 387 switch v := err.ToGoError().(type) { 388 case PathNotFound, PathInsufficientPermission, PathNotADirectory: 389 diffCh <- diffMessage{ 390 Error: err, 391 } 392 return 393 case minio.ErrorResponse: 394 switch v.Code { 395 case "NoSuchBucket", "NoSuchKey": 396 diffCh <- diffMessage{ 397 Error: err, 398 } 399 return 400 } 401 } 402 errorIf(err, "Unable to list comparison retrying..") 403 } 404 }() 405 406 return diffCh 407 }