github.com/minio/mc@v0.0.0-20240503112107-b471de8d1882/cmd/difference.go (about)

     1  // Copyright (c) 2015-2022 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"strings"
    23  	"time"
    24  	"unicode/utf8"
    25  
    26  	// golang does not support flat keys for path matching, find does
    27  
    28  	"github.com/minio/mc/pkg/probe"
    29  	"github.com/minio/minio-go/v7"
    30  	"golang.org/x/text/unicode/norm"
    31  )
    32  
    33  // differType difference in type.
    34  type differType int
    35  
    36  const (
    37  	differInUnknown       differType = iota
    38  	differInNone                     // does not differ
    39  	differInSize                     // differs in size
    40  	differInMetadata                 // differs in metadata
    41  	differInType                     // differs in type, exfile/directory
    42  	differInFirst                    // only in source (FIRST)
    43  	differInSecond                   // only in target (SECOND)
    44  	differInAASourceMTime            // differs in active-active source modtime
    45  )
    46  
    47  func (d differType) String() string {
    48  	switch d {
    49  	case differInNone:
    50  		return ""
    51  	case differInSize:
    52  		return "size"
    53  	case differInMetadata:
    54  		return "metadata"
    55  	case differInAASourceMTime:
    56  		return "mm-source-mtime"
    57  	case differInType:
    58  		return "type"
    59  	case differInFirst:
    60  		return "only-in-first"
    61  	case differInSecond:
    62  		return "only-in-second"
    63  	}
    64  	return "unknown"
    65  }
    66  
    67  const activeActiveSourceModTimeKey = "X-Amz-Meta-Mm-Source-Mtime"
    68  
    69  func getSourceModTimeKey(metadata map[string]string) string {
    70  	if metadata[activeActiveSourceModTimeKey] != "" {
    71  		return metadata[activeActiveSourceModTimeKey]
    72  	}
    73  	if metadata[strings.ToLower(activeActiveSourceModTimeKey)] != "" {
    74  		return metadata[strings.ToLower(activeActiveSourceModTimeKey)]
    75  	}
    76  	if metadata[strings.ToLower("Mm-Source-Mtime")] != "" {
    77  		return metadata[strings.ToLower("Mm-Source-Mtime")]
    78  	}
    79  	if metadata["Mm-Source-Mtime"] != "" {
    80  		return metadata["Mm-Source-Mtime"]
    81  	}
    82  	return ""
    83  }
    84  
    85  // activeActiveModTimeUpdated tries to calculate if the object copy in the target
    86  // is older than the one in the source by comparing the modtime of the data.
    87  func activeActiveModTimeUpdated(src, dst *ClientContent) bool {
    88  	if src == nil || dst == nil {
    89  		return false
    90  	}
    91  
    92  	if src.Time.IsZero() || dst.Time.IsZero() {
    93  		// This should only happen in a messy environment
    94  		// but we are returning false anyway so the caller
    95  		// function won't take any action.
    96  		return false
    97  	}
    98  
    99  	srcActualModTime := src.Time
   100  	dstActualModTime := dst.Time
   101  
   102  	srcModTime := getSourceModTimeKey(src.UserMetadata)
   103  	dstModTime := getSourceModTimeKey(dst.UserMetadata)
   104  	if srcModTime == "" && dstModTime == "" {
   105  		// No active-active mirror context found, fallback to modTimes presented
   106  		// by the client content
   107  		return srcActualModTime.After(dstActualModTime)
   108  	}
   109  
   110  	var srcOriginLastModified, dstOriginLastModified time.Time
   111  	var err error
   112  	if srcModTime != "" {
   113  		srcOriginLastModified, err = time.Parse(time.RFC3339Nano, srcModTime)
   114  		if err != nil {
   115  			// failure to parse source modTime, modTime tampered ignore the file
   116  			return false
   117  		}
   118  	}
   119  	if dstModTime != "" {
   120  		dstOriginLastModified, err = time.Parse(time.RFC3339Nano, dstModTime)
   121  		if err != nil {
   122  			// failure to parse source modTime, modTime tampered ignore the file
   123  			return false
   124  		}
   125  	}
   126  
   127  	if !srcOriginLastModified.IsZero() && srcOriginLastModified.After(src.Time) {
   128  		srcActualModTime = srcOriginLastModified
   129  	}
   130  
   131  	if !dstOriginLastModified.IsZero() && dstOriginLastModified.After(dst.Time) {
   132  		dstActualModTime = dstOriginLastModified
   133  	}
   134  
   135  	return srcActualModTime.After(dstActualModTime)
   136  }
   137  
   138  func metadataEqual(m1, m2 map[string]string) bool {
   139  	for k, v := range m1 {
   140  		if k == activeActiveSourceModTimeKey {
   141  			continue
   142  		}
   143  		if k == strings.ToLower(activeActiveSourceModTimeKey) {
   144  			continue
   145  		}
   146  		if m2[k] != v {
   147  			return false
   148  		}
   149  	}
   150  	for k, v := range m2 {
   151  		if k == activeActiveSourceModTimeKey {
   152  			continue
   153  		}
   154  		if k == strings.ToLower(activeActiveSourceModTimeKey) {
   155  			continue
   156  		}
   157  		if m1[k] != v {
   158  			return false
   159  		}
   160  	}
   161  	return true
   162  }
   163  
   164  func objectDifference(ctx context.Context, sourceClnt, targetClnt Client, isMetadata bool) (diffCh chan diffMessage) {
   165  	sourceURL := sourceClnt.GetURL().String()
   166  	sourceCh := sourceClnt.List(ctx, ListOptions{Recursive: true, WithMetadata: isMetadata, ShowDir: DirNone})
   167  
   168  	targetURL := targetClnt.GetURL().String()
   169  	targetCh := targetClnt.List(ctx, ListOptions{Recursive: true, WithMetadata: isMetadata, ShowDir: DirNone})
   170  
   171  	return difference(sourceURL, sourceCh, targetURL, targetCh, isMetadata, false)
   172  }
   173  
   174  func bucketDifference(ctx context.Context, sourceClnt, targetClnt Client) (diffCh chan diffMessage) {
   175  	sourceURL := sourceClnt.GetURL().String()
   176  	sourceCh := make(chan *ClientContent)
   177  
   178  	go func() {
   179  		defer close(sourceCh)
   180  		buckets, err := sourceClnt.ListBuckets(ctx)
   181  		if err != nil {
   182  			select {
   183  			case <-ctx.Done():
   184  			case sourceCh <- &ClientContent{Err: err}:
   185  			}
   186  			return
   187  		}
   188  		for _, b := range buckets {
   189  			select {
   190  			case <-ctx.Done():
   191  				return
   192  			case sourceCh <- b:
   193  			}
   194  		}
   195  	}()
   196  
   197  	targetURL := targetClnt.GetURL().String()
   198  	targetCh := make(chan *ClientContent)
   199  	go func() {
   200  		defer close(targetCh)
   201  		buckets, err := targetClnt.ListBuckets(ctx)
   202  		if err != nil {
   203  			select {
   204  			case <-ctx.Done():
   205  			case targetCh <- &ClientContent{Err: err}:
   206  			}
   207  			return
   208  		}
   209  		for _, b := range buckets {
   210  			select {
   211  			case <-ctx.Done():
   212  				return
   213  			case targetCh <- b:
   214  			}
   215  		}
   216  	}()
   217  
   218  	return difference(sourceURL, sourceCh, targetURL, targetCh, false, false)
   219  }
   220  
   221  func differenceInternal(sourceURL string, srcCh <-chan *ClientContent, targetURL string, tgtCh <-chan *ClientContent,
   222  	cmpMetadata, returnSimilar bool, diffCh chan<- diffMessage,
   223  ) *probe.Error {
   224  	// Pop first entries from the source and targets
   225  	srcCtnt, srcOk := <-srcCh
   226  	tgtCtnt, tgtOk := <-tgtCh
   227  
   228  	var srcEOF, tgtEOF bool
   229  
   230  	for {
   231  		srcEOF = !srcOk
   232  		tgtEOF = !tgtOk
   233  
   234  		// No objects from source AND target: Finish
   235  		if srcEOF && tgtEOF {
   236  			break
   237  		}
   238  
   239  		if !srcEOF && srcCtnt.Err != nil {
   240  			return srcCtnt.Err.Trace(sourceURL, targetURL)
   241  		}
   242  
   243  		if !tgtEOF && tgtCtnt.Err != nil {
   244  			return tgtCtnt.Err.Trace(sourceURL, targetURL)
   245  		}
   246  
   247  		// If source doesn't have objects anymore, comparison becomes obvious
   248  		if srcEOF {
   249  			diffCh <- diffMessage{
   250  				SecondURL:     tgtCtnt.URL.String(),
   251  				Diff:          differInSecond,
   252  				secondContent: tgtCtnt,
   253  			}
   254  			tgtCtnt, tgtOk = <-tgtCh
   255  			continue
   256  		}
   257  
   258  		// The same for target
   259  		if tgtEOF {
   260  			diffCh <- diffMessage{
   261  				FirstURL:     srcCtnt.URL.String(),
   262  				Diff:         differInFirst,
   263  				firstContent: srcCtnt,
   264  			}
   265  			srcCtnt, srcOk = <-srcCh
   266  			continue
   267  		}
   268  
   269  		srcSuffix := strings.TrimPrefix(srcCtnt.URL.String(), sourceURL)
   270  		tgtSuffix := strings.TrimPrefix(tgtCtnt.URL.String(), targetURL)
   271  
   272  		current := urlJoinPath(targetURL, srcSuffix)
   273  		expected := urlJoinPath(targetURL, tgtSuffix)
   274  
   275  		if !utf8.ValidString(srcSuffix) {
   276  			// Error. Keys must be valid UTF-8.
   277  			diffCh <- diffMessage{Error: errInvalidSource(current).Trace()}
   278  			srcCtnt, srcOk = <-srcCh
   279  			continue
   280  		}
   281  		if !utf8.ValidString(tgtSuffix) {
   282  			// Error. Keys must be valid UTF-8.
   283  			diffCh <- diffMessage{Error: errInvalidTarget(expected).Trace()}
   284  			tgtCtnt, tgtOk = <-tgtCh
   285  			continue
   286  		}
   287  
   288  		// Normalize to avoid situations where multiple byte representations are possible.
   289  		// e.g. 'รค' can be represented as precomposed U+00E4 (UTF-8 0xc3a4) or decomposed
   290  		// U+0061 U+0308 (UTF-8 0x61cc88).
   291  		normalizedCurrent := norm.NFC.String(current)
   292  		normalizedExpected := norm.NFC.String(expected)
   293  
   294  		if normalizedExpected > normalizedCurrent {
   295  			diffCh <- diffMessage{
   296  				FirstURL:     srcCtnt.URL.String(),
   297  				Diff:         differInFirst,
   298  				firstContent: srcCtnt,
   299  			}
   300  			srcCtnt, srcOk = <-srcCh
   301  			continue
   302  		}
   303  		if normalizedExpected == normalizedCurrent {
   304  			srcType, tgtType := srcCtnt.Type, tgtCtnt.Type
   305  			srcSize, tgtSize := srcCtnt.Size, tgtCtnt.Size
   306  			if srcType.IsRegular() && !tgtType.IsRegular() ||
   307  				!srcType.IsRegular() && tgtType.IsRegular() {
   308  				// Type differs. Source is never a directory.
   309  				diffCh <- diffMessage{
   310  					FirstURL:      srcCtnt.URL.String(),
   311  					SecondURL:     tgtCtnt.URL.String(),
   312  					Diff:          differInType,
   313  					firstContent:  srcCtnt,
   314  					secondContent: tgtCtnt,
   315  				}
   316  				continue
   317  			}
   318  			if srcSize != tgtSize {
   319  				// Regular files differing in size.
   320  				diffCh <- diffMessage{
   321  					FirstURL:      srcCtnt.URL.String(),
   322  					SecondURL:     tgtCtnt.URL.String(),
   323  					Diff:          differInSize,
   324  					firstContent:  srcCtnt,
   325  					secondContent: tgtCtnt,
   326  				}
   327  			} else if activeActiveModTimeUpdated(srcCtnt, tgtCtnt) {
   328  				diffCh <- diffMessage{
   329  					FirstURL:      srcCtnt.URL.String(),
   330  					SecondURL:     tgtCtnt.URL.String(),
   331  					Diff:          differInAASourceMTime,
   332  					firstContent:  srcCtnt,
   333  					secondContent: tgtCtnt,
   334  				}
   335  			} else if cmpMetadata &&
   336  				!metadataEqual(srcCtnt.UserMetadata, tgtCtnt.UserMetadata) &&
   337  				!metadataEqual(srcCtnt.Metadata, tgtCtnt.Metadata) {
   338  
   339  				// Regular files user requesting additional metadata to same file.
   340  				diffCh <- diffMessage{
   341  					FirstURL:      srcCtnt.URL.String(),
   342  					SecondURL:     tgtCtnt.URL.String(),
   343  					Diff:          differInMetadata,
   344  					firstContent:  srcCtnt,
   345  					secondContent: tgtCtnt,
   346  				}
   347  			}
   348  
   349  			// No differ
   350  			if returnSimilar {
   351  				diffCh <- diffMessage{
   352  					FirstURL:      srcCtnt.URL.String(),
   353  					SecondURL:     tgtCtnt.URL.String(),
   354  					Diff:          differInNone,
   355  					firstContent:  srcCtnt,
   356  					secondContent: tgtCtnt,
   357  				}
   358  			}
   359  			srcCtnt, srcOk = <-srcCh
   360  			tgtCtnt, tgtOk = <-tgtCh
   361  			continue
   362  		}
   363  		// Differ in second
   364  		diffCh <- diffMessage{
   365  			SecondURL:     tgtCtnt.URL.String(),
   366  			Diff:          differInSecond,
   367  			secondContent: tgtCtnt,
   368  		}
   369  		tgtCtnt, tgtOk = <-tgtCh
   370  		continue
   371  	}
   372  
   373  	return nil
   374  }
   375  
   376  // objectDifference function finds the difference between all objects
   377  // recursively in sorted order from source and target.
   378  func difference(sourceURL string, sourceCh <-chan *ClientContent, targetURL string, targetCh <-chan *ClientContent, cmpMetadata, returnSimilar bool) (diffCh chan diffMessage) {
   379  	diffCh = make(chan diffMessage, 10000)
   380  
   381  	go func() {
   382  		defer close(diffCh)
   383  
   384  		err := differenceInternal(sourceURL, sourceCh, targetURL, targetCh, cmpMetadata, returnSimilar, diffCh)
   385  		if err != nil {
   386  			// handle this specifically for filesystem related errors.
   387  			switch v := err.ToGoError().(type) {
   388  			case PathNotFound, PathInsufficientPermission, PathNotADirectory:
   389  				diffCh <- diffMessage{
   390  					Error: err,
   391  				}
   392  				return
   393  			case minio.ErrorResponse:
   394  				switch v.Code {
   395  				case "NoSuchBucket", "NoSuchKey":
   396  					diffCh <- diffMessage{
   397  						Error: err,
   398  					}
   399  					return
   400  				}
   401  			}
   402  			errorIf(err, "Unable to list comparison retrying..")
   403  		}
   404  	}()
   405  
   406  	return diffCh
   407  }