github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/operations/dedupe_test.go (about)

     1  package operations_test
     2  
     3  import (
     4  	"context"
     5  	"testing"
     6  	"time"
     7  
     8  	"github.com/rclone/rclone/fs"
     9  	"github.com/rclone/rclone/fs/hash"
    10  	"github.com/rclone/rclone/fs/operations"
    11  	"github.com/rclone/rclone/fs/walk"
    12  	"github.com/rclone/rclone/fstest"
    13  	"github.com/rclone/rclone/lib/random"
    14  	"github.com/spf13/pflag"
    15  	"github.com/stretchr/testify/assert"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
// Compile-time check that *operations.DeduplicateMode satisfies the
// pflag.Value interface.
var _ pflag.Value = (*operations.DeduplicateMode)(nil)
    21  
    22  func skipIfCantDedupe(t *testing.T, f fs.Fs) {
    23  	if !f.Features().DuplicateFiles {
    24  		t.Skip("Can't test deduplicate - no duplicate files possible")
    25  	}
    26  	if f.Features().PutUnchecked == nil {
    27  		t.Skip("Can't test deduplicate - no PutUnchecked")
    28  	}
    29  	if f.Features().MergeDirs == nil {
    30  		t.Skip("Can't test deduplicate - no MergeDirs")
    31  	}
    32  }
    33  
    34  func skipIfNoHash(t *testing.T, f fs.Fs) {
    35  	if f.Hashes().GetOne() == hash.None {
    36  		t.Skip("Can't run this test without a hash")
    37  	}
    38  }
    39  
    40  func skipIfNoModTime(t *testing.T, f fs.Fs) {
    41  	if f.Precision() >= fs.ModTimeNotSupported {
    42  		t.Skip("Can't run this test without modtimes")
    43  	}
    44  }
    45  
    46  func TestDeduplicateInteractive(t *testing.T) {
    47  	r := fstest.NewRun(t)
    48  	skipIfCantDedupe(t, r.Fremote)
    49  	skipIfNoHash(t, r.Fremote)
    50  
    51  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    52  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    53  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    54  	r.CheckWithDuplicates(t, file1, file2, file3)
    55  
    56  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateInteractive, false)
    57  	require.NoError(t, err)
    58  
    59  	r.CheckRemoteItems(t, file1)
    60  }
    61  
    62  func TestDeduplicateSkip(t *testing.T) {
    63  	r := fstest.NewRun(t)
    64  	skipIfCantDedupe(t, r.Fremote)
    65  	haveHash := r.Fremote.Hashes().GetOne() != hash.None
    66  
    67  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    68  	files := []fstest.Item{file1}
    69  	if haveHash {
    70  		file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    71  		files = append(files, file2)
    72  	}
    73  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t1)
    74  	files = append(files, file3)
    75  	r.CheckWithDuplicates(t, files...)
    76  
    77  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateSkip, false)
    78  	require.NoError(t, err)
    79  
    80  	r.CheckWithDuplicates(t, file1, file3)
    81  }
    82  
    83  func TestDeduplicateSizeOnly(t *testing.T) {
    84  	r := fstest.NewRun(t)
    85  	skipIfCantDedupe(t, r.Fremote)
    86  	ctx := context.Background()
    87  	ci := fs.GetConfig(ctx)
    88  
    89  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
    90  	file2 := r.WriteUncheckedObject(context.Background(), "one", "THIS IS ONE", t1)
    91  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t1)
    92  	r.CheckWithDuplicates(t, file1, file2, file3)
    93  
    94  	ci.SizeOnly = true
    95  	defer func() {
    96  		ci.SizeOnly = false
    97  	}()
    98  
    99  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateSkip, false)
   100  	require.NoError(t, err)
   101  
   102  	r.CheckWithDuplicates(t, file1, file3)
   103  }
   104  
   105  func TestDeduplicateFirst(t *testing.T) {
   106  	r := fstest.NewRun(t)
   107  	skipIfCantDedupe(t, r.Fremote)
   108  
   109  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
   110  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one A", t1)
   111  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is one BB", t1)
   112  	r.CheckWithDuplicates(t, file1, file2, file3)
   113  
   114  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateFirst, false)
   115  	require.NoError(t, err)
   116  
   117  	// list until we get one object
   118  	var objects, size int64
   119  	for try := 1; try <= *fstest.ListRetries; try++ {
   120  		objects, size, _, err = operations.Count(context.Background(), r.Fremote)
   121  		require.NoError(t, err)
   122  		if objects == 1 {
   123  			break
   124  		}
   125  		time.Sleep(time.Second)
   126  	}
   127  	assert.Equal(t, int64(1), objects)
   128  	if size != file1.Size && size != file2.Size && size != file3.Size {
   129  		t.Errorf("Size not one of the object sizes %d", size)
   130  	}
   131  }
   132  
   133  func TestDeduplicateNewest(t *testing.T) {
   134  	r := fstest.NewRun(t)
   135  	skipIfCantDedupe(t, r.Fremote)
   136  	skipIfNoModTime(t, r.Fremote)
   137  
   138  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
   139  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2)
   140  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3)
   141  	r.CheckWithDuplicates(t, file1, file2, file3)
   142  
   143  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateNewest, false)
   144  	require.NoError(t, err)
   145  
   146  	r.CheckRemoteItems(t, file3)
   147  }
   148  
   149  func TestDeduplicateNewestByHash(t *testing.T) {
   150  	r := fstest.NewRun(t)
   151  	skipIfNoHash(t, r.Fremote)
   152  	skipIfNoModTime(t, r.Fremote)
   153  	contents := random.String(100)
   154  
   155  	file1 := r.WriteObject(context.Background(), "one", contents, t1)
   156  	file2 := r.WriteObject(context.Background(), "also/one", contents, t2)
   157  	file3 := r.WriteObject(context.Background(), "another", contents, t3)
   158  	file4 := r.WriteObject(context.Background(), "not-one", "stuff", t3)
   159  	r.CheckRemoteItems(t, file1, file2, file3, file4)
   160  
   161  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateNewest, true)
   162  	require.NoError(t, err)
   163  
   164  	r.CheckRemoteItems(t, file3, file4)
   165  }
   166  
   167  func TestDeduplicateOldest(t *testing.T) {
   168  	r := fstest.NewRun(t)
   169  	skipIfCantDedupe(t, r.Fremote)
   170  
   171  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
   172  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2)
   173  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3)
   174  	r.CheckWithDuplicates(t, file1, file2, file3)
   175  
   176  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateOldest, false)
   177  	require.NoError(t, err)
   178  
   179  	r.CheckRemoteItems(t, file1)
   180  }
   181  
   182  func TestDeduplicateLargest(t *testing.T) {
   183  	r := fstest.NewRun(t)
   184  	skipIfCantDedupe(t, r.Fremote)
   185  
   186  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
   187  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2)
   188  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3)
   189  	r.CheckWithDuplicates(t, file1, file2, file3)
   190  
   191  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateLargest, false)
   192  	require.NoError(t, err)
   193  
   194  	r.CheckRemoteItems(t, file3)
   195  }
   196  
   197  func TestDeduplicateSmallest(t *testing.T) {
   198  	r := fstest.NewRun(t)
   199  	skipIfCantDedupe(t, r.Fremote)
   200  
   201  	file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1)
   202  	file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2)
   203  	file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3)
   204  	r.CheckWithDuplicates(t, file1, file2, file3)
   205  
   206  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateSmallest, false)
   207  	require.NoError(t, err)
   208  
   209  	r.CheckRemoteItems(t, file1)
   210  }
   211  
   212  func TestDeduplicateRename(t *testing.T) {
   213  	r := fstest.NewRun(t)
   214  	skipIfCantDedupe(t, r.Fremote)
   215  
   216  	file1 := r.WriteUncheckedObject(context.Background(), "one.txt", "This is one", t1)
   217  	file2 := r.WriteUncheckedObject(context.Background(), "one.txt", "This is one too", t2)
   218  	file3 := r.WriteUncheckedObject(context.Background(), "one.txt", "This is another one", t3)
   219  	file4 := r.WriteUncheckedObject(context.Background(), "one-1.txt", "This is not a duplicate", t1)
   220  	r.CheckWithDuplicates(t, file1, file2, file3, file4)
   221  
   222  	err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateRename, false)
   223  	require.NoError(t, err)
   224  
   225  	require.NoError(t, walk.ListR(context.Background(), r.Fremote, "", true, -1, walk.ListObjects, func(entries fs.DirEntries) error {
   226  		entries.ForObject(func(o fs.Object) {
   227  			remote := o.Remote()
   228  			if remote != "one-1.txt" &&
   229  				remote != "one-2.txt" &&
   230  				remote != "one-3.txt" &&
   231  				remote != "one-4.txt" {
   232  				t.Errorf("Bad file name after rename %q", remote)
   233  			}
   234  			size := o.Size()
   235  			if size != file1.Size &&
   236  				size != file2.Size &&
   237  				size != file3.Size &&
   238  				size != file4.Size {
   239  				t.Errorf("Size not one of the object sizes %d", size)
   240  			}
   241  			if remote == "one-1.txt" && size != file4.Size {
   242  				t.Errorf("Existing non-duplicate file modified %q", remote)
   243  			}
   244  		})
   245  		return nil
   246  	}))
   247  }
   248  
   249  // This should really be a unit test, but the test framework there
   250  // doesn't have enough tools to make it easy
   251  func TestMergeDirs(t *testing.T) {
   252  	r := fstest.NewRun(t)
   253  
   254  	mergeDirs := r.Fremote.Features().MergeDirs
   255  	if mergeDirs == nil {
   256  		t.Skip("Can't merge directories")
   257  	}
   258  
   259  	file1 := r.WriteObject(context.Background(), "dupe1/one.txt", "This is one", t1)
   260  	file2 := r.WriteObject(context.Background(), "dupe2/two.txt", "This is one too", t2)
   261  	file3 := r.WriteObject(context.Background(), "dupe3/three.txt", "This is another one", t3)
   262  
   263  	objs, dirs, err := walk.GetAll(context.Background(), r.Fremote, "", true, 1)
   264  	require.NoError(t, err)
   265  	assert.Equal(t, 3, len(dirs))
   266  	assert.Equal(t, 0, len(objs))
   267  
   268  	err = mergeDirs(context.Background(), dirs)
   269  	require.NoError(t, err)
   270  
   271  	file2.Path = "dupe1/two.txt"
   272  	file3.Path = "dupe1/three.txt"
   273  	r.CheckRemoteItems(t, file1, file2, file3)
   274  
   275  	objs, dirs, err = walk.GetAll(context.Background(), r.Fremote, "", true, 1)
   276  	require.NoError(t, err)
   277  	assert.Equal(t, 1, len(dirs))
   278  	assert.Equal(t, 0, len(objs))
   279  	assert.Equal(t, "dupe1", dirs[0].Remote())
   280  }