github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/cmd/dedupe/dedupe.go (about)

     1  package dedupe
     2  
     3  import (
     4  	"context"
     5  	"log"
     6  
     7  	"github.com/rclone/rclone/cmd"
     8  	"github.com/rclone/rclone/fs/config/flags"
     9  	"github.com/rclone/rclone/fs/operations"
    10  	"github.com/spf13/cobra"
    11  )
    12  
    13  var (
    14  	dedupeMode = operations.DeduplicateInteractive
    15  )
    16  
    17  func init() {
    18  	cmd.Root.AddCommand(commandDefinition)
    19  	cmdFlag := commandDefinition.Flags()
    20  	flags.FVarP(cmdFlag, &dedupeMode, "dedupe-mode", "", "Dedupe mode interactive|skip|first|newest|oldest|largest|smallest|rename.")
    21  }
    22  
    23  var commandDefinition = &cobra.Command{
    24  	Use:   "dedupe [mode] remote:path",
    25  	Short: `Interactively find duplicate files and delete/rename them.`,
    26  	Long: `
    27  By default ` + "`" + `dedupe` + "`" + ` interactively finds duplicate files and offers to
    28  delete all but one or rename them to be different. Only useful with
    29  Google Drive which can have duplicate file names.
    30  
    31  In the first pass it will merge directories with the same name.  It
    32  will do this iteratively until all the identical directories have been
    33  merged.
    34  
    35  The ` + "`" + `dedupe` + "`" + ` command will delete all but one of any identical (same
    36  md5sum) files it finds without confirmation.  This means that for most
    37  duplicated files the ` + "`" + `dedupe` + "`" + ` command will not be interactive.  You
    38  can use ` + "`" + `--dry-run` + "`" + ` to see what would happen without doing anything.
    39  
    40  Here is an example run.
    41  
    42  Before - with duplicates
    43  
    44      $ rclone lsl drive:dupes
    45        6048320 2016-03-05 16:23:16.798000000 one.txt
    46        6048320 2016-03-05 16:23:11.775000000 one.txt
    47         564374 2016-03-05 16:23:06.731000000 one.txt
    48        6048320 2016-03-05 16:18:26.092000000 one.txt
    49        6048320 2016-03-05 16:22:46.185000000 two.txt
    50        1744073 2016-03-05 16:22:38.104000000 two.txt
    51         564374 2016-03-05 16:22:52.118000000 two.txt
    52  
    53  Now the ` + "`" + `dedupe` + "`" + ` session
    54  
    55      $ rclone dedupe drive:dupes
    56      2016/03/05 16:24:37 Google drive root 'dupes': Looking for duplicates using interactive mode.
    57      one.txt: Found 4 duplicates - deleting identical copies
    58      one.txt: Deleting 2/3 identical duplicates (md5sum "1eedaa9fe86fd4b8632e2ac549403b36")
    59      one.txt: 2 duplicates remain
    60        1:      6048320 bytes, 2016-03-05 16:23:16.798000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
    61        2:       564374 bytes, 2016-03-05 16:23:06.731000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
    62      s) Skip and do nothing
    63      k) Keep just one (choose which in next step)
    64      r) Rename all to be different (by changing file.jpg to file-1.jpg)
    65      s/k/r> k
    66      Enter the number of the file to keep> 1
    67      one.txt: Deleted 1 extra copies
    68      two.txt: Found 3 duplicates - deleting identical copies
    69      two.txt: 3 duplicates remain
    70        1:       564374 bytes, 2016-03-05 16:22:52.118000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
    71        2:      6048320 bytes, 2016-03-05 16:22:46.185000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
    72        3:      1744073 bytes, 2016-03-05 16:22:38.104000000, md5sum 851957f7fb6f0bc4ce76be966d336802
    73      s) Skip and do nothing
    74      k) Keep just one (choose which in next step)
    75      r) Rename all to be different (by changing file.jpg to file-1.jpg)
    76      s/k/r> r
    77      two-1.txt: renamed from: two.txt
    78      two-2.txt: renamed from: two.txt
    79      two-3.txt: renamed from: two.txt
    80  
    81  The result being
    82  
    83      $ rclone lsl drive:dupes
    84        6048320 2016-03-05 16:23:16.798000000 one.txt
    85         564374 2016-03-05 16:22:52.118000000 two-1.txt
    86        6048320 2016-03-05 16:22:46.185000000 two-2.txt
    87        1744073 2016-03-05 16:22:38.104000000 two-3.txt
    88  
    89  Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" + ` flag or by using an extra parameter with the same value
    90  
    91    * ` + "`" + `--dedupe-mode interactive` + "`" + ` - interactive as above.
    92    * ` + "`" + `--dedupe-mode skip` + "`" + ` - removes identical files then skips anything left.
    93    * ` + "`" + `--dedupe-mode first` + "`" + ` - removes identical files then keeps the first one.
    94    * ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
    95    * ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
    96    * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
    97    * ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one.
    98    * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
    99  
   100  For example to rename all the identically named photos in your Google Photos directory, do
   101  
   102      rclone dedupe --dedupe-mode rename "drive:Google Photos"
   103  
   104  Or
   105  
   106      rclone dedupe rename "drive:Google Photos"
   107  `,
   108  	Run: func(command *cobra.Command, args []string) {
   109  		cmd.CheckArgs(1, 2, command, args)
   110  		if len(args) > 1 {
   111  			err := dedupeMode.Set(args[0])
   112  			if err != nil {
   113  				log.Fatal(err)
   114  			}
   115  			args = args[1:]
   116  		}
   117  		fdst := cmd.NewFsSrc(args)
   118  		cmd.Run(false, false, command, func() error {
   119  			return operations.Deduplicate(context.Background(), fdst, dedupeMode)
   120  		})
   121  	},
   122  }