github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/cmd/dedupe/dedupe.go (about)

     1  package dedupe
     2  
     3  import (
     4  	"context"
     5  	"log"
     6  
     7  	"github.com/ncw/rclone/cmd"
     8  	"github.com/ncw/rclone/fs/operations"
     9  	"github.com/spf13/cobra"
    10  )
    11  
    12  var (
    13  	dedupeMode = operations.DeduplicateInteractive
    14  )
    15  
    16  func init() {
    17  	cmd.Root.AddCommand(commandDefintion)
    18  	commandDefintion.Flags().VarP(&dedupeMode, "dedupe-mode", "", "Dedupe mode interactive|skip|first|newest|oldest|rename.")
    19  }
    20  
    21  var commandDefintion = &cobra.Command{
    22  	Use:   "dedupe [mode] remote:path",
    23  	Short: `Interactively find duplicate files and delete/rename them.`,
    24  	Long: `
    25  By default ` + "`" + `dedupe` + "`" + ` interactively finds duplicate files and offers to
    26  delete all but one or rename them to be different. Only useful with
    27  Google Drive which can have duplicate file names.
    28  
    29  In the first pass it will merge directories with the same name.  It
    30  will do this iteratively until all the identical directories have been
    31  merged.
    32  
    33  The ` + "`" + `dedupe` + "`" + ` command will delete all but one of any identical (same
    34  md5sum) files it finds without confirmation.  This means that for most
    35  duplicated files the ` + "`" + `dedupe` + "`" + ` command will not be interactive.  You
    36  can use ` + "`" + `--dry-run` + "`" + ` to see what would happen without doing anything.
    37  
    38  Here is an example run.
    39  
    40  Before - with duplicates
    41  
    42      $ rclone lsl drive:dupes
    43        6048320 2016-03-05 16:23:16.798000000 one.txt
    44        6048320 2016-03-05 16:23:11.775000000 one.txt
    45         564374 2016-03-05 16:23:06.731000000 one.txt
    46        6048320 2016-03-05 16:18:26.092000000 one.txt
    47        6048320 2016-03-05 16:22:46.185000000 two.txt
    48        1744073 2016-03-05 16:22:38.104000000 two.txt
    49         564374 2016-03-05 16:22:52.118000000 two.txt
    50  
    51  Now the ` + "`" + `dedupe` + "`" + ` session
    52  
    53      $ rclone dedupe drive:dupes
    54      2016/03/05 16:24:37 Google drive root 'dupes': Looking for duplicates using interactive mode.
    55      one.txt: Found 4 duplicates - deleting identical copies
    56      one.txt: Deleting 2/3 identical duplicates (md5sum "1eedaa9fe86fd4b8632e2ac549403b36")
    57      one.txt: 2 duplicates remain
    58        1:      6048320 bytes, 2016-03-05 16:23:16.798000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
    59        2:       564374 bytes, 2016-03-05 16:23:06.731000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
    60      s) Skip and do nothing
    61      k) Keep just one (choose which in next step)
    62      r) Rename all to be different (by changing file.jpg to file-1.jpg)
    63      s/k/r> k
    64      Enter the number of the file to keep> 1
    65      one.txt: Deleted 1 extra copies
    66      two.txt: Found 3 duplicates - deleting identical copies
    67      two.txt: 3 duplicates remain
    68        1:       564374 bytes, 2016-03-05 16:22:52.118000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
    69        2:      6048320 bytes, 2016-03-05 16:22:46.185000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
    70        3:      1744073 bytes, 2016-03-05 16:22:38.104000000, md5sum 851957f7fb6f0bc4ce76be966d336802
    71      s) Skip and do nothing
    72      k) Keep just one (choose which in next step)
    73      r) Rename all to be different (by changing file.jpg to file-1.jpg)
    74      s/k/r> r
    75      two-1.txt: renamed from: two.txt
    76      two-2.txt: renamed from: two.txt
    77      two-3.txt: renamed from: two.txt
    78  
    79  The result being
    80  
    81      $ rclone lsl drive:dupes
    82        6048320 2016-03-05 16:23:16.798000000 one.txt
    83         564374 2016-03-05 16:22:52.118000000 two-1.txt
    84        6048320 2016-03-05 16:22:46.185000000 two-2.txt
    85        1744073 2016-03-05 16:22:38.104000000 two-3.txt
    86  
    87  Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" + ` flag or by using an extra parameter with the same value
    88  
    89    * ` + "`" + `--dedupe-mode interactive` + "`" + ` - interactive as above.
    90    * ` + "`" + `--dedupe-mode skip` + "`" + ` - removes identical files then skips anything left.
    91    * ` + "`" + `--dedupe-mode first` + "`" + ` - removes identical files then keeps the first one.
    92    * ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one.
    93    * ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one.
    94    * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one.
    95    * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different.
    96  
    97  For example to rename all the identically named photos in your Google Photos directory, do
    98  
    99      rclone dedupe --dedupe-mode rename "drive:Google Photos"
   100  
   101  Or
   102  
   103      rclone dedupe rename "drive:Google Photos"
   104  `,
   105  	Run: func(command *cobra.Command, args []string) {
   106  		cmd.CheckArgs(1, 2, command, args)
   107  		if len(args) > 1 {
   108  			err := dedupeMode.Set(args[0])
   109  			if err != nil {
   110  				log.Fatal(err)
   111  			}
   112  			args = args[1:]
   113  		}
   114  		fdst := cmd.NewFsSrc(args)
   115  		cmd.Run(false, false, command, func() error {
   116  			return operations.Deduplicate(context.Background(), fdst, dedupeMode)
   117  		})
   118  	},
   119  }