github.com/shivakar/gdupes@v0.0.0-20180726052558-d5c070c306d0/gdupes/run.go (about)

     1  package gdupes
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"sort"
     8  	"sync"
     9  	"time"
    10  )
    11  
    12  // checkAddDirectory adds a directory to the list of directories if not already
    13  // present
    14  func checkAddDirectory(dirs []string, d string) []string {
    15  	for _, v := range dirs {
    16  		if v == d {
    17  			return dirs
    18  		}
    19  	}
    20  	return append(dirs, d)
    21  }
    22  
    23  // Run orchestrates gdupes execution
    24  func Run(c *Config, args []string) ([][]string, error) {
    25  	st := time.Now()
    26  	if c.Writer == nil {
    27  		c.Writer = os.Stdout
    28  	}
    29  
    30  	if c.PrintVersion {
    31  		fmt.Fprintf(c.Writer, "gdupes v%s\n", VERSION)
    32  		return nil, nil
    33  	}
    34  	if len(args) < 1 {
    35  		return nil, errors.New("must specify at least one directory to scan")
    36  	}
    37  	for _, d := range args {
    38  		if fi, err := os.Stat(d); err != nil || !fi.IsDir() {
    39  			return nil, fmt.Errorf("directory '%s' does not exist", d)
    40  		}
    41  		c.Directories = checkAddDirectory(c.Directories, d)
    42  	}
    43  
    44  	var wg sync.WaitGroup
    45  	var lock sync.Mutex
    46  
    47  	filesToProcess := make(chan string, 500)
    48  	fileHashes := make(map[string]FileMetaSlice)
    49  
    50  	// Populating filesToProcess
    51  	wg.Add(1)
    52  	go PopulateFiles(c, filesToProcess, c.Directories, &wg)
    53  
    54  	// Creating workers to process
    55  	wg.Add(c.NumWorkers)
    56  	for i := 0; i < c.NumWorkers; i++ {
    57  		go ProcessFiles(c, filesToProcess, fileHashes, &lock, &wg)
    58  	}
    59  	wg.Wait()
    60  
    61  	nSets := 0
    62  	nDups := 0
    63  	tSize := int64(0)
    64  
    65  	out := make([][]string, 0)
    66  	for _, v := range fileHashes {
    67  		if len(v) > 1 {
    68  			nSets++
    69  			nDups += len(v) - 1
    70  			for i, fm := range v {
    71  				if i > 0 {
    72  					tSize += fm.Info.Size()
    73  				}
    74  			}
    75  			duplicates := v.GetFilenames()
    76  			sort.Strings(duplicates)
    77  			out = append(out, duplicates)
    78  		}
    79  	}
    80  
    81  	suffix := "\n"
    82  	if c.Sameline {
    83  		suffix = " "
    84  	}
    85  	if !c.Summarize {
    86  		for _, s := range out {
    87  			for _, v := range s {
    88  				fmt.Fprintf(c.Writer, "%s%s", v, suffix)
    89  			}
    90  			fmt.Fprintln(c.Writer)
    91  		}
    92  	}
    93  
    94  	if c.Summarize {
    95  		fmt.Fprintf(c.Writer, "%d duplicate files (in %d sets), occupying %s.\n",
    96  			nDups, nSets, HumanizeSize(float64(tSize)))
    97  		fmt.Fprintf(c.Writer, "Total time for processing: %v\n", time.Since(st))
    98  	}
    99  
   100  	return out, nil
   101  }