github.com/mckael/restic@v0.8.3/internal/archiver/archiver.go

     1  package archiver
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/restic/restic/internal/errors"
    15  	"github.com/restic/restic/internal/restic"
    16  	"github.com/restic/restic/internal/walk"
    17  
    18  	"github.com/restic/restic/internal/debug"
    19  	"github.com/restic/restic/internal/fs"
    20  	"github.com/restic/restic/internal/pipe"
    21  
    22  	"github.com/restic/chunker"
    23  )
    24  
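        // maxConcurrentBlobs limits how many blobs may be saved to the repository in
        // parallel (see Archiver.blobToken); maxConcurrency is the number of file and
        // dir workers started by Snapshot.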
    25  const (
    26  	maxConcurrentBlobs = 32
    27  	maxConcurrency     = 10
    28  )
    29  
    30  var archiverPrintWarnings = func(path string, fi os.FileInfo, err error) {
     31  	fmt.Fprintf(os.Stderr, "warning for %v: %v\n", path, err)
    32  }
    33  var archiverAllowAllFiles = func(string, os.FileInfo) bool { return true }
    34  
     35  // Archiver is used to back up a set of directories.
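        //
        // A rough usage sketch (repository, progress and path setup are assumed to
        // exist elsewhere):
        //
        //	arch := archiver.New(repo)
        //	sn, id, err := arch.Snapshot(ctx, progress, paths, tags, hostname, nil, time.Now())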
    36  type Archiver struct {
    37  	repo       restic.Repository
    38  	knownBlobs struct {
    39  		restic.IDSet
    40  		sync.Mutex
    41  	}
    42  
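        	// blobToken is a semaphore: a token must be taken from this channel before
        	// a chunk is saved and is returned once the save has finished.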
    43  	blobToken chan struct{}
    44  
    45  	Warn         func(dir string, fi os.FileInfo, err error)
    46  	SelectFilter pipe.SelectFunc
    47  	Excludes     []string
    48  
    49  	WithAccessTime bool
    50  }
    51  
    52  // New returns a new archiver.
    53  func New(repo restic.Repository) *Archiver {
    54  	arch := &Archiver{
    55  		repo:      repo,
    56  		blobToken: make(chan struct{}, maxConcurrentBlobs),
    57  		knownBlobs: struct {
    58  			restic.IDSet
    59  			sync.Mutex
    60  		}{
    61  			IDSet: restic.NewIDSet(),
    62  		},
    63  	}
    64  
    65  	for i := 0; i < maxConcurrentBlobs; i++ {
    66  		arch.blobToken <- struct{}{}
    67  	}
    68  
    69  	arch.Warn = archiverPrintWarnings
    70  	arch.SelectFilter = archiverAllowAllFiles
    71  
    72  	return arch
    73  }
    74  
     75  // isKnownBlob returns true iff the blob is already in the list of known blobs
     76  // or is present in the repository index. When the blob is not known, false is
     77  // returned and the blob is added to the list. The caller that receives false is
     78  // then responsible for saving the blob to the backend.
    79  func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
    80  	arch.knownBlobs.Lock()
    81  	defer arch.knownBlobs.Unlock()
    82  
    83  	if arch.knownBlobs.Has(id) {
    84  		return true
    85  	}
    86  
    87  	arch.knownBlobs.Insert(id)
    88  
    89  	if arch.repo.Index().Has(id, t) {
    90  		return true
    91  	}
    92  
    93  	return false
    94  }
    95  
     96  // Save stores data as a blob of type t in the repository, unless the blob is already known.
    97  func (arch *Archiver) Save(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
    98  	debug.Log("Save(%v, %v)\n", t, id)
    99  
    100  	if arch.isKnownBlob(id, t) {
   101  		debug.Log("blob %v is known\n", id)
   102  		return nil
   103  	}
   104  
   105  	_, err := arch.repo.SaveBlob(ctx, t, data, id)
   106  	if err != nil {
   107  		debug.Log("Save(%v, %v): error %v\n", t, id, err)
   108  		return err
   109  	}
   110  
   111  	debug.Log("Save(%v, %v): new blob\n", t, id)
   112  	return nil
   113  }
   114  
   115  // SaveTreeJSON stores a tree in the repository.
   116  func (arch *Archiver) SaveTreeJSON(ctx context.Context, tree *restic.Tree) (restic.ID, error) {
   117  	data, err := json.Marshal(tree)
   118  	if err != nil {
   119  		return restic.ID{}, errors.Wrap(err, "Marshal")
   120  	}
   121  	data = append(data, '\n')
   122  
   123  	// check if tree has been saved before
   124  	id := restic.Hash(data)
   125  	if arch.isKnownBlob(id, restic.TreeBlob) {
   126  		return id, nil
   127  	}
   128  
   129  	return arch.repo.SaveBlob(ctx, restic.TreeBlob, data, id)
   130  }
   131  
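        // reloadFileIfChanged stats the open file and, if its modification time
        // differs from the one recorded in node, warns and rebuilds the node from the
        // current file info. If WithAccessTime is not set, the access time is reset to
        // the modification time.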
   132  func (arch *Archiver) reloadFileIfChanged(node *restic.Node, file fs.File) (*restic.Node, error) {
   133  	if !arch.WithAccessTime {
   134  		node.AccessTime = node.ModTime
   135  	}
   136  
   137  	fi, err := file.Stat()
   138  	if err != nil {
    139  		return nil, errors.Wrap(err, "Stat")
   140  	}
   141  
   142  	if fi.ModTime().Equal(node.ModTime) {
   143  		return node, nil
   144  	}
   145  
   146  	arch.Warn(node.Path, fi, errors.New("file has changed"))
   147  
   148  	node, err = restic.NodeFromFileInfo(node.Path, fi)
   149  	if err != nil {
   150  		debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
   151  		arch.Warn(node.Path, fi, err)
   152  	}
   153  
   154  	if !arch.WithAccessTime {
   155  		node.AccessTime = node.ModTime
   156  	}
   157  
   158  	return node, nil
   159  }
   160  
   161  type saveResult struct {
   162  	id    restic.ID
   163  	bytes uint64
   164  }
   165  
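        // saveChunk hashes a single chunk, stores it as a data blob, returns the
        // concurrency token to blobToken and reports the chunk's ID and size on
        // resultChannel. It is intended to be run as a goroutine.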
   166  func (arch *Archiver) saveChunk(ctx context.Context, chunk chunker.Chunk, p *restic.Progress, token struct{}, file fs.File, resultChannel chan<- saveResult) {
   167  	defer freeBuf(chunk.Data)
   168  
   169  	id := restic.Hash(chunk.Data)
   170  	err := arch.Save(ctx, restic.DataBlob, chunk.Data, id)
   171  	// TODO handle error
   172  	if err != nil {
   173  		debug.Log("Save(%v) failed: %v", id, err)
   174  		fmt.Printf("\nerror while saving data to the repo: %+v\n", err)
   175  		panic(err)
   176  	}
   177  
   178  	p.Report(restic.Stat{Bytes: uint64(chunk.Length)})
   179  	arch.blobToken <- token
   180  	resultChannel <- saveResult{id: id, bytes: uint64(chunk.Length)}
   181  }
   182  
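        // waitForResults collects exactly one saveResult from each result channel, in
        // the order the channels were created.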
   183  func waitForResults(resultChannels [](<-chan saveResult)) ([]saveResult, error) {
   184  	results := []saveResult{}
   185  
   186  	for _, ch := range resultChannels {
   187  		results = append(results, <-ch)
   188  	}
   189  
   190  	if len(results) != len(resultChannels) {
   191  		return nil, errors.Errorf("chunker returned %v chunks, but only %v blobs saved", len(resultChannels), len(results))
   192  	}
   193  
   194  	return results, nil
   195  }
   196  
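        // updateNodeContent records the IDs of all saved blobs in node.Content and
        // prints a warning when the number of bytes saved differs from the size
        // recorded in the node.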
   197  func updateNodeContent(node *restic.Node, results []saveResult) error {
   198  	debug.Log("checking size for file %s", node.Path)
   199  
   200  	var bytes uint64
   201  	node.Content = make([]restic.ID, len(results))
   202  
   203  	for i, b := range results {
   204  		node.Content[i] = b.id
   205  		bytes += b.bytes
   206  
   207  		debug.Log("  adding blob %s, %d bytes", b.id, b.bytes)
   208  	}
   209  
   210  	if bytes != node.Size {
   211  		fmt.Fprintf(os.Stderr, "warning for %v: expected %d bytes, saved %d bytes\n", node.Path, node.Size, bytes)
   212  	}
   213  
   214  	debug.Log("SaveFile(%q): %v blobs\n", node.Path, len(results))
   215  
   216  	return nil
   217  }
   218  
    219  // SaveFile stores the content of the file on the backend as data blobs by
    220  // calling Save for each chunk.
   221  func (arch *Archiver) SaveFile(ctx context.Context, p *restic.Progress, node *restic.Node) (*restic.Node, error) {
   222  	file, err := fs.Open(node.Path)
   223  	if err != nil {
   224  		return node, errors.Wrap(err, "Open")
   225  	}
   226  	defer file.Close()
   227  
   228  	debug.RunHook("archiver.SaveFile", node.Path)
   229  
   230  	node, err = arch.reloadFileIfChanged(node, file)
   231  	if err != nil {
   232  		return node, err
   233  	}
   234  
   235  	chnker := chunker.New(file, arch.repo.Config().ChunkerPolynomial)
   236  	resultChannels := [](<-chan saveResult){}
   237  
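        	// split the file into content-defined chunks; each chunk is saved in its own
        	// goroutine, limited by the blobToken semaphore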
   238  	for {
   239  		chunk, err := chnker.Next(getBuf())
   240  		if errors.Cause(err) == io.EOF {
   241  			break
   242  		}
   243  
   244  		if err != nil {
   245  			return node, errors.Wrap(err, "chunker.Next")
   246  		}
   247  
   248  		resCh := make(chan saveResult, 1)
   249  		go arch.saveChunk(ctx, chunk, p, <-arch.blobToken, file, resCh)
   250  		resultChannels = append(resultChannels, resCh)
   251  	}
   252  
   253  	results, err := waitForResults(resultChannels)
   254  	if err != nil {
   255  		return node, err
   256  	}
   257  	err = updateNodeContent(node, results)
   258  
   259  	return node, err
   260  }
   261  
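        // fileWorker processes file entries from entCh. For each entry it builds a
        // node from the file info, reuses the content of the corresponding node from
        // the parent snapshot if all referenced blobs are still present in the index,
        // and otherwise reads and saves the file.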
   262  func (arch *Archiver) fileWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, entCh <-chan pipe.Entry) {
   263  	defer func() {
   264  		debug.Log("done")
   265  		wg.Done()
   266  	}()
   267  	for {
   268  		select {
   269  		case e, ok := <-entCh:
   270  			if !ok {
   271  				// channel is closed
   272  				return
   273  			}
   274  
   275  			debug.Log("got job %v", e)
   276  
   277  			// check for errors
   278  			if e.Error() != nil {
   279  				debug.Log("job %v has errors: %v", e.Path(), e.Error())
   280  				// TODO: integrate error reporting
   281  				fmt.Fprintf(os.Stderr, "error for %v: %v\n", e.Path(), e.Error())
   282  				// ignore this file
   283  				e.Result() <- nil
   284  				p.Report(restic.Stat{Errors: 1})
   285  				continue
   286  			}
   287  
   288  			node, err := restic.NodeFromFileInfo(e.Fullpath(), e.Info())
   289  			if err != nil {
   290  				debug.Log("restic.NodeFromFileInfo returned error for %v: %v", node.Path, err)
   291  				arch.Warn(e.Fullpath(), e.Info(), err)
   292  			}
   293  
   294  			if !arch.WithAccessTime {
   295  				node.AccessTime = node.ModTime
   296  			}
   297  
   298  			// try to use old node, if present
   299  			if e.Node != nil {
   300  				debug.Log("   %v use old data", e.Path())
   301  
   302  				oldNode := e.Node.(*restic.Node)
   303  				// check if all content is still available in the repository
   304  				contentMissing := false
   305  				for _, blob := range oldNode.Content {
   306  					if !arch.repo.Index().Has(blob, restic.DataBlob) {
   307  						debug.Log("   %v not using old data, %v is missing", e.Path(), blob)
   308  						contentMissing = true
   309  						break
   310  					}
   311  				}
   312  
   313  				if !contentMissing {
   314  					node.Content = oldNode.Content
   315  					debug.Log("   %v content is complete", e.Path())
   316  				}
   317  			} else {
   318  				debug.Log("   %v no old data", e.Path())
   319  			}
   320  
   321  			// otherwise read file normally
   322  			if node.Type == "file" && len(node.Content) == 0 {
   323  				debug.Log("   read and save %v", e.Path())
   324  				node, err = arch.SaveFile(ctx, p, node)
   325  				if err != nil {
   326  					fmt.Fprintf(os.Stderr, "error for %v: %v\n", node.Path, err)
   327  					arch.Warn(e.Path(), nil, err)
   328  					// ignore this file
   329  					e.Result() <- nil
   330  					p.Report(restic.Stat{Errors: 1})
   331  					continue
   332  				}
   333  			} else {
   334  				// report old data size
   335  				p.Report(restic.Stat{Bytes: node.Size})
   336  			}
   337  
   338  			debug.Log("   processed %v, %d blobs", e.Path(), len(node.Content))
   339  			e.Result() <- node
   340  			p.Report(restic.Stat{Files: 1})
   341  		case <-ctx.Done():
   342  			// pipeline was cancelled
   343  			return
   344  		}
   345  	}
   346  }
   347  
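        // dirWorker receives completed directories from dirCh, collects the result
        // nodes of all entries into a tree, saves the tree and sends the directory
        // node (pointing to the new subtree) to the waiting parent.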
   348  func (arch *Archiver) dirWorker(ctx context.Context, wg *sync.WaitGroup, p *restic.Progress, dirCh <-chan pipe.Dir) {
   349  	debug.Log("start")
   350  	defer func() {
   351  		debug.Log("done")
   352  		wg.Done()
   353  	}()
   354  	for {
   355  		select {
   356  		case dir, ok := <-dirCh:
   357  			if !ok {
   358  				// channel is closed
   359  				return
   360  			}
   361  			debug.Log("save dir %v (%d entries), error %v\n", dir.Path(), len(dir.Entries), dir.Error())
   362  
   363  			// ignore dir nodes with errors
   364  			if dir.Error() != nil {
   365  				fmt.Fprintf(os.Stderr, "error walking dir %v: %v\n", dir.Path(), dir.Error())
   366  				dir.Result() <- nil
   367  				p.Report(restic.Stat{Errors: 1})
   368  				continue
   369  			}
   370  
   371  			tree := restic.NewTree()
   372  
   373  			// wait for all content
   374  			for _, ch := range dir.Entries {
   375  				debug.Log("receiving result from %v", ch)
   376  				res := <-ch
   377  
   378  				// if we get a nil pointer here, an error has happened while
   379  				// processing this entry. Ignore it for now.
   380  				if res == nil {
   381  					debug.Log("got nil result?")
   382  					continue
   383  				}
   384  
   385  				// else insert node
   386  				node := res.(*restic.Node)
   387  
   388  				if node.Type == "dir" {
   389  					debug.Log("got tree node for %s: %v", node.Path, node.Subtree)
   390  
   391  					if node.Subtree == nil {
   392  						debug.Log("subtree is nil for node %v", node.Path)
   393  						continue
   394  					}
   395  
   396  					if node.Subtree.IsNull() {
    397  						panic("invalid null subtree ID")
   398  					}
   399  				}
   400  
   401  				// insert node into tree, resolve name collisions
   402  				name := node.Name
   403  				i := 0
   404  				for {
   405  					i++
   406  					err := tree.Insert(node)
   407  					if err == nil {
   408  						break
   409  					}
   410  
   411  					newName := fmt.Sprintf("%v-%d", name, i)
   412  					fmt.Fprintf(os.Stderr, "%v: name collision for %q, renaming to %q\n", filepath.Dir(node.Path), node.Name, newName)
   413  					node.Name = newName
   414  				}
   415  
   416  			}
   417  
   418  			node := &restic.Node{}
   419  
   420  			if dir.Path() != "" && dir.Info() != nil {
   421  				n, err := restic.NodeFromFileInfo(dir.Fullpath(), dir.Info())
   422  				if err != nil {
   423  					arch.Warn(dir.Path(), dir.Info(), err)
   424  				}
   425  				node = n
   426  
   427  				if !arch.WithAccessTime {
   428  					node.AccessTime = node.ModTime
   429  				}
   430  			}
   431  
   432  			if err := dir.Error(); err != nil {
   433  				node.Error = err.Error()
   434  			}
   435  
   436  			id, err := arch.SaveTreeJSON(ctx, tree)
   437  			if err != nil {
   438  				panic(err)
   439  			}
   440  			debug.Log("save tree for %s: %v", dir.Path(), id)
   441  			if id.IsNull() {
    442  				panic("invalid null subtree ID returned from SaveTreeJSON()")
   443  			}
   444  
   445  			node.Subtree = &id
   446  
   447  			debug.Log("sending result to %v", dir.Result())
   448  
   449  			dir.Result() <- node
   450  			if dir.Path() != "" {
   451  				p.Report(restic.Stat{Dirs: 1})
   452  			}
   453  		case <-ctx.Done():
   454  			// pipeline was cancelled
   455  			return
   456  		}
   457  	}
   458  }
   459  
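        // archivePipe holds the two job streams that are merged during an incremental
        // backup: the walk of the parent snapshot's tree (Old) and the walk of the
        // current filesystem (New).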
   460  type archivePipe struct {
   461  	Old <-chan walk.TreeJob
   462  	New <-chan pipe.Job
   463  }
   464  
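        // copyJobs forwards all jobs from in to out until in is closed or ctx is
        // cancelled. By alternately setting the receive and send channel to nil, only
        // one job is held at a time.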
   465  func copyJobs(ctx context.Context, in <-chan pipe.Job, out chan<- pipe.Job) {
   466  	var (
   467  		// disable sending on the outCh until we received a job
   468  		outCh chan<- pipe.Job
   469  		// enable receiving from in
   470  		inCh = in
   471  		job  pipe.Job
   472  		ok   bool
   473  	)
   474  
   475  	for {
   476  		select {
   477  		case <-ctx.Done():
   478  			return
   479  		case job, ok = <-inCh:
   480  			if !ok {
   481  				// input channel closed, we're done
   482  				debug.Log("input channel closed, we're done")
   483  				return
   484  			}
   485  			inCh = nil
   486  			outCh = out
   487  		case outCh <- job:
   488  			outCh = nil
   489  			inCh = in
   490  		}
   491  	}
   492  }
   493  
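        // archiveJob pairs a job from the parent snapshot walk (old) with the
        // corresponding job from the current walk (new).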
   494  type archiveJob struct {
   495  	hasOld bool
   496  	old    walk.TreeJob
   497  	new    pipe.Job
   498  }
   499  
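        // compare merges the Old and New job streams in path order: paths present in
        // both streams are paired, paths only present in the new stream are passed
        // through unchanged, and paths only present in the old stream are dropped.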
   500  func (a *archivePipe) compare(ctx context.Context, out chan<- pipe.Job) {
   501  	defer func() {
   502  		close(out)
   503  		debug.Log("done")
   504  	}()
   505  
   506  	debug.Log("start")
   507  	var (
   508  		loadOld, loadNew bool = true, true
   509  		ok               bool
   510  		oldJob           walk.TreeJob
   511  		newJob           pipe.Job
   512  	)
   513  
   514  	for {
   515  		if loadOld {
   516  			oldJob, ok = <-a.Old
   517  			// if the old channel is closed, just pass through the new jobs
   518  			if !ok {
   519  				debug.Log("old channel is closed, copy from new channel")
   520  
   521  				// handle remaining newJob
   522  				if !loadNew {
   523  					out <- archiveJob{new: newJob}.Copy()
   524  				}
   525  
   526  				copyJobs(ctx, a.New, out)
   527  				return
   528  			}
   529  
   530  			loadOld = false
   531  		}
   532  
   533  		if loadNew {
   534  			newJob, ok = <-a.New
   535  			// if the new channel is closed, there are no more files in the current snapshot, return
   536  			if !ok {
   537  				debug.Log("new channel is closed, we're done")
   538  				return
   539  			}
   540  
   541  			loadNew = false
   542  		}
   543  
   544  		debug.Log("old job: %v", oldJob.Path)
   545  		debug.Log("new job: %v", newJob.Path())
   546  
   547  		// at this point we have received an old job as well as a new job, compare paths
   548  		file1 := oldJob.Path
   549  		file2 := newJob.Path()
   550  
   551  		dir1 := filepath.Dir(file1)
   552  		dir2 := filepath.Dir(file2)
   553  
   554  		if file1 == file2 {
   555  			debug.Log("    same filename %q", file1)
   556  
   557  			// send job
   558  			out <- archiveJob{hasOld: true, old: oldJob, new: newJob}.Copy()
   559  			loadOld = true
   560  			loadNew = true
   561  			continue
   562  		} else if dir1 < dir2 {
   563  			debug.Log("    %q < %q, file %q added", dir1, dir2, file2)
   564  			// file is new, send new job and load new
   565  			loadNew = true
   566  			out <- archiveJob{new: newJob}.Copy()
   567  			continue
   568  		} else if dir1 == dir2 {
   569  			if file1 < file2 {
   570  				debug.Log("    %q < %q, file %q removed", file1, file2, file1)
    571  				// file has been removed, load the next old job
   572  				loadOld = true
   573  				continue
   574  			} else {
   575  				debug.Log("    %q > %q, file %q added", file1, file2, file2)
   576  				// file is new, send new job and load new
   577  				loadNew = true
   578  				out <- archiveJob{new: newJob}.Copy()
   579  				continue
   580  			}
   581  		}
   582  
   583  		debug.Log("    %q > %q, file %q removed", file1, file2, file1)
   584  		// file has been removed, throw away old job and load new
   585  		loadOld = true
   586  	}
   587  }
   588  
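        // Copy returns the new job. For regular files that have not changed compared
        // to the parent snapshot, the job is annotated with the old node so that its
        // content can be reused.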
   589  func (j archiveJob) Copy() pipe.Job {
   590  	if !j.hasOld {
   591  		return j.new
   592  	}
   593  
   594  	// handle files
   595  	if isRegularFile(j.new.Info()) {
   596  		debug.Log("   job %v is file", j.new.Path())
   597  
   598  		// if type has changed, return new job directly
   599  		if j.old.Node == nil {
   600  			return j.new
   601  		}
   602  
   603  		// if file is newer, return the new job
   604  		if j.old.Node.IsNewer(j.new.Fullpath(), j.new.Info()) {
   605  			debug.Log("   job %v is newer", j.new.Path())
   606  			return j.new
   607  		}
   608  
   609  		debug.Log("   job %v add old data", j.new.Path())
   610  		// otherwise annotate job with old data
   611  		e := j.new.(pipe.Entry)
   612  		e.Node = j.old.Node
   613  		return e
   614  	}
   615  
   616  	// dirs and other types are just returned
   617  	return j.new
   618  }
   619  
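        // saveIndexTime is the interval at which saveIndexes writes full in-memory
        // indexes to the repository while a snapshot is running.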
   620  const saveIndexTime = 30 * time.Second
   621  
   622  // saveIndexes regularly queries the master index for full indexes and saves them.
   623  func (arch *Archiver) saveIndexes(saveCtx, shutdownCtx context.Context, wg *sync.WaitGroup) {
   624  	defer wg.Done()
   625  
   626  	ticker := time.NewTicker(saveIndexTime)
   627  	defer ticker.Stop()
   628  
   629  	for {
   630  		select {
   631  		case <-saveCtx.Done():
   632  			return
   633  		case <-shutdownCtx.Done():
   634  			return
   635  		case <-ticker.C:
   636  			debug.Log("saving full indexes")
   637  			err := arch.repo.SaveFullIndex(saveCtx)
   638  			if err != nil {
   639  				debug.Log("save indexes returned an error: %v", err)
   640  				fmt.Fprintf(os.Stderr, "error saving preliminary index: %v\n", err)
   641  			}
   642  		}
   643  	}
   644  }
   645  
   646  // unique returns a slice that only contains unique strings.
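        // The input slice is reused and the order of the returned items is
        // unspecified.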
   647  func unique(items []string) []string {
   648  	seen := make(map[string]struct{})
   649  	for _, item := range items {
   650  		seen[item] = struct{}{}
   651  	}
   652  
   653  	items = items[:0]
   654  	for item := range seen {
   655  		items = append(items, item)
   656  	}
   657  	return items
   658  }
   659  
   660  // baseNameSlice allows sorting paths by basename.
   661  //
   662  // Snapshots have contents sorted by basename, but we receive full paths.
   663  // For the archivePipe to advance them in pairs, we traverse the given
   664  // paths in the same order as the snapshot.
   665  type baseNameSlice []string
   666  
   667  func (p baseNameSlice) Len() int           { return len(p) }
   668  func (p baseNameSlice) Less(i, j int) bool { return filepath.Base(p[i]) < filepath.Base(p[j]) }
   669  func (p baseNameSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   670  
    671  // Snapshot creates a snapshot of the given paths. If parentID is set, this is
   672  // used to compare the files to the ones archived at the time this snapshot was
   673  // taken.
   674  func (arch *Archiver) Snapshot(ctx context.Context, p *restic.Progress, paths, tags []string, hostname string, parentID *restic.ID, time time.Time) (*restic.Snapshot, restic.ID, error) {
   675  	paths = unique(paths)
   676  	sort.Sort(baseNameSlice(paths))
   677  
   678  	debug.Log("start for %v", paths)
   679  
   680  	debug.RunHook("Archiver.Snapshot", nil)
   681  
    683  	var err error
   684  
   685  	p.Start()
   686  	defer p.Done()
   687  
   688  	// create new snapshot
   689  	sn, err := restic.NewSnapshot(paths, tags, hostname, time)
   690  	if err != nil {
   691  		return nil, restic.ID{}, err
   692  	}
   693  	sn.Excludes = arch.Excludes
   694  
   695  	jobs := archivePipe{}
   696  
    697  	// use parent snapshot (if one was given)
   698  	if parentID != nil {
   699  		sn.Parent = parentID
   700  
   701  		// load parent snapshot
   702  		parent, err := restic.LoadSnapshot(ctx, arch.repo, *parentID)
   703  		if err != nil {
   704  			return nil, restic.ID{}, err
   705  		}
   706  
   707  		// start walker on old tree
   708  		ch := make(chan walk.TreeJob)
   709  		go walk.Tree(ctx, arch.repo, *parent.Tree, ch)
   710  		jobs.Old = ch
   711  	} else {
   712  		// use closed channel
   713  		ch := make(chan walk.TreeJob)
   714  		close(ch)
   715  		jobs.Old = ch
   716  	}
   717  
   718  	// start walker
   719  	pipeCh := make(chan pipe.Job)
   720  	resCh := make(chan pipe.Result, 1)
   721  	go func() {
   722  		pipe.Walk(ctx, paths, arch.SelectFilter, pipeCh, resCh)
   723  		debug.Log("pipe.Walk done")
   724  	}()
   725  	jobs.New = pipeCh
   726  
   727  	ch := make(chan pipe.Job)
   728  	go jobs.compare(ctx, ch)
   729  
   730  	var wg sync.WaitGroup
   731  	entCh := make(chan pipe.Entry)
   732  	dirCh := make(chan pipe.Dir)
   733  
   734  	// split
   735  	wg.Add(1)
   736  	go func() {
   737  		pipe.Split(ch, dirCh, entCh)
   738  		debug.Log("split done")
   739  		close(dirCh)
   740  		close(entCh)
   741  		wg.Done()
   742  	}()
   743  
   744  	// run workers
   745  	for i := 0; i < maxConcurrency; i++ {
   746  		wg.Add(2)
   747  		go arch.fileWorker(ctx, &wg, p, entCh)
   748  		go arch.dirWorker(ctx, &wg, p, dirCh)
   749  	}
   750  
   751  	// run index saver
   752  	var wgIndexSaver sync.WaitGroup
   753  	shutdownCtx, indexShutdown := context.WithCancel(ctx)
   754  	wgIndexSaver.Add(1)
   755  	go arch.saveIndexes(ctx, shutdownCtx, &wgIndexSaver)
   756  
   757  	// wait for all workers to terminate
   758  	debug.Log("wait for workers")
   759  	wg.Wait()
   760  
   761  	// stop index saver
   762  	indexShutdown()
   763  	wgIndexSaver.Wait()
   764  
   765  	debug.Log("workers terminated")
   766  
   767  	// flush repository
   768  	err = arch.repo.Flush(ctx)
   769  	if err != nil {
   770  		return nil, restic.ID{}, err
   771  	}
   772  
   773  	// receive the top-level tree
   774  	root := (<-resCh).(*restic.Node)
   775  	debug.Log("root node received: %v", root.Subtree)
   776  	sn.Tree = root.Subtree
   777  
   778  	// load top-level tree again to see if it is empty
   779  	toptree, err := arch.repo.LoadTree(ctx, *root.Subtree)
   780  	if err != nil {
   781  		return nil, restic.ID{}, err
   782  	}
   783  
   784  	if len(toptree.Nodes) == 0 {
   785  		return nil, restic.ID{}, errors.Fatal("no files/dirs saved, refusing to create empty snapshot")
   786  	}
   787  
   788  	// save index
   789  	err = arch.repo.SaveIndex(ctx)
   790  	if err != nil {
   791  		debug.Log("error saving index: %v", err)
   792  		return nil, restic.ID{}, err
   793  	}
   794  
   795  	debug.Log("saved indexes")
   796  
   797  	// save snapshot
   798  	id, err := arch.repo.SaveJSONUnpacked(ctx, restic.SnapshotFile, sn)
   799  	if err != nil {
   800  		return nil, restic.ID{}, err
   801  	}
   802  
   803  	debug.Log("saved snapshot %v", id)
   804  
   805  	return sn, id, nil
   806  }
   807  
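        // isRegularFile reports whether fi describes a regular file, i.e. not a
        // directory, symlink, device or other special file.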
   808  func isRegularFile(fi os.FileInfo) bool {
   809  	if fi == nil {
   810  		return false
   811  	}
   812  
   813  	return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
   814  }
   815  
   816  // Scan traverses the dirs to collect restic.Stat information while emitting progress
   817  // information with p.
   818  func Scan(dirs []string, filter pipe.SelectFunc, p *restic.Progress) (restic.Stat, error) {
   819  	p.Start()
   820  	defer p.Done()
   821  
   822  	var stat restic.Stat
   823  
   824  	for _, dir := range dirs {
   825  		debug.Log("Start for %v", dir)
   826  		err := fs.Walk(dir, func(str string, fi os.FileInfo, err error) error {
   827  			// TODO: integrate error reporting
   828  			if err != nil {
   829  				fmt.Fprintf(os.Stderr, "error for %v: %v\n", str, err)
   830  				return nil
   831  			}
   832  			if fi == nil {
   833  				fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
   834  				return nil
   835  			}
   836  
   837  			if !filter(str, fi) {
   838  				debug.Log("path %v excluded", str)
   839  				if fi.IsDir() {
   840  					return filepath.SkipDir
   841  				}
   842  				return nil
   843  			}
   844  
   845  			s := restic.Stat{}
   846  			if fi.IsDir() {
   847  				s.Dirs++
   848  			} else {
   849  				s.Files++
   850  
   851  				if isRegularFile(fi) {
   852  					s.Bytes += uint64(fi.Size())
   853  				}
   854  			}
   855  
   856  			p.Report(s)
   857  			stat.Add(s)
   858  
   859  			// TODO: handle error?
   860  			return nil
   861  		})
   862  
   863  		debug.Log("Done for %v, err: %v", dir, err)
   864  		if err != nil {
   865  			return restic.Stat{}, errors.Wrap(err, "fs.Walk")
   866  		}
   867  	}
   868  
   869  	return stat, nil
   870  }