cmd/camput/files.go

/*
Copyright 2011 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"bufio"
	"crypto/sha1"
	"errors"
	"flag"
	"fmt"
	"hash"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/blobserver"
	statspkg "camlistore.org/pkg/blobserver/stats"
	"camlistore.org/pkg/client"
	"camlistore.org/pkg/client/android"
	"camlistore.org/pkg/cmdmain"
	"camlistore.org/pkg/schema"
)

type fileCmd struct {
	title string
	tag   string

	makePermanode     bool // make new, unique permanode of the root (dir or file)
	filePermanodes    bool // make planned permanodes for each file (based on their digest)
	vivify            bool
	exifTime          bool // use metadata (such as in EXIF) to find the creation time of the file
	capCtime          bool // use mtime as the file's creation time if the creation time would be newer than the modification time
	diskUsage         bool // show "du" disk usage only (dry run mode), don't actually upload
	argsFromInput     bool // Android mode: filenames piped into stdin, one at a time.
	deleteAfterUpload bool // with -filenodes, deletes the input file once uploaded

	statcache bool

	// Go into in-memory stats mode only; doesn't actually upload.
	memstats bool
	histo    string // optional histogram output filename
}

var flagUseSQLiteChildCache bool // Use sqlite for the statcache and havecache.

func init() {
	cmdmain.RegisterCommand("file", func(flags *flag.FlagSet) cmdmain.CommandRunner {
		cmd := new(fileCmd)
		flags.BoolVar(&cmd.makePermanode, "permanode", false, "Create and associate a new permanode for the uploaded file or directory.")
		flags.BoolVar(&cmd.filePermanodes, "filenodes", false, "Create (if necessary) content-based permanodes for each uploaded file.")
		flags.BoolVar(&cmd.deleteAfterUpload, "delete_after_upload", false, "If using -filenodes, deletes files once they're uploaded, or if they've already been uploaded.")
		flags.BoolVar(&cmd.vivify, "vivify", false,
			"If true, ask the server to create and sign permanode(s) associated with each uploaded"+
				" file. This permits the server to have your signing key. Used mostly with untrusted"+
				" or at-risk clients, such as phones.")
		flags.BoolVar(&cmd.exifTime, "exiftime", false, "Try to use metadata (such as EXIF) to get a stable creation time. If found, used as the replacement for the modtime. Mainly useful with vivify or filenodes.")
		flags.StringVar(&cmd.title, "title", "", "Optional title attribute to set on permanode when using -permanode.")
		flags.StringVar(&cmd.tag, "tag", "", "Optional tag(s) to set on permanode when using -permanode or -filenodes. Single value or comma separated.")

		flags.BoolVar(&cmd.diskUsage, "du", false, "Dry run mode: only show disk usage information, without upload or statting dest. Used for testing skipDirs configs, mostly.")

		if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
			flags.BoolVar(&cmd.statcache, "statcache", true, "Use the stat cache, assuming unchanged files already uploaded in the past are still there. Fast, but potentially dangerous.")
			flags.BoolVar(&cmd.memstats, "debug-memstats", false, "Enter debug in-memory mode; collecting stats only. Doesn't upload anything.")
			flags.StringVar(&cmd.histo, "debug-histogram-file", "", "Optional file to create and write the blob size for each file uploaded. For use with GNU R and hist(read.table(\"filename\")$V1). Requires debug-memstats.")
			flags.BoolVar(&cmd.capCtime, "capctime", false, "For file blobs, use the file modification time as creation time if the creation time would be newer than the modification time. For stable filenode creation (you can forge mtime, but can't forge ctime).")
			flags.BoolVar(&flagUseSQLiteChildCache, "sqlitecache", false, "Use sqlite for the statcache and havecache instead of a flat cache.")
		} else {
			cmd.statcache = true
		}
		if android.IsChild() {
			flags.BoolVar(&cmd.argsFromInput, "stdinargs", false, "If true, filenames to upload are sent one-per-line on stdin. EOF means to quit the process with exit status 0.")
		}
		flagCacheLog = flags.Bool("logcache", false, "log caching details")

		return cmd
	})
}

func (c *fileCmd) Describe() string {
	return "Upload file(s)."
}

func (c *fileCmd) Usage() {
	fmt.Fprintf(cmdmain.Stderr, "Usage: camput [globalopts] file [fileopts] <file/director(ies)>\n")
}

func (c *fileCmd) Examples() []string {
	return []string{
		"[opts] <file(s)/director(ies)>",
		"--permanode --title='Homedir backup' --tag=backup,homedir $HOME",
		"--filenodes /mnt/camera/DCIM",
	}
}

func (c *fileCmd) RunCommand(args []string) error {
	if c.vivify {
		if c.makePermanode || c.filePermanodes || c.tag != "" || c.title != "" {
			return cmdmain.UsageError("--vivify excludes any other option")
		}
	}
	if c.title != "" && !c.makePermanode {
		return cmdmain.UsageError("Can't set title without using --permanode")
	}
	if c.tag != "" && !c.makePermanode && !c.filePermanodes {
		return cmdmain.UsageError("Can't set tag without using --permanode or --filenodes")
	}
	if c.histo != "" && !c.memstats {
		return cmdmain.UsageError("Can't use --debug-histogram-file without --debug-memstats")
	}
	if c.deleteAfterUpload && !c.filePermanodes {
		return cmdmain.UsageError("Can't use --delete_after_upload without --filenodes")
	}
	up := getUploader()
	if c.memstats {
		sr := new(statspkg.Receiver)
		up.altStatReceiver = sr
		defer func() { DumpStats(sr, c.histo) }()
	}
	c.initCaches(up)

	if c.makePermanode || c.filePermanodes {
		testSigBlobRef := up.Client.SignerPublicKeyBlobref()
		if !testSigBlobRef.Valid() {
			return cmdmain.UsageError("A GPG key is needed to create permanodes; configure one or use vivify mode.")
		}
	}
	up.fileOpts = &fileOptions{
		permanode: c.filePermanodes,
		tag:       c.tag,
		vivify:    c.vivify,
		exifTime:  c.exifTime,
		capCtime:  c.capCtime,
	}

	var (
		permaNode *client.PutResult
		lastPut   *client.PutResult
		err       error
	)
	if c.makePermanode {
		if len(args) != 1 {
			return fmt.Errorf("The --permanode flag can only be used with exactly one file or directory argument")
		}
		permaNode, err = up.UploadNewPermanode()
		if err != nil {
			return fmt.Errorf("Uploading permanode: %v", err)
		}
	}
	if c.diskUsage {
		if len(args) != 1 {
			return fmt.Errorf("The --du flag can only be used with exactly one directory argument")
		}
		dir := args[0]
		fi, err := up.stat(dir)
		if err != nil {
			return err
		}
		if !fi.IsDir() {
			return fmt.Errorf("%q is not a directory.", dir)
		}
		t := up.NewTreeUpload(dir)
		t.DiskUsageMode = true
		t.Start()
		pr, err := t.Wait()
		if err != nil {
			return err
		}
		handleResult("tree-upload", pr, err)
		return nil
	}
	if c.argsFromInput {
		if len(args) > 0 {
			return errors.New("args not supported with -stdinargs")
		}
		tu := up.NewRootlessTreeUpload()
		tu.Start()
		br := bufio.NewReader(os.Stdin)
		for {
			path, err := br.ReadString('\n')
			if path = strings.TrimSpace(path); path != "" {
				tu.Enqueue(path)
			}
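			// Note: ReadString can return both a final (unterminated)
			// path and io.EOF, so any trailing path is enqueued above
			// before the EOF check below exits the process.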
			if err == io.EOF {
				os.Exit(0)
			}
			if err != nil {
				log.Fatal(err)
			}
		}
	}

	if len(args) == 0 {
		return cmdmain.UsageError("No files or directories given.")
	}
	if up.statCache != nil {
		defer up.statCache.Close()
	}
	for _, filename := range args {
		fi, err := os.Stat(filename)
		if err != nil {
			return err
		}
		// Skip ignored files or base directories.  Failing to skip the
		// latter results in a panic.
		if up.Client.IsIgnoredFile(filename) {
			log.Printf("Client configured to ignore %s; skipping.", filename)
			continue
		}
		if fi.IsDir() {
			if up.fileOpts.wantVivify() {
				vlog.Printf("Directories not supported in vivify mode; skipping %v\n", filename)
				continue
			}
			t := up.NewTreeUpload(filename)
			t.Start()
			lastPut, err = t.Wait()
		} else {
			lastPut, err = up.UploadFile(filename)
			if err == nil && c.deleteAfterUpload {
				if err := os.Remove(filename); err != nil {
					log.Printf("Error deleting %v: %v", filename, err)
				} else {
					log.Printf("Deleted %v", filename)
				}
			}
		}
		if handleResult("file", lastPut, err) != nil {
			return err
		}
	}

	if permaNode != nil && lastPut != nil {
		put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", lastPut.BlobRef.String()))
		if handleResult("claim-permanode-content", put, err) != nil {
			return err
		}
		if c.title != "" {
			put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "title", c.title))
			handleResult("claim-permanode-title", put, err)
		}
		if c.tag != "" {
			tags := strings.Split(c.tag, ",")
			for _, tag := range tags {
				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
				put, err := up.UploadAndSignBlob(m)
				handleResult("claim-permanode-tag", put, err)
			}
		}
		handleResult("permanode", permaNode, nil)
	}
	return nil
}

func (c *fileCmd) initCaches(up *Uploader) {
	if !c.statcache {
		return
	}
	gen, err := up.StorageGeneration()
	if err != nil {
		log.Printf("WARNING: not using local caches; failed to retrieve server's storage generation: %v", err)
		return
	}
	up.statCache = NewKvStatCache(gen)
}

// DumpStats prints the in-memory blob stats and, if destFile is
// non-empty, creates destFile and writes a line per received blob,
// with its blob size.
func DumpStats(sr *statspkg.Receiver, destFile string) {
	sr.Lock()
	defer sr.Unlock()

	var sum int64
	for _, size := range sr.Have {
		sum += size
	}
	fmt.Printf("In-memory blob stats: %d blobs, %d bytes\n", len(sr.Have), sum)

	if destFile == "" {
		return
	}
	f, err := os.Create(destFile)
	if err != nil {
		log.Fatal(err)
	}
	for _, size := range sr.Have {
		fmt.Fprintf(f, "%d\n", size)
	}
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}

type stats struct {
	files, bytes int64
}

func (s *stats) incr(n *node) {
	s.files++
	if !n.fi.IsDir() {
		s.bytes += n.fi.Size()
	}
}

func (up *Uploader) lstat(path string) (os.FileInfo, error) {
	// TODO(bradfitz): use VFS
	return os.Lstat(path)
}

func (up *Uploader) stat(path string) (os.FileInfo, error) {
	if up.fs == nil {
		return os.Stat(path)
	}
	f, err := up.fs.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return f.Stat()
}

func (up *Uploader) open(path string) (http.File, error) {
	if up.fs == nil {
		return os.Open(path)
	}
	return up.fs.Open(path)
}

func (n *node) directoryStaticSet() (*schema.StaticSet, error) {
	ss := new(schema.StaticSet)
	for _, c := range n.children {
		pr, err := c.PutResult()
		if err != nil {
			return nil, fmt.Errorf("Error populating directory static set for child %q: %v", c.fullPath, err)
		}
		ss.Add(pr.BlobRef)
	}
	return ss, nil
}

func (up *Uploader) uploadNode(n *node) (*client.PutResult, error) {
	fi := n.fi
	mode := fi.Mode()
	if mode&os.ModeType == 0 {
		return up.uploadNodeRegularFile(n)
	}
	bb := schema.NewCommonFileMap(n.fullPath, fi)
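	// Note on the switch below: fallthrough jumps to the next case in
	// source order, so the device, socket, and FIFO cases all fall
	// through into default (unsupported). The directory case is
	// deliberately placed after default to stay out of that chain.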
	switch {
	case mode&os.ModeSymlink != 0:
		// TODO(bradfitz): use VFS here; not os.Readlink
		target, err := os.Readlink(n.fullPath)
		if err != nil {
			return nil, err
		}
		bb.SetSymlinkTarget(target)
	case mode&os.ModeDevice != 0:
		// including mode & os.ModeCharDevice
		fallthrough
	case mode&os.ModeSocket != 0:
		fallthrough
	case mode&os.ModeNamedPipe != 0: // FIFO
		fallthrough
	default:
		return nil, fmt.Errorf("camput.files: unsupported file type %v for file %v", mode, n.fullPath)
	case fi.IsDir():
		ss, err := n.directoryStaticSet()
		if err != nil {
			return nil, err
		}
		sspr, err := up.UploadBlob(ss)
		if err != nil {
			return nil, err
		}
		bb.PopulateDirectoryMap(sspr.BlobRef)
	}

	mappr, err := up.UploadBlob(bb)
	if err == nil {
		if !mappr.Skipped {
			vlog.Printf("Uploaded %q, %s for %s", bb.Type(), mappr.BlobRef, n.fullPath)
		}
	} else {
		vlog.Printf("Error uploading map for %s (%s, %s): %v", n.fullPath, bb.Type(), bb.Blob().BlobRef(), err)
	}
	return mappr, err
}

// statReceiver returns the StatReceiver used for checking for and uploading blobs.
//
// The optional provided node is only used for conditionally printing out status info to stdout.
func (up *Uploader) statReceiver(n *node) blobserver.StatReceiver {
	statReceiver := up.altStatReceiver
	if statReceiver == nil {
		// TODO(mpl): simplify the altStatReceiver situation as well,
		// see TODO in cmd/camput/uploader.go
		statReceiver = up.Client
	}
	if android.IsChild() && n != nil && n.fi.Mode()&os.ModeType == 0 {
		return android.StatusReceiver{Sr: statReceiver, Path: n.fullPath}
	}
	return statReceiver
}

var atomicDigestOps int64 // number of files digested

// wholeFileDigest returns the sha1 digest of the regular file whose
// absolute path is fullPath.
func (up *Uploader) wholeFileDigest(fullPath string) (blob.Ref, error) {
	// TODO(bradfitz): cache this.
	file, err := up.open(fullPath)
	if err != nil {
		return blob.Ref{}, err
	}
	defer file.Close()
	td := &trackDigestReader{r: file}
	_, err = io.Copy(ioutil.Discard, td)
	atomic.AddInt64(&atomicDigestOps, 1)
	if err != nil {
		return blob.Ref{}, err
	}
	return blob.MustParse(td.Sum()), nil
}

var noDupSearch, _ = strconv.ParseBool(os.Getenv("CAMLI_NO_FILE_DUP_SEARCH"))

// fileMapFromDuplicate queries the server's search interface for an
// existing file whose entire contents have the digest sum (a blobref
// string). If the server has one, it's validated, and then fileMap
// (which must already be partially populated) has its "parts" field
// populated, and then fileMap is uploaded (if necessary) and a
// PutResult with its blobref is returned. If there's any problem, or
// a dup doesn't exist, ok is false.
// If required, Vivify is also done here.
func (up *Uploader) fileMapFromDuplicate(bs blobserver.StatReceiver, fileMap *schema.Builder, sum string) (pr *client.PutResult, ok bool) {
	if noDupSearch {
		return
	}
	_, err := up.Client.SearchRoot()
	if err != nil {
		return
	}
	dupFileRef, err := up.Client.SearchExistingFileSchema(blob.MustParse(sum))
	if err != nil {
		log.Printf("Warning: error searching for already-uploaded copy of %s: %v", sum, err)
		return nil, false
	}
	if !dupFileRef.Valid() {
		return nil, false
	}
	if *cmdmain.FlagVerbose {
		log.Printf("Found dup of contents %s in file schema %s", sum, dupFileRef)
	}
	dupMap, err := up.Client.FetchSchemaBlob(dupFileRef)
	if err != nil {
		log.Printf("Warning: error fetching %v: %v", dupFileRef, err)
		return nil, false
	}

	fileMap.PopulateParts(dupMap.PartsSize(), dupMap.ByteParts())

	json, err := fileMap.JSON()
	if err != nil {
		return nil, false
	}
	uh := client.NewUploadHandleFromString(json)
	if up.fileOpts.wantVivify() {
		uh.Vivify = true
	}
	if !uh.Vivify && uh.BlobRef == dupFileRef {
		// Unchanged (same filename, modtime, JSON serialization, etc)
		return &client.PutResult{BlobRef: dupFileRef, Size: int64(len(json)), Skipped: true}, true
	}
	pr, err = up.Upload(uh)
	if err != nil {
		log.Printf("Warning: error uploading file map after finding server dup of %v: %v", sum, err)
		return nil, false
	}
	return pr, true
}

func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	filebb := schema.NewCommonFileMap(n.fullPath, n.fi)
	filebb.SetType("file")

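	// fdGate bounds how many files are open at once across the
	// concurrent upload workers.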
	up.fdGate.Start()
	defer up.fdGate.Done()

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if up.fileOpts.exifTime {
		ra, ok := file.(io.ReaderAt)
		if !ok {
			return nil, errors.New("Error asserting local file to io.ReaderAt")
		}
		modtime, err := schema.FileTime(ra)
		if err != nil {
			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
		} else {
			filebb.SetModTime(modtime)
		}
	}
	if up.fileOpts.capCtime {
		filebb.CapCreationTime()
	}

	var (
		size                           = n.fi.Size()
		fileContents io.Reader         = io.LimitReader(file, size)
		br           blob.Ref          // of file schemaref
		sum          string            // sha1 hashsum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

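	// Only bother digesting the whole file and asking the server for a
	// duplicate when the file is large enough that skipping a re-upload
	// would outweigh the cost of the extra local read.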
	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// We can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blob.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     int64(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		android.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if !br.Valid() {
		// br still nil means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// every time we get here (i.e. past the stat cache). However, they're
	// caught by the have cache, so at least they won't be re-uploaded
	// for nothing.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross.  should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this?  or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: int64(len(json)), Skipped: false}
	return pr, nil
}

// uploadFilePermanode creates and uploads the planned permanode (with sum as a
// fixed key) associated with the file blobref fileRef.
// It also sets the optional tags for this permanode.
func (up *Uploader) uploadFilePermanode(sum string, fileRef blob.Ref, claimTime time.Time) error {
	// Use a fixed time value for signing, so two identical files don't
	// produce different planned permanodes just because their modtimes
	// differ. TODO(bradfitz): consider this more?
	permaNodeSigTime := time.Unix(0, 0)
	permaNode, err := up.UploadPlannedPermanode(sum, permaNodeSigTime)
	if err != nil {
		return fmt.Errorf("Error uploading planned permanode: %v", err)
	}
	handleResult("node-permanode", permaNode, nil)

	contentAttr := schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", fileRef.String())
	contentAttr.SetClaimDate(claimTime)
	signer, err := up.Signer()
	if err != nil {
		return err
	}
	signed, err := contentAttr.SignAt(signer, claimTime)
	if err != nil {
		return fmt.Errorf("Failed to sign content claim: %v", err)
	}
	put, err := up.uploadString(signed)
	if err != nil {
		return fmt.Errorf("Error uploading permanode's attribute: %v", err)
	}

	handleResult("node-permanode-contentattr", put, nil)
	if tags := up.fileOpts.tags(); len(tags) > 0 {
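		// Sign and upload one claim per tag concurrently; the first
		// error (if any) is kept and returned after all finish.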
		errch := make(chan error)
		for _, tag := range tags {
			go func(tag string) {
				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
				m.SetClaimDate(claimTime)
				signed, err := m.SignAt(signer, claimTime)
				if err != nil {
					errch <- fmt.Errorf("Failed to sign tag claim: %v", err)
					return
				}
				put, err := up.uploadString(signed)
				if err != nil {
					errch <- fmt.Errorf("Error uploading permanode's tag attribute %v: %v", tag, err)
					return
				}
				handleResult("node-permanode-tag", put, nil)
				errch <- nil
			}(tag)
		}

		for range tags {
			if e := <-errch; e != nil && err == nil {
				err = e
			}
		}
		if err != nil {
			return err
		}
	}
	return nil
}

func (up *Uploader) UploadFile(filename string) (*client.PutResult, error) {
	fullPath, err := filepath.Abs(filename)
	if err != nil {
		return nil, err
	}
	fi, err := up.lstat(fullPath)
	if err != nil {
		return nil, err
	}

	if fi.IsDir() {
		panic("must use UploadTree now for directories")
	}
	n := &node{
		fullPath: fullPath,
		fi:       fi,
	}
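	// Note: n.cond.L is left nil here. PutResult (which calls
	// cond.Wait) is only used on tree-upload nodes, which set cond.L
	// in statPath.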

	withPermanode := up.fileOpts.wantFilePermanode()
	if up.statCache != nil && !up.fileOpts.wantVivify() {
		// Note: ignoring cache hits if wantVivify, otherwise
		// a non-vivify put followed by a vivify one wouldn't
		// end up doing the vivify.
		if cachedRes, err := up.statCache.CachedPutResult(
			up.pwd, n.fullPath, n.fi, withPermanode); err == nil {
			return cachedRes, nil
		}
	}

	pr, err := up.uploadNode(n)
	if err == nil && up.statCache != nil {
		up.statCache.AddCachedPutResult(
			up.pwd, n.fullPath, n.fi, pr, withPermanode)
	}

	return pr, err
}

// NewTreeUpload returns a TreeUpload. It doesn't begin uploading any
// files until a call to Start.
func (up *Uploader) NewTreeUpload(dir string) *TreeUpload {
	tu := up.NewRootlessTreeUpload()
	tu.rootless = false
	tu.base = dir
	return tu
}

func (up *Uploader) NewRootlessTreeUpload() *TreeUpload {
	return &TreeUpload{
		rootless: true,
		base:     "",
		up:       up,
		donec:    make(chan bool, 1),
		errc:     make(chan error, 1),
		stattedc: make(chan *node, buffered),
	}
}

func (t *TreeUpload) Start() {
	go t.run()
}

type node struct {
	tu       *TreeUpload // nil if not doing a tree upload
	fullPath string
	fi       os.FileInfo
	children []*node

	// cond (and its Lock, &mu) guards err and res.
	cond sync.Cond // with L being &mu
	mu   sync.Mutex
	err  error
	res  *client.PutResult

	sumBytes int64 // cached value, if non-zero. also guarded by mu.
}

func (n *node) String() string {
	if n == nil {
		return "<nil *node>"
	}
	return fmt.Sprintf("[node %s, isDir=%v, nchild=%d]", n.fullPath, n.fi.IsDir(), len(n.children))
}

func (n *node) SetPutResult(res *client.PutResult, err error) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if res == nil && err == nil {
		panic("SetPutResult called with (nil, nil)")
	}
	if n.res != nil || n.err != nil {
		panic("SetPutResult called twice on node " + n.fullPath)
	}
	n.res, n.err = res, err
	n.cond.Signal()
}

func (n *node) PutResult() (*client.PutResult, error) {
	n.mu.Lock()
	defer n.mu.Unlock()
	for n.err == nil && n.res == nil {
		n.cond.Wait()
	}
	return n.res, n.err
}

func (n *node) SumBytes() (v int64) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.sumBytes != 0 {
		return n.sumBytes
	}
	for _, c := range n.children {
		v += c.SumBytes()
	}
	if n.fi.Mode()&os.ModeType == 0 {
		v += n.fi.Size()
	}
	n.sumBytes = v
	return
}

/*
A TreeUpload holds the state of an ongoing recursive directory tree
upload. Call Wait to get the final result.

Uploading a directory tree involves several concurrent processes, each
of which may involve multiple goroutines:

1) one process stats all files and walks all directories as fast as possible
   to calculate how much total work there will be. this goroutine also
   filters out directories to be skipped. (caches, temp files, skipDirs, etc)

2) one process works through the files that were discovered and checks
   the statcache to see what actually needs to be uploaded.
   The statcache is
       full path => {last os.FileInfo signature, put result from last time}
   and is used to avoid re-reading/digesting the file even locally,
   trusting that it's already on the server.

3) one process uploads files & metadata. This process checks the "havecache"
   to see which blobs are already on the server. For a while the local havecache
   (if configured) and the remote blobserver "stat" RPC are raced to determine
   if the local havecache is even faster. If not, it's not consulted. But if the
   latency of remote stats is high enough, checking locally is preferred.
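
Typical use, as in RunCommand above:

	t := up.NewTreeUpload(dir)
	t.Start()
	pr, err := t.Wait()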
*/
type TreeUpload struct {
	// If DiskUsageMode is set true before Start, only
	// per-directory disk usage stats are output, like the "du"
	// command.
	DiskUsageMode bool

	// Immutable:
	rootless bool   // if true, "base" will be empty.
	base     string // base directory
	up       *Uploader
	stattedc chan *node // from stat-the-world goroutine to run()

	donec chan bool // closed when run() finishes
	err   error
	errc  chan error // with 1 buffer item

	// Owned by run goroutine:
	total    stats // total bytes on disk
	skipped  stats // not even tried to upload (trusting stat cache)
	uploaded stats // uploaded (even if server said it already had it and bytes weren't sent)

	finalPutRes *client.PutResult // set after run() returns
}

// Enqueue starts uploading path (a file, directory, etc).
func (t *TreeUpload) Enqueue(path string) {
	t.statPath(path, nil)
}

// fi is optional (will be statted if nil)
func (t *TreeUpload) statPath(fullPath string, fi os.FileInfo) (nod *node, err error) {
	defer func() {
		if err == nil && nod != nil {
			t.stattedc <- nod
		}
	}()
	if t.up.Client.IsIgnoredFile(fullPath) {
		return nil, nil
	}
	if fi == nil {
		fi, err = t.up.lstat(fullPath)
		if err != nil {
			return nil, err
		}
	}
	n := &node{
		tu:       t,
		fullPath: fullPath,
		fi:       fi,
	}
	n.cond.L = &n.mu

	if !fi.IsDir() {
		return n, nil
	}
	f, err := t.up.open(fullPath)
	if err != nil {
		return nil, err
	}
	fis, err := f.Readdir(-1)
	f.Close()
	if err != nil {
		return nil, err
	}
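	// Sort children by name so a directory's listing (and thus its
	// serialized static set) is stable from run to run.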
	sort.Sort(byFileName(fis))
	for _, fi := range fis {
		depn, err := t.statPath(filepath.Join(fullPath, filepath.Base(fi.Name())), fi)
		if err != nil {
			return nil, err
		}
		if depn != nil {
			n.children = append(n.children, depn)
		}
	}
	return n, nil
}

func (t *TreeUpload) run() {
	defer close(t.donec)

	// Kick off scanning all files, eventually learning the root
	// node (which references all its children).
	var root *node // nil until received and set in loop below.
	rootc := make(chan *node, 1)
	if !t.rootless {
		go func() {
			n, err := t.statPath(t.base, nil)
			if err != nil {
				log.Fatalf("Error scanning files under %s: %v", t.base, err)
			}
			close(t.stattedc)
			rootc <- n
		}()
	}

	var lastStat, lastUpload string
	dumpStats := func() {
		if android.IsChild() {
			printAndroidCamputStatus(t)
			return
		}
		statStatus := ""
		if root == nil {
			statStatus = fmt.Sprintf("last stat: %s", lastStat)
		}
		blobStats := t.up.Stats()
		log.Printf("FILES: Total: %+v Skipped: %+v Uploaded: %+v %s BLOBS: %s Digested: %d last upload: %s",
			t.total, t.skipped, t.uploaded,
			statStatus,
			blobStats.String(),
			atomic.LoadInt64(&atomicDigestOps),
			lastUpload)
	}

	// Channels for stats & progress bars. These are never closed:
	uploadedc := make(chan *node) // at least tried to upload; server might have had blob
	skippedc := make(chan *node)  // didn't even hit blobserver; trusted our stat cache

	uploadsdonec := make(chan bool)
	var upload chan<- *node
	withPermanode := t.up.fileOpts.wantFilePermanode()
	if t.DiskUsageMode {
		upload = NewNodeWorker(1, func(n *node, ok bool) {
			if !ok {
				uploadsdonec <- true
				return
			}
			if n.fi.IsDir() {
				fmt.Printf("%d\t%s\n", n.SumBytes()>>10, n.fullPath)
			}
		})
	} else {
		upload = NewNodeWorker(-1, func(n *node, ok bool) {
			if !ok {
				log.Printf("done with all uploads.")
				uploadsdonec <- true
				return
			}
			put, err := t.up.uploadNode(n)
			if err != nil {
				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
			}
			n.SetPutResult(put, nil)
			if c := t.up.statCache; c != nil && !n.fi.IsDir() {
				c.AddCachedPutResult(
					t.up.pwd, n.fullPath, n.fi, put, withPermanode)
			}
			uploadedc <- n
		})
	}

	checkStatCache := NewNodeWorker(10, func(n *node, ok bool) {
		if !ok {
			if t.up.statCache != nil {
				log.Printf("done checking stat cache")
			}
			close(upload)
			return
		}
		if t.DiskUsageMode || t.up.statCache == nil {
			upload <- n
			return
		}
		if !n.fi.IsDir() {
			cachedRes, err := t.up.statCache.CachedPutResult(
				t.up.pwd, n.fullPath, n.fi, withPermanode)
			if err == nil {
				n.SetPutResult(cachedRes, nil)
				cachelog.Printf("Cache HIT on %q -> %v", n.fullPath, cachedRes)
				android.NoteFileUploaded(n.fullPath, false)
				skippedc <- n
				return
			}
		}
		upload <- n
	})

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	stattedc := t.stattedc
Loop:
	for {
		select {
		case <-uploadsdonec:
			break Loop
		case n := <-rootc:
			root = n
		case n := <-uploadedc:
			t.uploaded.incr(n)
			lastUpload = n.fullPath
		case n := <-skippedc:
			t.skipped.incr(n)
		case n, ok := <-stattedc:
			if !ok {
				log.Printf("done statting.")
				dumpStats()
				close(checkStatCache)
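				// Setting stattedc to nil disables this select case
				// for the rest of the loop.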
				stattedc = nil
				continue
			}
			lastStat = n.fullPath
			t.total.incr(n)
			checkStatCache <- n
		case <-ticker.C:
			dumpStats()
		}
	}

	log.Printf("tree upload finished. final stats:")
	dumpStats()

	if root == nil {
		panic("unexpected nil root node")
	}
	var err error
	log.Printf("Waiting on root node %q", root.fullPath)
	t.finalPutRes, err = root.PutResult()
	log.Printf("Waited on root node %q: %v", root.fullPath, t.finalPutRes)
	if err != nil {
		t.err = err
	}
}

func (t *TreeUpload) Wait() (*client.PutResult, error) {
	<-t.donec
	// If an error is waiting and we don't otherwise have one, use it:
	if t.err == nil {
		select {
		case t.err = <-t.errc:
		default:
		}
	}
	if t.err == nil && t.finalPutRes == nil {
		panic("Nothing ever set t.finalPutRes, but no error set")
	}
	return t.finalPutRes, t.err
}

type byFileName []os.FileInfo

func (s byFileName) Len() int           { return len(s) }
func (s byFileName) Less(i, j int) bool { return s[i].Name() < s[j].Name() }
func (s byFileName) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

// trackDigestReader is an io.Reader wrapper which records the digest of what it reads.
type trackDigestReader struct {
	r io.Reader
	h hash.Hash
}

func (t *trackDigestReader) Read(p []byte) (n int, err error) {
	if t.h == nil {
		t.h = sha1.New()
	}
	n, err = t.r.Read(p)
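	// Per the hash.Hash contract, Write never returns an error, so its
	// results are safely ignored here.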
	t.h.Write(p[:n])
	return
}

func (t *trackDigestReader) Sum() string {
	return fmt.Sprintf("sha1-%x", t.h.Sum(nil))
}