// github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/cmd/camput/files.go

/*
Copyright 2011 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"bufio"
	"crypto/sha1"
	"errors"
	"flag"
	"fmt"
	"hash"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/blobserver"
	statspkg "camlistore.org/pkg/blobserver/stats"
	"camlistore.org/pkg/client"
	"camlistore.org/pkg/client/android"
	"camlistore.org/pkg/cmdmain"
	"camlistore.org/pkg/schema"
)

type fileCmd struct {
	title string
	tag   string

	makePermanode     bool // make new, unique permanode of the root (dir or file)
	filePermanodes    bool // make planned permanodes for each file (based on their digest)
	vivify            bool
	exifTime          bool // use metadata (such as in EXIF) to find the creation time of the file
	capCtime          bool // use mtime as the file's creation time, if the creation time would be newer than the modification time
	diskUsage         bool // show "du" disk usage only (dry run mode), don't actually upload
	argsFromInput     bool // Android mode: filenames piped into stdin, one at a time.
	deleteAfterUpload bool // with fileNodes, deletes the input file once uploaded

	statcache bool

	// Go into in-memory stats mode only; doesn't actually upload.
	memstats bool
	histo    string // optional histogram output filename
}

var flagUseSQLiteChildCache bool // Use sqlite for the statcache and havecache.

var (
	uploadWorkers    = 5 // concurrent upload workers (negative means unbounded: memory hog)
	dirUploadWorkers = 3 // concurrent directory uploading workers
	statCacheWorkers = 5 // concurrent statcache workers
)

func init() {
	cmdmain.RegisterCommand("file", func(flags *flag.FlagSet) cmdmain.CommandRunner {
		cmd := new(fileCmd)
		flags.BoolVar(&cmd.makePermanode, "permanode", false, "Create and associate a new permanode for the uploaded file or directory.")
		flags.BoolVar(&cmd.filePermanodes, "filenodes", false, "Create (if necessary) content-based permanodes for each uploaded file.")
		flags.BoolVar(&cmd.deleteAfterUpload, "delete_after_upload", false, "If using -filenodes, deletes files once they're uploaded, or if they've already been uploaded.")
		flags.BoolVar(&cmd.vivify, "vivify", false,
			"If true, ask the server to create and sign permanode(s) associated with each uploaded"+
				" file. This permits the server to have your signing key. Used mostly with untrusted"+
				" or at-risk clients, such as phones.")
		flags.BoolVar(&cmd.exifTime, "exiftime", false, "Try to use metadata (such as EXIF) to get a stable creation time. If found, used as the replacement for the modtime. Mainly useful with vivify or filenodes.")
		flags.StringVar(&cmd.title, "title", "", "Optional title attribute to set on permanode when using -permanode.")
		flags.StringVar(&cmd.tag, "tag", "", "Optional tag(s) to set on permanode when using -permanode or -filenodes. Single value or comma separated.")

		flags.BoolVar(&cmd.diskUsage, "du", false, "Dry run mode: only show disk usage information, without upload or statting dest. Used for testing skipDirs configs, mostly.")

		if debug, _ := strconv.ParseBool(os.Getenv("CAMLI_DEBUG")); debug {
			flags.BoolVar(&cmd.statcache, "statcache", true, "Use the stat cache, assuming unchanged files already uploaded in the past are still there. Fast, but potentially dangerous.")
			flags.BoolVar(&cmd.memstats, "debug-memstats", false, "Enter debug in-memory mode; collecting stats only. Doesn't upload anything.")
			flags.StringVar(&cmd.histo, "debug-histogram-file", "", "Optional file to create and write the blob size for each file uploaded. For use with GNU R and hist(read.table(\"filename\")$V1). Requires debug-memstats.")
			flags.BoolVar(&cmd.capCtime, "capctime", false, "For file blobs, use the file modification time as the creation time if the creation time would be newer than the modification time. For stable filenode creation (you can forge mtime, but can't forge ctime).")
			flags.BoolVar(&flagUseSQLiteChildCache, "sqlitecache", false, "Use sqlite for the statcache and havecache instead of a flat cache.")
		} else {
			cmd.statcache = true
		}
		if android.IsChild() {
			flags.BoolVar(&cmd.argsFromInput, "stdinargs", false, "If true, filenames to upload are sent one-per-line on stdin. EOF means to quit the process with exit status 0.")
			// limit number of goroutines to limit memory
			uploadWorkers = 2
			dirUploadWorkers = 2
			statCacheWorkers = 2
		}
		flagCacheLog = flags.Bool("logcache", false, "log caching details")

		return cmd
	})
}

func (c *fileCmd) Describe() string {
	return "Upload file(s)."
}

func (c *fileCmd) Usage() {
	fmt.Fprintf(cmdmain.Stderr, "Usage: camput [globalopts] file [fileopts] <file/director(ies)>\n")
}

func (c *fileCmd) Examples() []string {
	return []string{
		"[opts] <file(s)/director(ies)>",
		"--permanode --title='Homedir backup' --tag=backup,homedir $HOME",
		"--filenodes /mnt/camera/DCIM",
	}
}

func (c *fileCmd) RunCommand(args []string) error {
	if c.vivify {
		if c.makePermanode || c.filePermanodes || c.tag != "" || c.title != "" {
			return cmdmain.UsageError("--vivify excludes any other option")
		}
	}
	if c.title != "" && !c.makePermanode {
		return cmdmain.UsageError("Can't set title without using --permanode")
	}
	if c.tag != "" && !c.makePermanode && !c.filePermanodes {
		return cmdmain.UsageError("Can't set tag without using --permanode or --filenodes")
	}
	if c.histo != "" && !c.memstats {
		return cmdmain.UsageError("Can't use histo without memstats")
	}
	if c.deleteAfterUpload && !c.filePermanodes {
		return cmdmain.UsageError("Can't use --delete_after_upload without --filenodes")
	}
	up := getUploader()
	if c.memstats {
		sr := new(statspkg.Receiver)
		up.altStatReceiver = sr
		defer func() { DumpStats(sr, c.histo) }()
	}
	c.initCaches(up)

	if c.makePermanode || c.filePermanodes {
		testSigBlobRef := up.Client.SignerPublicKeyBlobref()
		if !testSigBlobRef.Valid() {
			return cmdmain.UsageError("A GPG key is needed to create permanodes; configure one or use vivify mode.")
		}
	}
	up.fileOpts = &fileOptions{
		permanode: c.filePermanodes,
		tag:       c.tag,
		vivify:    c.vivify,
		exifTime:  c.exifTime,
		capCtime:  c.capCtime,
	}

	var (
		permaNode *client.PutResult
		lastPut   *client.PutResult
		err       error
	)
	if c.makePermanode {
		if len(args) != 1 {
			return fmt.Errorf("The --permanode flag can only be used with exactly one file or directory argument")
		}
		permaNode, err = up.UploadNewPermanode()
		if err != nil {
			return fmt.Errorf("Uploading permanode: %v", err)
		}
	}
	if c.diskUsage {
		if len(args) != 1 {
			return fmt.Errorf("The --du flag can only be used with exactly one directory argument")
		}
		dir := args[0]
		fi, err := up.stat(dir)
		if err != nil {
			return err
		}
		if !fi.IsDir() {
			return fmt.Errorf("%q is not a directory.", dir)
		}
		t := up.NewTreeUpload(dir)
		t.DiskUsageMode = true
		t.Start()
		pr, err := t.Wait()
		if err != nil {
			return err
		}
		handleResult("tree-upload", pr, err)
		return nil
	}
	if c.argsFromInput {
		if len(args) > 0 {
			return errors.New("args not supported with -stdinargs")
		}
		tu := up.NewRootlessTreeUpload()
		tu.Start()
		br := bufio.NewReader(os.Stdin)
		for {
			path, err := br.ReadString('\n')
			if path = strings.TrimSpace(path); path != "" {
				tu.Enqueue(path)
			}
			if err == io.EOF {
				android.PreExit()
				os.Exit(0)
			}
			if err != nil {
				log.Fatal(err)
			}
		}
	}

	if len(args) == 0 {
		return cmdmain.UsageError("No files or directories given.")
	}
	if up.statCache != nil {
		defer up.statCache.Close()
	}
	for _, filename := range args {
		fi, err := os.Stat(filename)
		if err != nil {
			return err
		}
		// Skip ignored files or base directories. Failing to skip the
		// latter results in a panic.
		if up.Client.IsIgnoredFile(filename) {
			log.Printf("Client configured to ignore %s; skipping.", filename)
			continue
		}
		if fi.IsDir() {
			if up.fileOpts.wantVivify() {
				vlog.Printf("Directories not supported in vivify mode; skipping %v\n", filename)
				continue
			}
			if !*cmdmain.FlagVerbose {
				log.SetOutput(ioutil.Discard)
			}
			t := up.NewTreeUpload(filename)
			t.Start()
			lastPut, err = t.Wait()
		} else {
			lastPut, err = up.UploadFile(filename)
			if err == nil && c.deleteAfterUpload {
				if err := os.Remove(filename); err != nil {
					log.Printf("Error deleting %v: %v", filename, err)
				} else {
					log.Printf("Deleted %v", filename)
				}
			}
		}
		if handleResult("file", lastPut, err) != nil {
			return err
		}
	}

	if permaNode != nil && lastPut != nil {
		put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", lastPut.BlobRef.String()))
		if handleResult("claim-permanode-content", put, err) != nil {
			return err
		}
		if c.title != "" {
			put, err := up.UploadAndSignBlob(schema.NewSetAttributeClaim(permaNode.BlobRef, "title", c.title))
			handleResult("claim-permanode-title", put, err)
		}
		if c.tag != "" {
			tags := strings.Split(c.tag, ",")
			for _, tag := range tags {
				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
				put, err := up.UploadAndSignBlob(m)
				handleResult("claim-permanode-tag", put, err)
			}
		}
		handleResult("permanode", permaNode, nil)
	}
	return nil
}

func (c *fileCmd) initCaches(up *Uploader) {
	if !c.statcache || *flagBlobDir != "" {
		return
	}
	gen, err := up.StorageGeneration()
	if err != nil {
		log.Printf("WARNING: not using local caches; failed to retrieve server's storage generation: %v", err)
		return
	}
	if c.statcache {
		up.statCache = NewKvStatCache(gen)
	}
}

// DumpStats creates the destFile and writes a line per received blob,
// with its blob size.
func DumpStats(sr *statspkg.Receiver, destFile string) {
	sr.Lock()
	defer sr.Unlock()

	f, err := os.Create(destFile)
	if err != nil {
		log.Fatal(err)
	}

	var sum int64
	for _, size := range sr.Have {
		fmt.Fprintf(f, "%d\n", size)
		sum += size
	}
	fmt.Printf("In-memory blob stats: %d blobs, %d bytes\n", len(sr.Have), sum)

	err = f.Close()
	if err != nil {
		log.Fatal(err)
	}
}

type stats struct {
	files, bytes int64
}

func (s *stats) incr(n *node) {
	s.files++
	if !n.fi.IsDir() {
		s.bytes += n.fi.Size()
	}
}

func (up *Uploader) lstat(path string) (os.FileInfo, error) {
	// TODO(bradfitz): use VFS
	return os.Lstat(path)
}

func (up *Uploader) stat(path string) (os.FileInfo, error) {
	if up.fs == nil {
		return os.Stat(path)
	}
	f, err := up.fs.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return f.Stat()
}

func (up *Uploader) open(path string) (http.File, error) {
	if up.fs == nil {
		return os.Open(path)
	}
	return up.fs.Open(path)
}

func (n *node) directoryStaticSet() (*schema.StaticSet, error) {
	ss := new(schema.StaticSet)
	for _, c := range n.children {
		pr, err := c.PutResult()
		if err != nil {
			return nil, fmt.Errorf("Error populating directory static set for child %q: %v", c.fullPath, err)
		}
		ss.Add(pr.BlobRef)
	}
	return ss, nil
}

func (up *Uploader) uploadNode(n *node) (*client.PutResult, error) {
	fi := n.fi
	mode := fi.Mode()
	if mode&os.ModeType == 0 {
		return up.uploadNodeRegularFile(n)
	}
	bb := schema.NewCommonFileMap(n.fullPath, fi)
	switch {
	case mode&os.ModeSymlink != 0:
		// TODO(bradfitz): use VFS here; not os.Readlink
		target, err := os.Readlink(n.fullPath)
		if err != nil {
			return nil, err
		}
		bb.SetSymlinkTarget(target)
	case mode&os.ModeDevice != 0:
		// including mode & os.ModeCharDevice
		fallthrough
	case mode&os.ModeSocket != 0:
		fallthrough
	case mode&os.ModeNamedPipe != 0: // FIFO
		fallthrough
	default:
		return nil, fmt.Errorf("camput.files: unsupported file type %v for file %v", mode, n.fullPath)
	case fi.IsDir():
		ss, err := n.directoryStaticSet()
		if err != nil {
			return nil, err
		}
		sspr, err := up.UploadBlob(ss)
		if err != nil {
			return nil, err
		}
		bb.PopulateDirectoryMap(sspr.BlobRef)
	}

	mappr, err := up.UploadBlob(bb)
	if err == nil {
		if !mappr.Skipped {
			vlog.Printf("Uploaded %q, %s for %s", bb.Type(), mappr.BlobRef, n.fullPath)
		}
	} else {
		vlog.Printf("Error uploading map for %s (%s, %s): %v", n.fullPath, bb.Type(), bb.Blob().BlobRef(), err)
	}
	return mappr, err
}

// statReceiver returns the StatReceiver used for checking for and uploading blobs.
//
// The optional provided node is only used for conditionally printing out status info to stdout.
func (up *Uploader) statReceiver(n *node) blobserver.StatReceiver {
	statReceiver := up.altStatReceiver
	if statReceiver == nil {
		// TODO(mpl): simplify the altStatReceiver situation as well,
		// see TODO in cmd/camput/uploader.go
		statReceiver = up.Client
	}
	if android.IsChild() && n != nil && n.fi.Mode()&os.ModeType == 0 {
		return android.StatusReceiver{Sr: statReceiver, Path: n.fullPath}
	}
	return statReceiver
}

func (up *Uploader) noStatReceiver(r blobserver.BlobReceiver) blobserver.StatReceiver {
	return noStatReceiver{r}
}

// A noStatReceiver relays Receive calls to the embedded
// BlobReceiver and treats all Stat calls as if the blob doesn't exist.
//
// This is used by the client once it has already learned from the server
// that it doesn't have the whole file in some chunk layout, so we
// know we're just writing new stuff. For resuming in the middle of
// larger uploads, it turns out that pkg/client.Client.Upload
// already checks the have cache anyway, so going right to mid-chunk
// receives is fine.
//
// TODO(bradfitz): this probably all needs an audit/rationalization/tests
// to make sure all the players are agreeing on the responsibilities.
// And maybe the Android stats are wrong, too. (see pkg/client/android's
// StatReceiver)
type noStatReceiver struct {
	blobserver.BlobReceiver
}

func (noStatReceiver) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	return nil
}

var atomicDigestOps int64 // number of files digested

// wholeFileDigest returns the sha1 digest of the regular file whose
// absolute path is given in fullPath.
func (up *Uploader) wholeFileDigest(fullPath string) (blob.Ref, error) {
	// TODO(bradfitz): cache this.
	file, err := up.open(fullPath)
	if err != nil {
		return blob.Ref{}, err
	}
	defer file.Close()
	td := &trackDigestReader{r: file}
	_, err = io.Copy(ioutil.Discard, td)
	atomic.AddInt64(&atomicDigestOps, 1)
	if err != nil {
		return blob.Ref{}, err
	}
	return blob.MustParse(td.Sum()), nil
}

var noDupSearch, _ = strconv.ParseBool(os.Getenv("CAMLI_NO_FILE_DUP_SEARCH"))
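
// Illustrative sketch (not part of the original source): uploadNodeRegularFile
// (further below) combines the pieces above roughly as follows, assuming the
// server exposes a search root; filebb and n are its locals:
//
//	wholeRef, err := up.wholeFileDigest(n.fullPath) // sha1 of the file's bytes
//	if err == nil {
//		pr, ok := up.fileMapFromDuplicate(up.statReceiver(n), filebb, wholeRef.String())
//		if ok {
//			// The server already had a "file" schema blob for these bytes;
//			// its "parts" were copied into filebb, and pr holds the
//			// (possibly re-uploaded) file schema blobref.
//		}
//	}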

// fileMapFromDuplicate queries the server's search interface for an
// existing file whose entire contents have the digest sum (a blobref
// string). If the server has it, it's validated, and then fileMap
// (which must already be partially populated) has its "parts" field
// populated, and then fileMap is uploaded (if necessary) and a
// PutResult with its blobref is returned. If there's any problem, or
// a dup doesn't exist, ok is false.
// If required, Vivify is also done here.
func (up *Uploader) fileMapFromDuplicate(bs blobserver.StatReceiver, fileMap *schema.Builder, sum string) (pr *client.PutResult, ok bool) {
	if noDupSearch {
		return
	}
	_, err := up.Client.SearchRoot()
	if err != nil {
		return
	}
	dupFileRef, err := up.Client.SearchExistingFileSchema(blob.MustParse(sum))
	if err != nil {
		log.Printf("Warning: error searching for already-uploaded copy of %s: %v", sum, err)
		return nil, false
	}
	if !dupFileRef.Valid() {
		return nil, false
	}
	if *cmdmain.FlagVerbose {
		log.Printf("Found dup of contents %s in file schema %s", sum, dupFileRef)
	}
	dupMap, err := up.Client.FetchSchemaBlob(dupFileRef)
	if err != nil {
		log.Printf("Warning: error fetching %v: %v", dupFileRef, err)
		return nil, false
	}

	fileMap.PopulateParts(dupMap.PartsSize(), dupMap.ByteParts())

	json, err := fileMap.JSON()
	if err != nil {
		return nil, false
	}
	uh := client.NewUploadHandleFromString(json)
	if up.fileOpts.wantVivify() {
		uh.Vivify = true
	}
	if !uh.Vivify && uh.BlobRef == dupFileRef {
		// Unchanged (same filename, modtime, JSON serialization, etc)
		return &client.PutResult{BlobRef: dupFileRef, Size: uint32(len(json)), Skipped: true}, true
	}
	pr, err = up.Upload(uh)
	if err != nil {
		log.Printf("Warning: error uploading file map after finding server dup of %v: %v", sum, err)
		return nil, false
	}
	return pr, true
}

func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	filebb := schema.NewCommonFileMap(n.fullPath, n.fi)
	filebb.SetType("file")

	up.fdGate.Start()
	defer up.fdGate.Done()

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if up.fileOpts.exifTime {
		ra, ok := file.(io.ReaderAt)
		if !ok {
			return nil, errors.New("Error asserting local file to io.ReaderAt")
		}
		modtime, err := schema.FileTime(ra)
		if err != nil {
			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
		} else {
			filebb.SetModTime(modtime)
		}
	}
	if up.fileOpts.capCtime {
		filebb.CapCreationTime()
	}

	var (
		size                   = n.fi.Size()
		fileContents io.Reader = io.LimitReader(file, size)
		br           blob.Ref          // of file schemaref
		sum          string            // sha1 hashsum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// We can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blob.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     uint32(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		android.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if !br.Valid() {
		// br still zero means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// every time we get here (i.e. past the stat cache). However, they're
	// caught by the have cache, so at least they won't be re-uploaded
	// for nothing.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross. should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: uint32(len(json)), Skipped: false}
	return pr, nil
}

// uploadFilePermanode creates and uploads the planned permanode (with sum as a
// fixed key) associated with the file blobref fileRef.
// It also sets the optional tags for this permanode.
func (up *Uploader) uploadFilePermanode(sum string, fileRef blob.Ref, claimTime time.Time) error {
	// Use a fixed time value for signing; not using modtime
	// so two identical files don't have different modtimes?
	// TODO(bradfitz): consider this more?
	permaNodeSigTime := time.Unix(0, 0)
	permaNode, err := up.UploadPlannedPermanode(sum, permaNodeSigTime)
	if err != nil {
		return fmt.Errorf("Error uploading planned permanode: %v", err)
	}
	handleResult("node-permanode", permaNode, nil)

	contentAttr := schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", fileRef.String())
	contentAttr.SetClaimDate(claimTime)
	signer, err := up.Signer()
	if err != nil {
		return err
	}
	signed, err := contentAttr.SignAt(signer, claimTime)
	if err != nil {
		return fmt.Errorf("Failed to sign content claim: %v", err)
	}
	put, err := up.uploadString(signed)
	if err != nil {
		return fmt.Errorf("Error uploading permanode's attribute: %v", err)
	}

	handleResult("node-permanode-contentattr", put, nil)
	if tags := up.fileOpts.tags(); len(tags) > 0 {
		errch := make(chan error)
		for _, tag := range tags {
			go func(tag string) {
				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
				m.SetClaimDate(claimTime)
				signed, err := m.SignAt(signer, claimTime)
				if err != nil {
					errch <- fmt.Errorf("Failed to sign tag claim: %v", err)
					return
				}
				put, err := up.uploadString(signed)
				if err != nil {
					errch <- fmt.Errorf("Error uploading permanode's tag attribute %v: %v", tag, err)
					return
				}
				handleResult("node-permanode-tag", put, nil)
				errch <- nil
			}(tag)
		}

		for range tags {
			if e := <-errch; e != nil && err == nil {
				err = e
			}
		}
		if err != nil {
			return err
		}
	}
	return nil
}

func (up *Uploader) UploadFile(filename string) (*client.PutResult, error) {
	fullPath, err := filepath.Abs(filename)
	if err != nil {
		return nil, err
	}
	fi, err := up.lstat(fullPath)
	if err != nil {
		return nil, err
	}

	if fi.IsDir() {
		panic("must use UploadTree now for directories")
	}
	n := &node{
		fullPath: fullPath,
		fi:       fi,
	}

	withPermanode := up.fileOpts.wantFilePermanode()
	if up.statCache != nil && !up.fileOpts.wantVivify() {
		// Note: ignoring cache hits if wantVivify, otherwise
		// a non-vivify put followed by a vivify one wouldn't
		// end up doing the vivify.
		if cachedRes, err := up.statCache.CachedPutResult(
			up.pwd, n.fullPath, n.fi, withPermanode); err == nil {
			return cachedRes, nil
		}
	}

	pr, err := up.uploadNode(n)
	if err == nil && up.statCache != nil {
		up.statCache.AddCachedPutResult(
			up.pwd, n.fullPath, n.fi, pr, withPermanode)
	}

	return pr, err
}

// NewTreeUpload returns a TreeUpload. It doesn't begin uploading any
// files until Start is called.
func (up *Uploader) NewTreeUpload(dir string) *TreeUpload {
	tu := up.NewRootlessTreeUpload()
	tu.rootless = false
	tu.base = dir
	return tu
}

func (up *Uploader) NewRootlessTreeUpload() *TreeUpload {
	return &TreeUpload{
		rootless: true,
		base:     "",
		up:       up,
		donec:    make(chan bool, 1),
		errc:     make(chan error, 1),
		stattedc: make(chan *node, buffered),
	}
}

func (t *TreeUpload) Start() {
	go t.run()
}

type node struct {
	tu       *TreeUpload // nil if not doing a tree upload
	fullPath string
	fi       os.FileInfo
	children []*node

	// cond (and its Lock, &mu) guards err and res.
	cond sync.Cond // with L being &mu
	mu   sync.Mutex
	err  error
	res  *client.PutResult

	sumBytes int64 // cached value, if non-zero. Also guarded by mu.
}

func (n *node) String() string {
	if n == nil {
		return "<nil *node>"
	}
	return fmt.Sprintf("[node %s, isDir=%v, nchild=%d]", n.fullPath, n.fi.IsDir(), len(n.children))
}

func (n *node) SetPutResult(res *client.PutResult, err error) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if res == nil && err == nil {
		panic("SetPutResult called with (nil, nil)")
	}
	if n.res != nil || n.err != nil {
		panic("SetPutResult called twice on node " + n.fullPath)
	}
	n.res, n.err = res, err
	n.cond.Signal()
}

func (n *node) PutResult() (*client.PutResult, error) {
	n.mu.Lock()
	defer n.mu.Unlock()
	for n.err == nil && n.res == nil {
		n.cond.Wait()
	}
	return n.res, n.err
}

func (n *node) SumBytes() (v int64) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.sumBytes != 0 {
		return n.sumBytes
	}
	for _, c := range n.children {
		v += c.SumBytes()
	}
	if n.fi.Mode()&os.ModeType == 0 {
		v += n.fi.Size()
	}
	n.sumBytes = v
	return
}

/*
A TreeUpload holds the state of an ongoing recursive directory tree
upload. Call Wait to get the final result.

Uploading a directory tree involves several concurrent processes, each
of which may involve multiple goroutines:

1) one process stats all files and walks all directories as fast as possible
   to calculate how much total work there will be. This process also
   filters out directories to be skipped. (caches, temp files, skipDirs, etc)

2) one process works through the files that were discovered and checks
   the statcache to see what actually needs to be uploaded.
   The statcache maps
       full path => {last os.FileInfo signature, put result from last time}
   and is used to avoid re-reading/digesting the file even locally,
   trusting that it's already on the server.

3) one process uploads files & metadata. This process checks the "havecache"
   to see which blobs are already on the server. For a while, the local havecache
   (if configured) and the remote blobserver "stat" RPC are raced to determine
   if the local havecache is even faster. If not, it's not consulted. But if the
   latency of remote stats is high enough, checking locally is preferred.
*/
type TreeUpload struct {
	// If DiskUsageMode is set true before Start, only
	// per-directory disk usage stats are output, like the "du"
	// command.
	DiskUsageMode bool

	// Immutable:
	rootless bool   // if true, "base" will be empty.
	base     string // base directory
	up       *Uploader
	stattedc chan *node // from stat-the-world goroutine to run()

	donec chan bool // closed when run() finishes
	err   error
	errc  chan error // with 1 buffer item

	// Owned by run goroutine:
	total    stats // total bytes on disk
	skipped  stats // not even tried to upload (trusting stat cache)
	uploaded stats // uploaded (even if server said it already had it and bytes weren't sent)

	finalPutRes *client.PutResult // set after run() returns
}
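
// Illustrative sketch (not part of the original source): the rooted use of a
// TreeUpload, as RunCommand does above for a directory argument (up is an
// *Uploader, dir/path are caller-supplied):
//
//	t := up.NewTreeUpload(dir)
//	t.Start()
//	pr, err := t.Wait() // PutResult of the root directory's schema blob
//
// In rootless mode (the Android -stdinargs path), paths are fed in one at a
// time instead of walking a base directory:
//
//	tu := up.NewRootlessTreeUpload()
//	tu.Start()
//	tu.Enqueue(path)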

// Enqueue starts uploading path (a file, directory, etc).
func (t *TreeUpload) Enqueue(path string) {
	t.statPath(path, nil)
}

// fi is optional (will be statted if nil)
func (t *TreeUpload) statPath(fullPath string, fi os.FileInfo) (nod *node, err error) {
	defer func() {
		if err == nil && nod != nil {
			t.stattedc <- nod
		}
	}()
	if t.up.Client.IsIgnoredFile(fullPath) {
		return nil, nil
	}
	if fi == nil {
		fi, err = t.up.lstat(fullPath)
		if err != nil {
			return nil, err
		}
	}
	n := &node{
		tu:       t,
		fullPath: fullPath,
		fi:       fi,
	}
	n.cond.L = &n.mu

	if !fi.IsDir() {
		return n, nil
	}
	f, err := t.up.open(fullPath)
	if err != nil {
		return nil, err
	}
	fis, err := f.Readdir(-1)
	f.Close()
	if err != nil {
		return nil, err
	}
	sort.Sort(byTypeAndName(fis))
	for _, fi := range fis {
		depn, err := t.statPath(filepath.Join(fullPath, filepath.Base(fi.Name())), fi)
		if err != nil {
			return nil, err
		}
		if depn != nil {
			n.children = append(n.children, depn)
		}
	}
	return n, nil
}

// testHookStatCache, if non-nil, runs first in the checkStatCache worker.
var testHookStatCache func(n *node, ok bool)

func (t *TreeUpload) run() {
	defer close(t.donec)

	// Kick off scanning all files, eventually learning the root
	// node (which references all its children).
	var root *node // nil until received and set in loop below.
	rootc := make(chan *node, 1)
	if !t.rootless {
		go func() {
			n, err := t.statPath(t.base, nil)
			if err != nil {
				log.Fatalf("Error scanning files under %s: %v", t.base, err)
			}
			close(t.stattedc)
			rootc <- n
		}()
	}

	var lastStat, lastUpload string
	dumpStats := func() {
		if android.IsChild() {
			printAndroidCamputStatus(t)
			return
		}
		statStatus := ""
		if root == nil {
			statStatus = fmt.Sprintf("last stat: %s", lastStat)
		}
		blobStats := t.up.Stats()
		log.Printf("FILES: Total: %+v Skipped: %+v Uploaded: %+v %s BLOBS: %s Digested: %d last upload: %s",
			t.total, t.skipped, t.uploaded,
			statStatus,
			blobStats.String(),
			atomic.LoadInt64(&atomicDigestOps),
			lastUpload)
	}

	// Channels for stats & progress bars. These are never closed:
	uploadedc := make(chan *node) // at least tried to upload; server might have had blob
	skippedc := make(chan *node)  // didn't even hit blobserver; trusted our stat cache

	uploadsdonec := make(chan bool)
	var upload chan<- *node
	withPermanode := t.up.fileOpts.wantFilePermanode()
	if t.DiskUsageMode {
		upload = NewNodeWorker(1, func(n *node, ok bool) {
			if !ok {
				uploadsdonec <- true
				return
			}
			if n.fi.IsDir() {
				fmt.Printf("%d\t%s\n", n.SumBytes()>>10, n.fullPath)
			}
		})
	} else {
		dirUpload := NewNodeWorker(dirUploadWorkers, func(n *node, ok bool) {
			if !ok {
				log.Printf("done uploading directories - done with all uploads.")
				uploadsdonec <- true
				return
			}
			put, err := t.up.uploadNode(n)
			if err != nil {
				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
			}
			n.SetPutResult(put, nil)
			uploadedc <- n
		})

		upload = NewNodeWorker(uploadWorkers, func(n *node, ok bool) {
			if !ok {
				log.Printf("done with all uploads.")
				close(dirUpload)
				return
			}
			if n.fi.IsDir() {
				dirUpload <- n
				return
			}
			put, err := t.up.uploadNode(n)
			if err != nil {
				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
			}
			n.SetPutResult(put, nil)
			if c := t.up.statCache; c != nil {
				c.AddCachedPutResult(
					t.up.pwd, n.fullPath, n.fi, put, withPermanode)
			}
			uploadedc <- n
		})
	}

	checkStatCache := NewNodeWorker(statCacheWorkers, func(n *node, ok bool) {
		if hook := testHookStatCache; hook != nil {
			hook(n, ok)
		}
		if !ok {
			if t.up.statCache != nil {
				log.Printf("done checking stat cache")
			}
			close(upload)
			return
		}
		if t.DiskUsageMode || t.up.statCache == nil {
			upload <- n
			return
		}
		if !n.fi.IsDir() {
			cachedRes, err := t.up.statCache.CachedPutResult(
				t.up.pwd, n.fullPath, n.fi, withPermanode)
			if err == nil {
				n.SetPutResult(cachedRes, nil)
				cachelog.Printf("Cache HIT on %q -> %v", n.fullPath, cachedRes)
				android.NoteFileUploaded(n.fullPath, false)
				skippedc <- n
				return
			}
		}
		upload <- n
	})

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	stattedc := t.stattedc
Loop:
	for {
		select {
		case <-uploadsdonec:
			break Loop
		case n := <-rootc:
			root = n
		case n := <-uploadedc:
			t.uploaded.incr(n)
			lastUpload = n.fullPath
		case n := <-skippedc:
			t.skipped.incr(n)
		case n, ok := <-stattedc:
			if !ok {
				log.Printf("done statting:")
				dumpStats()
				close(checkStatCache)
				stattedc = nil
				continue
			}
			lastStat = n.fullPath
			t.total.incr(n)
			checkStatCache <- n
		case <-ticker.C:
			dumpStats()
		}
	}

	log.Printf("tree upload finished. final stats:")
	dumpStats()

	if root == nil {
		panic("unexpected nil root node")
	}
	var err error
	log.Printf("Waiting on root node %q", root.fullPath)
	t.finalPutRes, err = root.PutResult()
	log.Printf("Waited on root node %q: %v", root.fullPath, t.finalPutRes)
	if err != nil {
		t.err = err
	}
}

func (t *TreeUpload) Wait() (*client.PutResult, error) {
	<-t.donec
	// If an error is waiting and we don't otherwise have one, use it:
	if t.err == nil {
		select {
		case t.err = <-t.errc:
		default:
		}
	}
	if t.err == nil && t.finalPutRes == nil {
		panic("Nothing ever set t.finalPutRes, but no error set")
	}
	return t.finalPutRes, t.err
}

type byTypeAndName []os.FileInfo

func (s byTypeAndName) Len() int { return len(s) }
func (s byTypeAndName) Less(i, j int) bool {
	// files go before directories
	if s[i].IsDir() {
		if !s[j].IsDir() {
			return false
		}
	} else if s[j].IsDir() {
		return true
	}
	return s[i].Name() < s[j].Name()
}
func (s byTypeAndName) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// trackDigestReader is an io.Reader wrapper which records the digest of what it reads.
type trackDigestReader struct {
	r io.Reader
	h hash.Hash
}

func (t *trackDigestReader) Read(p []byte) (n int, err error) {
	if t.h == nil {
		t.h = sha1.New()
	}
	n, err = t.r.Read(p)
	t.h.Write(p[:n])
	return
}

func (t *trackDigestReader) Sum() string {
	return fmt.Sprintf("sha1-%x", t.h.Sum(nil))
}
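
// Illustrative sketch (not part of the original source): trackDigestReader is
// used by wrapping another reader, draining it, and then asking for the
// digest, as wholeFileDigest does above (f stands for any open io.Reader,
// such as a file):
//
//	td := &trackDigestReader{r: f}
//	if _, err := io.Copy(ioutil.Discard, td); err != nil {
//		// handle the read error
//	}
//	sum := td.Sum() // e.g. "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"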