github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/index/corpus.go (about) 1 /* 2 Copyright 2013 The Camlistore Authors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package index 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "log" 24 "os" 25 "runtime" 26 "sort" 27 "strconv" 28 "strings" 29 "sync" 30 "time" 31 32 "camlistore.org/pkg/blob" 33 "camlistore.org/pkg/context" 34 "camlistore.org/pkg/osutil" 35 "camlistore.org/pkg/schema" 36 "camlistore.org/pkg/sorted" 37 "camlistore.org/pkg/strutil" 38 "camlistore.org/pkg/syncutil" 39 "camlistore.org/pkg/types/camtypes" 40 ) 41 42 // Corpus is an in-memory summary of all of a user's blobs' metadata. 43 type Corpus struct { 44 mu sync.RWMutex 45 //mu syncutil.RWMutexTracker // when debugging 46 47 // building is true at start while scanning all rows in the 48 // index. While building, certain invariants (like things 49 // being sorted) can be temporarily violated and fixed at the 50 // end of scan. 51 building bool 52 53 // gen is incremented on every blob received. 54 // It's used as a query cache invalidator. 55 gen int64 56 57 strs map[string]string // interned strings 58 brOfStr map[string]blob.Ref // blob.Parse fast path 59 brInterns int64 // blob.Ref -> blob.Ref, via br method 60 61 blobs map[blob.Ref]*camtypes.BlobMeta 62 sumBlobBytes int64 63 64 // camlBlobs maps from camliType ("file") to blobref to the meta. 65 // The value is the same one in blobs. 66 camBlobs map[string]map[blob.Ref]*camtypes.BlobMeta 67 68 // TODO: add GoLLRB to third_party; keep sorted BlobMeta 69 keyId map[blob.Ref]string 70 files map[blob.Ref]camtypes.FileInfo 71 permanodes map[blob.Ref]*PermanodeMeta 72 imageInfo map[blob.Ref]camtypes.ImageInfo // keyed by fileref (not wholeref) 73 fileWholeRef map[blob.Ref]blob.Ref // fileref -> its wholeref (TODO: multi-valued?) 74 gps map[blob.Ref]latLong // wholeRef -> GPS coordinates 75 76 // edge tracks "forward" edges. e.g. from a directory's static-set to 77 // its members. Permanodes' camliMembers aren't tracked, since they 78 // can be obtained from permanodes.Claims. 79 // TODO: implement 80 edge map[blob.Ref][]edge 81 82 // edgeBack tracks "backward" edges. e.g. from a file back to 83 // any directories it's part of. 84 // The map is from target (e.g. file) => owner (static-set). 85 // This only tracks static data structures, not permanodes. 86 // TODO: implement 87 edgeBack map[blob.Ref]map[blob.Ref]bool 88 89 // claimBack allows hopping backwards from a Claim's Value 90 // when the Value is a blobref. It allows, for example, 91 // finding the parents of camliMember claims. If a permanode 92 // parent set A has a camliMembers B and C, it allows finding 93 // A from either B and C. 94 // The slice is not sorted. 95 claimBack map[blob.Ref][]*camtypes.Claim 96 97 // TOOD: use deletedCache instead? 98 deletedBy map[blob.Ref]blob.Ref // key is deleted by value 99 // deletes tracks deletions of claims and permanodes. The key is 100 // the blobref of a claim or permanode. The values, sorted newest first, 101 // contain the blobref of the claim responsible for the deletion, as well 102 // as the date when that deletion happened. 103 deletes map[blob.Ref][]deletion 104 105 mediaTags map[blob.Ref]map[string]string // wholeref -> "album" -> "foo" 106 107 // scratch string slice 108 ss []string 109 } 110 111 type latLong struct { 112 lat, long float64 113 } 114 115 // RLock locks the Corpus for reads. It must be used for any "Locked" methods. 116 func (c *Corpus) RLock() { c.mu.RLock() } 117 118 // RUnlock unlocks the Corpus for reads. 119 func (c *Corpus) RUnlock() { c.mu.RUnlock() } 120 121 // IsDeleted reports whether the provided blobref (of a permanode or claim) should be considered deleted. 122 func (c *Corpus) IsDeleted(br blob.Ref) bool { 123 c.RLock() 124 defer c.RUnlock() 125 return c.IsDeletedLocked(br) 126 } 127 128 // IsDeletedLocked is the version of IsDeleted that assumes the Corpus is already locked with RLock. 129 func (c *Corpus) IsDeletedLocked(br blob.Ref) bool { 130 for _, v := range c.deletes[br] { 131 if !c.IsDeletedLocked(v.deleter) { 132 return true 133 } 134 } 135 return false 136 } 137 138 type edge struct { 139 edgeType string 140 peer blob.Ref 141 } 142 143 type PermanodeMeta struct { 144 // TODO: OwnerKeyId string 145 Claims []*camtypes.Claim // sorted by camtypes.ClaimsByDate 146 } 147 148 func newCorpus() *Corpus { 149 return &Corpus{ 150 blobs: make(map[blob.Ref]*camtypes.BlobMeta), 151 camBlobs: make(map[string]map[blob.Ref]*camtypes.BlobMeta), 152 files: make(map[blob.Ref]camtypes.FileInfo), 153 permanodes: make(map[blob.Ref]*PermanodeMeta), 154 imageInfo: make(map[blob.Ref]camtypes.ImageInfo), 155 deletedBy: make(map[blob.Ref]blob.Ref), 156 keyId: make(map[blob.Ref]string), 157 brOfStr: make(map[string]blob.Ref), 158 fileWholeRef: make(map[blob.Ref]blob.Ref), 159 gps: make(map[blob.Ref]latLong), 160 mediaTags: make(map[blob.Ref]map[string]string), 161 deletes: make(map[blob.Ref][]deletion), 162 claimBack: make(map[blob.Ref][]*camtypes.Claim), 163 } 164 } 165 166 func NewCorpusFromStorage(s sorted.KeyValue) (*Corpus, error) { 167 if s == nil { 168 return nil, errors.New("storage is nil") 169 } 170 c := newCorpus() 171 return c, c.scanFromStorage(s) 172 } 173 174 func (x *Index) KeepInMemory() (*Corpus, error) { 175 var err error 176 x.corpus, err = NewCorpusFromStorage(x.s) 177 return x.corpus, err 178 } 179 180 // PreventStorageAccessForTesting causes any access to the index's underlying 181 // Storage interface to panic. 182 func (x *Index) PreventStorageAccessForTesting() { 183 x.s = crashStorage{} 184 } 185 186 type crashStorage struct { 187 sorted.KeyValue 188 } 189 190 func (crashStorage) Get(key string) (string, error) { 191 panic(fmt.Sprintf("unexpected KeyValue.Get(%q) called", key)) 192 } 193 194 func (crashStorage) Find(start, end string) sorted.Iterator { 195 panic(fmt.Sprintf("unexpected KeyValue.Find(%q, %q) called", start, end)) 196 } 197 198 // *********** Updating the corpus 199 200 var corpusMergeFunc = map[string]func(c *Corpus, k, v []byte) error{ 201 "have": nil, // redundant with "meta" 202 "recpn": nil, // unneeded. 203 "meta": (*Corpus).mergeMetaRow, 204 "signerkeyid": (*Corpus).mergeSignerKeyIdRow, 205 "claim": (*Corpus).mergeClaimRow, 206 "fileinfo": (*Corpus).mergeFileInfoRow, 207 "filetimes": (*Corpus).mergeFileTimesRow, 208 "imagesize": (*Corpus).mergeImageSizeRow, 209 "wholetofile": (*Corpus).mergeWholeToFileRow, 210 "exifgps": (*Corpus).mergeEXIFGPSRow, 211 "exiftag": nil, // not using any for now 212 "signerattrvalue": nil, // ignoring for now 213 "mediatag": (*Corpus).mergeMediaTag, 214 } 215 216 func memstats() *runtime.MemStats { 217 ms := new(runtime.MemStats) 218 runtime.GC() 219 runtime.ReadMemStats(ms) 220 return ms 221 } 222 223 var logCorpusStats = true // set to false in tests 224 225 var slurpPrefixes = []string{ 226 "meta:", // must be first 227 "signerkeyid:", 228 "claim|", 229 "fileinfo|", 230 "filetimes|", 231 "imagesize|", 232 "wholetofile|", 233 "exifgps|", 234 "mediatag|", 235 } 236 237 // Key types (without trailing punctuation) that we slurp to memory at start. 238 var slurpedKeyType = make(map[string]bool) 239 240 func init() { 241 for _, prefix := range slurpPrefixes { 242 slurpedKeyType[typeOfKey(prefix)] = true 243 } 244 } 245 246 func (c *Corpus) scanFromStorage(s sorted.KeyValue) error { 247 c.building = true 248 249 var ms0 *runtime.MemStats 250 if logCorpusStats { 251 ms0 = memstats() 252 log.Printf("Slurping corpus to memory from index...") 253 log.Printf("Slurping corpus to memory from index... (1/%d: meta rows)", len(slurpPrefixes)) 254 } 255 256 // We do the "meta" rows first, before the prefixes below, because it 257 // populates the blobs map (used for blobref interning) and the camBlobs 258 // map (used for hinting the size of other maps) 259 if err := c.scanPrefix(s, "meta:"); err != nil { 260 return err 261 } 262 c.files = make(map[blob.Ref]camtypes.FileInfo, len(c.camBlobs["file"])) 263 c.permanodes = make(map[blob.Ref]*PermanodeMeta, len(c.camBlobs["permanode"])) 264 cpu0 := osutil.CPUUsage() 265 266 var grp syncutil.Group 267 for i, prefix := range slurpPrefixes[1:] { 268 if logCorpusStats { 269 log.Printf("Slurping corpus to memory from index... (%d/%d: prefix %q)", i+2, len(slurpPrefixes), 270 prefix[:len(prefix)-1]) 271 } 272 prefix := prefix 273 grp.Go(func() error { return c.scanPrefix(s, prefix) }) 274 } 275 if err := grp.Err(); err != nil { 276 return err 277 } 278 279 // Post-load optimizations and restoration of invariants. 280 for _, pm := range c.permanodes { 281 // Restore invariants violated during building: 282 sort.Sort(camtypes.ClaimPtrsByDate(pm.Claims)) 283 284 // And intern some stuff. 285 for _, cl := range pm.Claims { 286 cl.BlobRef = c.br(cl.BlobRef) 287 cl.Signer = c.br(cl.Signer) 288 cl.Permanode = c.br(cl.Permanode) 289 cl.Target = c.br(cl.Target) 290 } 291 292 } 293 c.brOfStr = nil // drop this now. 294 c.building = false 295 // log.V(1).Printf("interned blob.Ref = %d", c.brInterns) 296 297 if err := c.initDeletes(s); err != nil { 298 return fmt.Errorf("Could not populate the corpus deletes: %v", err) 299 } 300 301 if logCorpusStats { 302 cpu := osutil.CPUUsage() - cpu0 303 ms1 := memstats() 304 memUsed := ms1.Alloc - ms0.Alloc 305 if ms1.Alloc < ms0.Alloc { 306 memUsed = 0 307 } 308 log.Printf("Corpus stats: %.3f MiB mem: %d blobs (%.3f GiB) (%d schema (%d permanode, %d file (%d image), ...)", 309 float64(memUsed)/(1<<20), 310 len(c.blobs), 311 float64(c.sumBlobBytes)/(1<<30), 312 c.numSchemaBlobsLocked(), 313 len(c.permanodes), 314 len(c.files), 315 len(c.imageInfo)) 316 log.Printf("Corpus scanning CPU usage: %v", cpu) 317 } 318 319 return nil 320 } 321 322 // initDeletes populates the corpus deletes from the delete entries in s. 323 func (c *Corpus) initDeletes(s sorted.KeyValue) (err error) { 324 it := queryPrefix(s, keyDeleted) 325 defer closeIterator(it, &err) 326 for it.Next() { 327 cl, ok := kvDeleted(it.Key()) 328 if !ok { 329 return fmt.Errorf("Bogus keyDeleted entry key: want |\"deleted\"|<deleted blobref>|<reverse claimdate>|<deleter claim>|, got %q", it.Key()) 330 } 331 targetDeletions := append(c.deletes[cl.Target], 332 deletion{ 333 deleter: cl.BlobRef, 334 when: cl.Date, 335 }) 336 sort.Sort(sort.Reverse(byDeletionDate(targetDeletions))) 337 c.deletes[cl.Target] = targetDeletions 338 } 339 return err 340 } 341 342 func (c *Corpus) numSchemaBlobsLocked() (n int64) { 343 for _, m := range c.camBlobs { 344 n += int64(len(m)) 345 } 346 return 347 } 348 349 func (c *Corpus) scanPrefix(s sorted.KeyValue, prefix string) (err error) { 350 typeKey := typeOfKey(prefix) 351 fn, ok := corpusMergeFunc[typeKey] 352 if !ok { 353 panic("No registered merge func for prefix " + prefix) 354 } 355 356 n, t0 := 0, time.Now() 357 it := queryPrefixString(s, prefix) 358 defer closeIterator(it, &err) 359 for it.Next() { 360 n++ 361 if n == 1 { 362 // Let the query be sent off and responses start flowing in before 363 // we take the lock. And if no rows: no lock. 364 c.mu.Lock() 365 defer c.mu.Unlock() 366 } 367 if err := fn(c, it.KeyBytes(), it.ValueBytes()); err != nil { 368 return err 369 } 370 } 371 if logCorpusStats { 372 d := time.Since(t0) 373 log.Printf("Scanned prefix %q: %d rows, %v", prefix[:len(prefix)-1], n, d) 374 } 375 return nil 376 } 377 378 func (c *Corpus) addBlob(br blob.Ref, mm *mutationMap) error { 379 c.mu.Lock() 380 defer c.mu.Unlock() 381 if _, dup := c.blobs[br]; dup { 382 return nil 383 } 384 c.gen++ 385 for k, v := range mm.kv { 386 kt := typeOfKey(k) 387 if !slurpedKeyType[kt] { 388 continue 389 } 390 if err := corpusMergeFunc[kt](c, []byte(k), []byte(v)); err != nil { 391 return err 392 } 393 } 394 for _, cl := range mm.deletes { 395 if err := c.updateDeletes(cl); err != nil { 396 return fmt.Errorf("Could not update the deletes cache after deletion from %v: %v", cl, err) 397 } 398 } 399 return nil 400 } 401 402 // updateDeletes updates the corpus deletes with the delete claim deleteClaim. 403 // deleteClaim is trusted to be a valid delete Claim. 404 func (c *Corpus) updateDeletes(deleteClaim schema.Claim) error { 405 target := c.br(deleteClaim.Target()) 406 deleter := deleteClaim.Blob() 407 when, err := deleter.ClaimDate() 408 if err != nil { 409 return fmt.Errorf("Could not get date of delete claim %v: %v", deleteClaim, err) 410 } 411 del := deletion{ 412 deleter: c.br(deleter.BlobRef()), 413 when: when, 414 } 415 for _, v := range c.deletes[target] { 416 if v == del { 417 return nil 418 } 419 } 420 targetDeletions := append(c.deletes[target], del) 421 sort.Sort(sort.Reverse(byDeletionDate(targetDeletions))) 422 c.deletes[target] = targetDeletions 423 return nil 424 } 425 426 func (c *Corpus) mergeMetaRow(k, v []byte) error { 427 bm, ok := kvBlobMeta_bytes(k, v) 428 if !ok { 429 return fmt.Errorf("bogus meta row: %q -> %q", k, v) 430 } 431 return c.mergeBlobMeta(bm) 432 } 433 434 func (c *Corpus) mergeBlobMeta(bm camtypes.BlobMeta) error { 435 if _, dup := c.blobs[bm.Ref]; dup { 436 panic("dup blob seen") 437 } 438 bm.CamliType = c.str(bm.CamliType) 439 440 c.blobs[bm.Ref] = &bm 441 c.sumBlobBytes += int64(bm.Size) 442 if bm.CamliType != "" { 443 m, ok := c.camBlobs[bm.CamliType] 444 if !ok { 445 m = make(map[blob.Ref]*camtypes.BlobMeta) 446 c.camBlobs[bm.CamliType] = m 447 } 448 m[bm.Ref] = &bm 449 } 450 return nil 451 } 452 453 func (c *Corpus) mergeSignerKeyIdRow(k, v []byte) error { 454 br, ok := blob.ParseBytes(k[len("signerkeyid:"):]) 455 if !ok { 456 return fmt.Errorf("bogus signerid row: %q -> %q", k, v) 457 } 458 c.keyId[br] = string(v) 459 return nil 460 } 461 462 func (c *Corpus) mergeClaimRow(k, v []byte) error { 463 // TODO: update kvClaim to take []byte instead of string 464 cl, ok := kvClaim(string(k), string(v), c.blobParse) 465 if !ok || !cl.Permanode.Valid() { 466 return fmt.Errorf("bogus claim row: %q -> %q", k, v) 467 } 468 cl.Type = c.str(cl.Type) 469 cl.Attr = c.str(cl.Attr) 470 cl.Value = c.str(cl.Value) // less likely to intern, but some (tags) do 471 472 pn := c.br(cl.Permanode) 473 pm, ok := c.permanodes[pn] 474 if !ok { 475 pm = new(PermanodeMeta) 476 c.permanodes[pn] = pm 477 } 478 pm.Claims = append(pm.Claims, &cl) 479 if !c.building { 480 // Unless we're still starting up (at which we sort at 481 // the end instead), keep this sorted. 482 sort.Sort(camtypes.ClaimPtrsByDate(pm.Claims)) 483 } 484 485 if vbr, ok := blob.Parse(cl.Value); ok { 486 c.claimBack[vbr] = append(c.claimBack[vbr], &cl) 487 } 488 return nil 489 } 490 491 func (c *Corpus) mergeFileInfoRow(k, v []byte) error { 492 // fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" "5|some-stuff.txt|" 493 pipe := bytes.IndexByte(k, '|') 494 if pipe < 0 { 495 return fmt.Errorf("unexpected fileinfo key %q", k) 496 } 497 br, ok := blob.ParseBytes(k[pipe+1:]) 498 if !ok { 499 return fmt.Errorf("unexpected fileinfo blobref in key %q", k) 500 } 501 502 // TODO: could at least use strutil.ParseUintBytes to not stringify and retain 503 // the length bytes of v. 504 c.ss = strutil.AppendSplitN(c.ss[:0], string(v), "|", 3) 505 if len(c.ss) != 3 { 506 return fmt.Errorf("unexpected fileinfo value %q", k) 507 } 508 size, err := strconv.ParseInt(c.ss[0], 10, 64) 509 if err != nil { 510 return fmt.Errorf("unexpected fileinfo value %q", k) 511 } 512 c.mutateFileInfo(br, func(fi *camtypes.FileInfo) { 513 fi.Size = size 514 fi.FileName = c.str(urld(c.ss[1])) 515 fi.MIMEType = c.str(urld(c.ss[2])) 516 }) 517 return nil 518 } 519 520 func (c *Corpus) mergeFileTimesRow(k, v []byte) error { 521 if len(v) == 0 { 522 return nil 523 } 524 // "filetimes|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" "1970-01-01T00%3A02%3A03Z" 525 pipe := bytes.IndexByte(k, '|') 526 if pipe < 0 { 527 return fmt.Errorf("unexpected fileinfo key %q", k) 528 } 529 br, ok := blob.ParseBytes(k[pipe+1:]) 530 if !ok { 531 return fmt.Errorf("unexpected filetimes blobref in key %q", k) 532 } 533 c.ss = strutil.AppendSplitN(c.ss[:0], urld(string(v)), ",", -1) 534 times := c.ss 535 c.mutateFileInfo(br, func(fi *camtypes.FileInfo) { 536 updateFileInfoTimes(fi, times) 537 }) 538 return nil 539 } 540 541 func (c *Corpus) mutateFileInfo(br blob.Ref, fn func(*camtypes.FileInfo)) { 542 br = c.br(br) 543 fi := c.files[br] // use zero value if not present 544 fn(&fi) 545 c.files[br] = fi 546 } 547 548 func (c *Corpus) mergeImageSizeRow(k, v []byte) error { 549 br, okk := blob.ParseBytes(k[len("imagesize|"):]) 550 ii, okv := kvImageInfo(v) 551 if !okk || !okv { 552 return fmt.Errorf("bogus row %q = %q", k, v) 553 } 554 br = c.br(br) 555 c.imageInfo[br] = ii 556 return nil 557 } 558 559 // "wholetofile|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5|sha1-fb88f3eab3acfcf3cfc8cd77ae4366f6f975d227" -> "1" 560 func (c *Corpus) mergeWholeToFileRow(k, v []byte) error { 561 pair := k[len("wholetofile|"):] 562 pipe := bytes.IndexByte(pair, '|') 563 if pipe < 0 { 564 return fmt.Errorf("bogus row %q = %q", k, v) 565 } 566 wholeRef, ok1 := blob.ParseBytes(pair[:pipe]) 567 fileRef, ok2 := blob.ParseBytes(pair[pipe+1:]) 568 if !ok1 || !ok2 { 569 return fmt.Errorf("bogus row %q = %q", k, v) 570 } 571 c.fileWholeRef[fileRef] = wholeRef 572 return nil 573 } 574 575 // "mediatag|sha1-2b219be9d9691b4f8090e7ee2690098097f59566|album" = "Some+Album+Name" 576 func (c *Corpus) mergeMediaTag(k, v []byte) error { 577 f := strings.Split(string(k), "|") 578 if len(f) != 3 { 579 return fmt.Errorf("unexpected key %q", k) 580 } 581 wholeRef, ok := blob.Parse(f[1]) 582 if !ok { 583 return fmt.Errorf("failed to parse wholeref from key %q", k) 584 } 585 tm, ok := c.mediaTags[wholeRef] 586 if !ok { 587 tm = make(map[string]string) 588 c.mediaTags[wholeRef] = tm 589 } 590 tm[c.str(f[2])] = c.str(urld(string(v))) 591 return nil 592 } 593 594 // "exifgps|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5" -> "-122.39897155555556|37.61952208333334" 595 func (c *Corpus) mergeEXIFGPSRow(k, v []byte) error { 596 wholeRef, ok := blob.ParseBytes(k[len("exifgps|"):]) 597 pipe := bytes.IndexByte(v, '|') 598 if pipe < 0 || !ok { 599 return fmt.Errorf("bogus row %q = %q", k, v) 600 } 601 lat, err := strconv.ParseFloat(string(v[:pipe]), 64) 602 long, err1 := strconv.ParseFloat(string(v[pipe+1:]), 64) 603 if err != nil || err1 != nil { 604 return fmt.Errorf("bogus row %q = %q", k, v) 605 } 606 c.gps[wholeRef] = latLong{lat, long} 607 return nil 608 } 609 610 // This enables the blob.Parse fast path cache, which reduces CPU (via 611 // reduced GC from new garbage), but increases memory usage, even 612 // though it shouldn't. The GC should fully discard the brOfStr map 613 // (which we nil out at the end of parsing), but the Go GC doesn't 614 // seem to clear it all. 615 // TODO: investigate / file bugs. 616 const useBlobParseCache = false 617 618 func (c *Corpus) blobParse(v string) (br blob.Ref, ok bool) { 619 if useBlobParseCache { 620 br, ok = c.brOfStr[v] 621 if ok { 622 return 623 } 624 } 625 return blob.Parse(v) 626 } 627 628 // str returns s, interned. 629 func (c *Corpus) str(s string) string { 630 if s == "" { 631 return "" 632 } 633 if s, ok := c.strs[s]; ok { 634 return s 635 } 636 if c.strs == nil { 637 c.strs = make(map[string]string) 638 } 639 c.strs[s] = s 640 return s 641 } 642 643 // br returns br, interned. 644 func (c *Corpus) br(br blob.Ref) blob.Ref { 645 if bm, ok := c.blobs[br]; ok { 646 c.brInterns++ 647 return bm.Ref 648 } 649 return br 650 } 651 652 // *********** Reading from the corpus 653 654 // EnumerateCamliBlobsLocked sends just camlistore meta blobs to ch. 655 // 656 // The Corpus must already be locked with RLock. 657 // 658 // If camType is empty, all camlistore blobs are sent, otherwise it specifies 659 // the camliType to send. 660 // ch is closed at the end. The err will either be nil or context.ErrCanceled. 661 func (c *Corpus) EnumerateCamliBlobsLocked(ctx *context.Context, camType string, ch chan<- camtypes.BlobMeta) error { 662 defer close(ch) 663 for t, m := range c.camBlobs { 664 if camType != "" && camType != t { 665 continue 666 } 667 for _, bm := range m { 668 select { 669 case ch <- *bm: 670 case <-ctx.Done(): 671 return context.ErrCanceled 672 } 673 } 674 } 675 return nil 676 } 677 678 // EnumerateBlobMetaLocked sends all known blobs to ch, or until the context is canceled. 679 // 680 // The Corpus must already be locked with RLock. 681 func (c *Corpus) EnumerateBlobMetaLocked(ctx *context.Context, ch chan<- camtypes.BlobMeta) error { 682 defer close(ch) 683 for _, bm := range c.blobs { 684 select { 685 case ch <- *bm: 686 case <-ctx.Done(): 687 return context.ErrCanceled 688 } 689 } 690 return nil 691 } 692 693 // pnAndTime is a value type wrapping a permanode blobref and its modtime. 694 // It's used by EnumeratePermanodesLastModified and EnumeratePermanodesCreated. 695 type pnAndTime struct { 696 pn blob.Ref 697 t time.Time 698 } 699 700 type byPermanodeTime []pnAndTime 701 702 func (s byPermanodeTime) Len() int { return len(s) } 703 func (s byPermanodeTime) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 704 func (s byPermanodeTime) Less(i, j int) bool { 705 if s[i].t.Equal(s[j].t) { 706 return s[i].pn.Less(s[j].pn) 707 } 708 return s[i].t.Before(s[j].t) 709 } 710 711 func (c *Corpus) permanodesByModtimeLocked() []pnAndTime { 712 pns := make([]pnAndTime, 0, len(c.permanodes)) 713 for pn := range c.permanodes { 714 if c.IsDeletedLocked(pn) { 715 continue 716 } 717 if modt, ok := c.PermanodeModtimeLocked(pn); ok { 718 pns = append(pns, pnAndTime{pn, modt}) 719 } 720 } 721 return pns 722 } 723 724 // corpus must be (read) locked. 725 func (c *Corpus) sendPermanodes(ctx *context.Context, ch chan<- camtypes.BlobMeta, pns []pnAndTime) error { 726 for _, cand := range pns { 727 bm := c.blobs[cand.pn] 728 if bm == nil { 729 continue 730 } 731 select { 732 case ch <- *bm: 733 continue 734 case <-ctx.Done(): 735 return context.ErrCanceled 736 } 737 } 738 return nil 739 } 740 741 // EnumeratePermanodesLastModified sends all permanodes, sorted by most recently modified first, to ch, 742 // or until ctx is done. 743 // 744 // The Corpus must already be locked with RLock. 745 func (c *Corpus) EnumeratePermanodesLastModifiedLocked(ctx *context.Context, ch chan<- camtypes.BlobMeta) error { 746 defer close(ch) 747 748 pns := c.permanodesByModtimeLocked() 749 sort.Sort(sort.Reverse(byPermanodeTime(pns))) 750 return c.sendPermanodes(ctx, ch, pns) 751 } 752 753 func (c *Corpus) permanodesByTimeLocked() []pnAndTime { 754 // TODO: cache this 755 pns := make([]pnAndTime, 0, len(c.permanodes)) 756 for pn := range c.permanodes { 757 if c.IsDeletedLocked(pn) { 758 continue 759 } 760 if pt, ok := c.PermanodeAnyTimeLocked(pn); ok { 761 pns = append(pns, pnAndTime{pn, pt}) 762 } 763 } 764 return pns 765 } 766 767 // EnumeratePermanodesCreatedLocked sends all permanodes to ch, or until ctx is done. 768 // They are sorted using the contents creation date if any, the permanode modtime 769 // otherwise, and in the order specified by newestFirst. 770 // 771 // The Corpus must already be locked with RLock. 772 func (c *Corpus) EnumeratePermanodesCreatedLocked(ctx *context.Context, ch chan<- camtypes.BlobMeta, newestFirst bool) error { 773 defer close(ch) 774 775 pns := c.permanodesByTimeLocked() 776 if newestFirst { 777 sort.Sort(sort.Reverse(byPermanodeTime(pns))) 778 } else { 779 sort.Sort(byPermanodeTime(pns)) 780 } 781 782 return c.sendPermanodes(ctx, ch, pns) 783 } 784 785 func (c *Corpus) GetBlobMeta(br blob.Ref) (camtypes.BlobMeta, error) { 786 c.mu.RLock() 787 defer c.mu.RUnlock() 788 return c.GetBlobMetaLocked(br) 789 } 790 791 func (c *Corpus) GetBlobMetaLocked(br blob.Ref) (camtypes.BlobMeta, error) { 792 bm, ok := c.blobs[br] 793 if !ok { 794 return camtypes.BlobMeta{}, os.ErrNotExist 795 } 796 return *bm, nil 797 } 798 799 func (c *Corpus) KeyId(signer blob.Ref) (string, error) { 800 c.mu.RLock() 801 defer c.mu.RUnlock() 802 if v, ok := c.keyId[signer]; ok { 803 return v, nil 804 } 805 return "", sorted.ErrNotFound 806 } 807 808 var ( 809 errUnsupportedNodeType = errors.New("unsupported nodeType") 810 errNoNodeAttr = errors.New("attribute not found") 811 ) 812 813 // typeSpecificNodeTimeLocked returns the time that is set as a specific permanode attribute. 814 // That attribute, if any, depends on the nodeType ("camliNodeType" attribute) value, which 815 // may be empty as well. 816 func (c *Corpus) typeSpecificNodeTimeLocked(nodeType string, pn blob.Ref) (t time.Time, err error) { 817 attr := "" 818 switch nodeType { 819 case "foursquare.com:checkin": 820 attr = "startDate" 821 // TODO(mpl): other nodeTypes from importers 822 default: 823 return t, errUnsupportedNodeType 824 } 825 timeStr := c.PermanodeAttrValueLocked(pn, attr, time.Time{}, blob.Ref{}) 826 if timeStr == "" { 827 return t, errNoNodeAttr 828 } 829 return time.Parse(time.RFC3339, timeStr) 830 } 831 832 // PermanodeTimeLocked returns the time of the content in permanode. 833 func (c *Corpus) PermanodeTimeLocked(pn blob.Ref) (t time.Time, ok bool) { 834 // TODO(bradfitz): keep this time property cached on the permanode / files 835 836 // TODO(bradfitz): finish implmenting all these 837 838 // Priorities: 839 // -- Permanode explicit "camliTime" property 840 // -- EXIF GPS time 841 // -- Exif camera time - this one is actually already in the FileInfo, 842 // because we use schema.FileTime (which returns the EXIF time, if available) 843 // to index the time when receiving a file. 844 // -- File time 845 // -- File modtime 846 // -- camliContent claim set time 847 848 // First check the type-specific time (e.g. from importers) 849 nodeType := c.PermanodeAttrValueLocked(pn, "camliNodeType", time.Time{}, blob.Ref{}) 850 if nodeType != "" { 851 if t, err := c.typeSpecificNodeTimeLocked(nodeType, pn); err == nil { 852 return t, true 853 } 854 } 855 856 // Otherwise check time from the FileInfo 857 ccRef, ccTime, ok := c.pnCamliContentLocked(pn) 858 if !ok { 859 return 860 } 861 862 fi, ok := c.files[ccRef] 863 if ok { 864 if fi.Time != nil { 865 return time.Time(*fi.Time), true 866 } 867 if fi.ModTime != nil { 868 return time.Time(*fi.ModTime), true 869 } 870 } 871 return ccTime, true 872 } 873 874 // PermanodeAnyTimeLocked returns the time that best qualifies the permanode. 875 // It tries content-specific times first, the permanode modtime otherwise. 876 func (c *Corpus) PermanodeAnyTimeLocked(pn blob.Ref) (t time.Time, ok bool) { 877 if t, ok := c.PermanodeTimeLocked(pn); ok { 878 return t, ok 879 } 880 return c.PermanodeModtimeLocked(pn) 881 } 882 883 func (c *Corpus) pnCamliContentLocked(pn blob.Ref) (cc blob.Ref, t time.Time, ok bool) { 884 // TODO(bradfitz): keep this property cached 885 pm, ok := c.permanodes[pn] 886 if !ok { 887 return 888 } 889 for _, cl := range pm.Claims { 890 if cl.Attr != "camliContent" { 891 continue 892 } 893 // TODO: pass down the 'PermanodeConstraint.At' parameter, and then do: if cl.Date.After(at) { continue } 894 switch cl.Type { 895 case string(schema.DelAttributeClaim): 896 cc = blob.Ref{} 897 t = time.Time{} 898 case string(schema.SetAttributeClaim): 899 cc = blob.ParseOrZero(cl.Value) 900 t = cl.Date 901 } 902 } 903 return cc, t, cc.Valid() 904 905 } 906 907 // PermanodeModtime returns the latest modification time of the given 908 // permanode. 909 // 910 // The ok value is true only if the permanode is known and has any 911 // non-deleted claims. A deleted claim is ignored and neither its 912 // claim date nor the date of the delete claim affect the modtime of 913 // the permanode. 914 func (c *Corpus) PermanodeModtime(pn blob.Ref) (t time.Time, ok bool) { 915 // TODO: figure out behavior wrt mutations by different people 916 c.mu.RLock() 917 defer c.mu.RUnlock() 918 return c.PermanodeModtimeLocked(pn) 919 } 920 921 // PermanodeModtimeLocked is like PermanodeModtime but for when the Corpus is 922 // already locked via RLock. 923 func (c *Corpus) PermanodeModtimeLocked(pn blob.Ref) (t time.Time, ok bool) { 924 pm, ok := c.permanodes[pn] 925 if !ok { 926 return 927 } 928 929 // Note: We intentionally don't try to derive any information 930 // (except the owner, elsewhere) from the permanode blob 931 // itself. Even though the permanode blob sometimes has the 932 // GPG signature time, we intentionally ignore it. 933 for _, cl := range pm.Claims { 934 if c.IsDeletedLocked(cl.BlobRef) { 935 continue 936 } 937 if cl.Date.After(t) { 938 t = cl.Date 939 } 940 } 941 return t, !t.IsZero() 942 } 943 944 // AppendPermanodeAttrValues appends to dst all the values for the attribute 945 // attr set on permaNode. 946 // signerFilter is optional. 947 // dst must start with length 0 (laziness, mostly) 948 func (c *Corpus) AppendPermanodeAttrValues(dst []string, 949 permaNode blob.Ref, 950 attr string, 951 at time.Time, 952 signerFilter blob.Ref) []string { 953 c.mu.RLock() 954 defer c.mu.RUnlock() 955 return c.AppendPermanodeAttrValuesLocked(dst, permaNode, attr, at, signerFilter) 956 } 957 958 // PermanodeAttrValueLocked returns a single-valued attribute or "". 959 func (c *Corpus) PermanodeAttrValueLocked(permaNode blob.Ref, 960 attr string, 961 at time.Time, 962 signerFilter blob.Ref) string { 963 pm, ok := c.permanodes[permaNode] 964 if !ok { 965 return "" 966 } 967 if at.IsZero() { 968 at = time.Now() 969 } 970 var v string 971 for _, cl := range pm.Claims { 972 if cl.Attr != attr || cl.Date.After(at) { 973 continue 974 } 975 if signerFilter.Valid() && signerFilter != cl.Signer { 976 continue 977 } 978 switch cl.Type { 979 case string(schema.DelAttributeClaim): 980 if cl.Value == "" { 981 v = "" 982 } else if v == cl.Value { 983 v = "" 984 } 985 case string(schema.SetAttributeClaim): 986 v = cl.Value 987 case string(schema.AddAttributeClaim): 988 if v == "" { 989 v = cl.Value 990 } 991 } 992 } 993 return v 994 } 995 996 func (c *Corpus) AppendPermanodeAttrValuesLocked(dst []string, 997 permaNode blob.Ref, 998 attr string, 999 at time.Time, 1000 signerFilter blob.Ref) []string { 1001 if len(dst) > 0 { 1002 panic("len(dst) must be 0") 1003 } 1004 pm, ok := c.permanodes[permaNode] 1005 if !ok { 1006 return dst 1007 } 1008 if at.IsZero() { 1009 at = time.Now() 1010 } 1011 for _, cl := range pm.Claims { 1012 if cl.Attr != attr || cl.Date.After(at) { 1013 continue 1014 } 1015 if signerFilter.Valid() && signerFilter != cl.Signer { 1016 continue 1017 } 1018 switch cl.Type { 1019 case string(schema.DelAttributeClaim): 1020 if cl.Value == "" { 1021 dst = dst[:0] // delete all 1022 } else { 1023 for i := 0; i < len(dst); i++ { 1024 v := dst[i] 1025 if v == cl.Value { 1026 copy(dst[i:], dst[i+1:]) 1027 dst = dst[:len(dst)-1] 1028 i-- 1029 } 1030 } 1031 } 1032 case string(schema.SetAttributeClaim): 1033 dst = append(dst[:0], cl.Value) 1034 case string(schema.AddAttributeClaim): 1035 dst = append(dst, cl.Value) 1036 } 1037 } 1038 return dst 1039 } 1040 1041 func (c *Corpus) AppendClaims(dst []camtypes.Claim, permaNode blob.Ref, 1042 signerFilter blob.Ref, 1043 attrFilter string) ([]camtypes.Claim, error) { 1044 c.mu.RLock() 1045 defer c.mu.RUnlock() 1046 pm, ok := c.permanodes[permaNode] 1047 if !ok { 1048 return nil, nil 1049 } 1050 for _, cl := range pm.Claims { 1051 if c.IsDeletedLocked(cl.BlobRef) { 1052 continue 1053 } 1054 if signerFilter.Valid() && cl.Signer != signerFilter { 1055 continue 1056 } 1057 if attrFilter != "" && cl.Attr != attrFilter { 1058 continue 1059 } 1060 dst = append(dst, *cl) 1061 } 1062 return dst, nil 1063 } 1064 1065 func (c *Corpus) GetFileInfo(fileRef blob.Ref) (fi camtypes.FileInfo, err error) { 1066 c.mu.RLock() 1067 defer c.mu.RUnlock() 1068 return c.GetFileInfoLocked(fileRef) 1069 } 1070 1071 func (c *Corpus) GetFileInfoLocked(fileRef blob.Ref) (fi camtypes.FileInfo, err error) { 1072 fi, ok := c.files[fileRef] 1073 if !ok { 1074 err = os.ErrNotExist 1075 } 1076 return 1077 } 1078 1079 func (c *Corpus) GetImageInfo(fileRef blob.Ref) (ii camtypes.ImageInfo, err error) { 1080 c.mu.RLock() 1081 defer c.mu.RUnlock() 1082 return c.GetImageInfoLocked(fileRef) 1083 } 1084 1085 func (c *Corpus) GetImageInfoLocked(fileRef blob.Ref) (ii camtypes.ImageInfo, err error) { 1086 ii, ok := c.imageInfo[fileRef] 1087 if !ok { 1088 err = os.ErrNotExist 1089 } 1090 return 1091 } 1092 1093 func (c *Corpus) GetMediaTags(fileRef blob.Ref) (map[string]string, error) { 1094 c.mu.RLock() 1095 defer c.mu.RUnlock() 1096 return c.GetMediaTagsLocked(fileRef) 1097 } 1098 1099 func (c *Corpus) GetMediaTagsLocked(fileRef blob.Ref) (map[string]string, error) { 1100 wholeRef, ok := c.fileWholeRef[fileRef] 1101 if !ok { 1102 return nil, os.ErrNotExist 1103 } 1104 tags, ok := c.mediaTags[wholeRef] 1105 if !ok { 1106 return nil, os.ErrNotExist 1107 } 1108 return tags, nil 1109 } 1110 1111 func (c *Corpus) FileLatLongLocked(fileRef blob.Ref) (lat, long float64, ok bool) { 1112 wholeRef, ok := c.fileWholeRef[fileRef] 1113 if !ok { 1114 return 1115 } 1116 ll, ok := c.gps[wholeRef] 1117 if !ok { 1118 return 1119 } 1120 return ll.lat, ll.long, true 1121 } 1122 1123 // zero value of at means current 1124 func (c *Corpus) PermanodeLatLongLocked(pn blob.Ref, at time.Time) (lat, long float64, ok bool) { 1125 nodeType := c.PermanodeAttrValueLocked(pn, "camliNodeType", at, blob.Ref{}) 1126 if nodeType == "" { 1127 return 1128 } 1129 // TODO: make these pluggable, e.g. registered from an importer or something? 1130 // How will that work when they're out-of-process? 1131 if nodeType == "foursquare.com:checkin" { 1132 venuePn, hasVenue := blob.Parse(c.PermanodeAttrValueLocked(pn, "foursquareVenuePermanode", at, blob.Ref{})) 1133 if !hasVenue { 1134 return 1135 } 1136 return c.PermanodeLatLongLocked(venuePn, at) 1137 } 1138 if nodeType == "foursquare.com:venue" { 1139 var err error 1140 lat, err = strconv.ParseFloat(c.PermanodeAttrValueLocked(pn, "latitude", at, blob.Ref{}), 64) 1141 if err != nil { 1142 return 1143 } 1144 long, err = strconv.ParseFloat(c.PermanodeAttrValueLocked(pn, "longitude", at, blob.Ref{}), 64) 1145 if err != nil { 1146 return 1147 } 1148 return lat, long, true 1149 } 1150 return 1151 } 1152 1153 // ForeachClaimBackLocked calls fn for each claim with a value referencing br. 1154 // If at is zero, all claims are yielded. 1155 // If at is non-zero, claims after that point are skipped. 1156 // If fn returns false, iteration ends. 1157 // Iteration is in an undefined order. 1158 func (c *Corpus) ForeachClaimBackLocked(value blob.Ref, at time.Time, fn func(*camtypes.Claim) bool) { 1159 for _, cl := range c.claimBack[value] { 1160 if !at.IsZero() && cl.Date.After(at) { 1161 continue 1162 } 1163 if !fn(cl) { 1164 return 1165 } 1166 } 1167 } 1168 1169 // PermanodeHasAttrValueLocked reports whether the permanode pn at 1170 // time at (zero means now) has the given attribute with the given 1171 // value. If the attribute is multi-valued, any may match. 1172 func (c *Corpus) PermanodeHasAttrValueLocked(pn blob.Ref, at time.Time, attr, val string) bool { 1173 pm, ok := c.permanodes[pn] 1174 if !ok { 1175 return false 1176 } 1177 if at.IsZero() { 1178 at = time.Now() 1179 } 1180 ret := false 1181 for _, cl := range pm.Claims { 1182 if cl.Attr != attr { 1183 continue 1184 } 1185 if cl.Date.After(at) { 1186 break 1187 } 1188 switch cl.Type { 1189 case string(schema.DelAttributeClaim): 1190 if cl.Value == "" || cl.Value == val { 1191 ret = false 1192 } 1193 case string(schema.SetAttributeClaim): 1194 ret = (cl.Value == val) 1195 case string(schema.AddAttributeClaim): 1196 if cl.Value == val { 1197 return true 1198 } 1199 } 1200 } 1201 return ret 1202 } 1203 1204 // SetVerboseCorpusLogging controls corpus setup verbosity. It's on by default 1205 // but used to disable verbose logging in tests. 1206 func SetVerboseCorpusLogging(v bool) { 1207 logCorpusStats = v 1208 }