github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/pkg/index/index.go (about) 1 /* 2 Copyright 2011 Google Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package index 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "log" 25 "os" 26 "sort" 27 "strconv" 28 "strings" 29 "sync" 30 "time" 31 32 "camlistore.org/pkg/blob" 33 "camlistore.org/pkg/blobserver" 34 "camlistore.org/pkg/context" 35 "camlistore.org/pkg/jsonconfig" 36 "camlistore.org/pkg/schema" 37 "camlistore.org/pkg/sorted" 38 "camlistore.org/pkg/strutil" 39 "camlistore.org/pkg/types" 40 "camlistore.org/pkg/types/camtypes" 41 ) 42 43 func init() { 44 blobserver.RegisterStorageConstructor("index", newFromConfig) 45 } 46 47 type Index struct { 48 *blobserver.NoImplStorage 49 50 s sorted.KeyValue 51 52 KeyFetcher blob.Fetcher // for verifying claims 53 54 // BlobSource is used for fetching blobs when indexing files and other 55 // blobs types that reference other objects. 56 BlobSource blobserver.FetcherEnumerator 57 58 // TODO(mpl): do not init and use deletes when we have a corpus. Since corpus has its own deletes now, they are redundant. 59 60 // deletes is a cache to keep track of the deletion status (deleted vs undeleted) 61 // of the blobs in the index. It makes for faster reads than the otherwise 62 // recursive calls on the index. 
63 deletes *deletionCache 64 65 corpus *Corpus // or nil, if not being kept in memory 66 67 mu sync.Mutex // guards following 68 // needs maps from a blob to the missing blobs it needs to 69 // finish indexing. 70 needs map[blob.Ref][]blob.Ref 71 // neededBy is the inverse of needs. The keys are missing blobs 72 // and the value(s) are blobs waiting to be reindexed. 73 neededBy map[blob.Ref][]blob.Ref 74 readyReindex map[blob.Ref]bool // set of things ready to be re-indexed 75 76 tickleOoo chan bool // tickle out-of-order reindex loop, whenever readyReindex is added to 77 } 78 79 var ( 80 _ blobserver.Storage = (*Index)(nil) 81 _ Interface = (*Index)(nil) 82 ) 83 84 var aboutToReindex = false 85 86 // SetImpendingReindex notes that the user ran the camlistored binary with the --reindex flag. 87 // Because the index is about to be wiped, schema version checks should be suppressed. 88 func SetImpendingReindex() { 89 // TODO: remove this function, once we refactor how indexes are created. 90 // They'll probably not all have their own storage constructor registered. 91 aboutToReindex = true 92 } 93 94 // MustNew is wraps New and fails with a Fatal error on t if New 95 // returns an error. 96 func MustNew(t types.TB, s sorted.KeyValue) *Index { 97 ix, err := New(s) 98 if err != nil { 99 t.Fatalf("Error creating index: %v", err) 100 } 101 return ix 102 } 103 104 // New returns a new index using the provided key/value storage implementation. 105 func New(s sorted.KeyValue) (*Index, error) { 106 idx := &Index{ 107 s: s, 108 needs: make(map[blob.Ref][]blob.Ref), 109 neededBy: make(map[blob.Ref][]blob.Ref), 110 readyReindex: make(map[blob.Ref]bool), 111 tickleOoo: make(chan bool, 1), 112 } 113 if aboutToReindex { 114 idx.deletes = newDeletionCache() 115 go idx.outOfOrderIndexerLoop() 116 return idx, nil 117 } 118 119 schemaVersion := idx.schemaVersion() 120 switch { 121 case schemaVersion == 0 && idx.isEmpty(): 122 // New index. 
123 err := idx.s.Set(keySchemaVersion.name, fmt.Sprint(requiredSchemaVersion)) 124 if err != nil { 125 return nil, fmt.Errorf("Could not write index schema version %q: %v", requiredSchemaVersion, err) 126 } 127 case schemaVersion != requiredSchemaVersion: 128 tip := "" 129 if os.Getenv("CAMLI_DEV_CAMLI_ROOT") != "" { 130 // Good signal that we're using the devcam server, so help out 131 // the user with a more useful tip: 132 tip = `(For the dev server, run "devcam server --wipe" to wipe both your blobs and index)` 133 } else { 134 tip = "Run 'camlistored --reindex' (it might take awhile, but shows status). Alternative: 'camtool dbinit' (or just delete the file for a file based index), and then 'camtool sync --all'" 135 } 136 return nil, fmt.Errorf("index schema version is %d; required one is %d. You need to reindex. %s", 137 schemaVersion, requiredSchemaVersion, tip) 138 } 139 if err := idx.initDeletesCache(); err != nil { 140 return nil, fmt.Errorf("Could not initialize index's deletes cache: %v", err) 141 } 142 if err := idx.initNeededMaps(); err != nil { 143 return nil, fmt.Errorf("Could not initialize index's missing blob maps: %v", err) 144 } 145 go idx.outOfOrderIndexerLoop() 146 return idx, nil 147 } 148 149 func newFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (blobserver.Storage, error) { 150 blobPrefix := config.RequiredString("blobSource") 151 kvConfig := config.RequiredObject("storage") 152 if err := config.Validate(); err != nil { 153 return nil, err 154 } 155 kv, err := sorted.NewKeyValue(kvConfig) 156 if err != nil { 157 return nil, err 158 } 159 160 ix, err := New(kv) 161 if err != nil { 162 return nil, err 163 } 164 165 sto, err := ld.GetStorage(blobPrefix) 166 if err != nil { 167 ix.Close() 168 return nil, err 169 } 170 ix.BlobSource = sto 171 172 // Good enough, for now: 173 ix.KeyFetcher = ix.BlobSource 174 175 return ix, err 176 } 177 178 func (x *Index) String() string { 179 return fmt.Sprintf("Camlistore index, using key/value 
implementation %T", x.s) 180 } 181 182 func (x *Index) isEmpty() bool { 183 iter := x.s.Find("", "") 184 hasRows := iter.Next() 185 if err := iter.Close(); err != nil { 186 panic(err) 187 } 188 return !hasRows 189 } 190 191 func (x *Index) Reindex() error { 192 ctx := context.TODO() 193 194 wiper, ok := x.s.(sorted.Wiper) 195 if !ok { 196 return fmt.Errorf("index's storage type %T doesn't support sorted.Wiper", x.s) 197 } 198 log.Printf("Wiping index storage type %T ...", x.s) 199 if err := wiper.Wipe(); err != nil { 200 return fmt.Errorf("error wiping index's sorted key/value type %T: %v", x.s, err) 201 } 202 log.Printf("Index wiped. Rebuilding...") 203 204 reindexStart, _ := blob.Parse(os.Getenv("CAMLI_REINDEX_START")) 205 206 err := x.s.Set(keySchemaVersion.name, fmt.Sprintf("%d", requiredSchemaVersion)) 207 if err != nil { 208 return err 209 } 210 211 var nerrmu sync.Mutex 212 nerr := 0 213 214 blobc := make(chan blob.Ref, 32) 215 216 enumCtx := ctx.New() 217 enumErr := make(chan error, 1) 218 go func() { 219 defer close(blobc) 220 donec := enumCtx.Done() 221 var lastTick time.Time 222 enumErr <- blobserver.EnumerateAll(enumCtx, x.BlobSource, func(sb blob.SizedRef) error { 223 now := time.Now() 224 if lastTick.Before(now.Add(-1 * time.Second)) { 225 log.Printf("Reindexing at %v", sb.Ref) 226 lastTick = now 227 } 228 if reindexStart.Valid() && sb.Ref.Less(reindexStart) { 229 return nil 230 } 231 select { 232 case <-donec: 233 return context.ErrCanceled 234 case blobc <- sb.Ref: 235 return nil 236 } 237 }) 238 }() 239 const j = 4 // arbitrary concurrency level 240 var wg sync.WaitGroup 241 for i := 0; i < j; i++ { 242 wg.Add(1) 243 go func() { 244 defer wg.Done() 245 for br := range blobc { 246 if err := x.indexBlob(br); err != nil { 247 log.Printf("Error reindexing %v: %v", br, err) 248 nerrmu.Lock() 249 nerr++ 250 nerrmu.Unlock() 251 // TODO: flag (or default?) to stop the EnumerateAll above once 252 // there's any error with reindexing? 
253 } 254 } 255 }() 256 } 257 if err := <-enumErr; err != nil { 258 return err 259 } 260 261 wg.Wait() 262 263 x.mu.Lock() 264 readyCount := len(x.readyReindex) 265 x.mu.Unlock() 266 if readyCount > 0 { 267 return fmt.Errorf("%d blobs were ready to reindex in out-of-order queue, but not yet ran", readyCount) 268 } 269 270 log.Printf("Index rebuild complete.") 271 nerrmu.Lock() // no need to unlock 272 if nerr != 0 { 273 return fmt.Errorf("%d blobs failed to re-index", nerr) 274 } 275 if err := x.initDeletesCache(); err != nil { 276 return err 277 } 278 return nil 279 } 280 281 func queryPrefixString(s sorted.KeyValue, prefix string) sorted.Iterator { 282 if prefix == "" { 283 return s.Find("", "") 284 } 285 lastByte := prefix[len(prefix)-1] 286 if lastByte == 0xff { 287 panic("unsupported query prefix ending in 0xff") 288 } 289 end := prefix[:len(prefix)-1] + string(lastByte+1) 290 return s.Find(prefix, end) 291 } 292 293 func (x *Index) queryPrefixString(prefix string) sorted.Iterator { 294 return queryPrefixString(x.s, prefix) 295 } 296 297 func queryPrefix(s sorted.KeyValue, key *keyType, args ...interface{}) sorted.Iterator { 298 return queryPrefixString(s, key.Prefix(args...)) 299 } 300 301 func (x *Index) queryPrefix(key *keyType, args ...interface{}) sorted.Iterator { 302 return x.queryPrefixString(key.Prefix(args...)) 303 } 304 305 func closeIterator(it sorted.Iterator, perr *error) { 306 err := it.Close() 307 if err != nil && *perr == nil { 308 *perr = err 309 } 310 } 311 312 // schemaVersion returns the version of schema as it is found 313 // in the currently used index. If not found, it returns 0. 
314 func (x *Index) schemaVersion() int { 315 schemaVersionStr, err := x.s.Get(keySchemaVersion.name) 316 if err != nil { 317 if err == sorted.ErrNotFound { 318 return 0 319 } 320 panic(fmt.Sprintf("Could not get index schema version: %v", err)) 321 } 322 schemaVersion, err := strconv.Atoi(schemaVersionStr) 323 if err != nil { 324 panic(fmt.Sprintf("Bogus index schema version: %q", schemaVersionStr)) 325 } 326 return schemaVersion 327 } 328 329 type deletion struct { 330 deleter blob.Ref 331 when time.Time 332 } 333 334 type byDeletionDate []deletion 335 336 func (d byDeletionDate) Len() int { return len(d) } 337 func (d byDeletionDate) Swap(i, j int) { d[i], d[j] = d[j], d[i] } 338 func (d byDeletionDate) Less(i, j int) bool { return d[i].when.Before(d[j].when) } 339 340 type deletionCache struct { 341 sync.RWMutex 342 m map[blob.Ref][]deletion 343 } 344 345 func newDeletionCache() *deletionCache { 346 return &deletionCache{ 347 m: make(map[blob.Ref][]deletion), 348 } 349 } 350 351 // initDeletesCache creates and populates the deletion status cache used by the index 352 // for faster calls to IsDeleted and DeletedAt. It is called by New. 
353 func (x *Index) initDeletesCache() (err error) { 354 x.deletes = newDeletionCache() 355 it := x.queryPrefix(keyDeleted) 356 defer closeIterator(it, &err) 357 for it.Next() { 358 cl, ok := kvDeleted(it.Key()) 359 if !ok { 360 return fmt.Errorf("Bogus keyDeleted entry key: want |\"deleted\"|<deleted blobref>|<reverse claimdate>|<deleter claim>|, got %q", it.Key()) 361 } 362 targetDeletions := append(x.deletes.m[cl.Target], 363 deletion{ 364 deleter: cl.BlobRef, 365 when: cl.Date, 366 }) 367 sort.Sort(sort.Reverse(byDeletionDate(targetDeletions))) 368 x.deletes.m[cl.Target] = targetDeletions 369 } 370 return err 371 } 372 373 func kvDeleted(k string) (c camtypes.Claim, ok bool) { 374 // TODO(bradfitz): garbage 375 keyPart := strings.Split(k, "|") 376 if len(keyPart) != 4 { 377 return 378 } 379 if keyPart[0] != "deleted" { 380 return 381 } 382 target, ok := blob.Parse(keyPart[1]) 383 if !ok { 384 return 385 } 386 claimRef, ok := blob.Parse(keyPart[3]) 387 if !ok { 388 return 389 } 390 date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[2])) 391 if err != nil { 392 return 393 } 394 return camtypes.Claim{ 395 BlobRef: claimRef, 396 Target: target, 397 Date: date, 398 Type: string(schema.DeleteClaim), 399 }, true 400 } 401 402 // IsDeleted reports whether the provided blobref (of a permanode or 403 // claim) should be considered deleted. 404 func (x *Index) IsDeleted(br blob.Ref) bool { 405 if x.deletes == nil { 406 // We still allow the slow path, in case someone creates 407 // their own Index without a deletes cache. 408 return x.isDeletedNoCache(br) 409 } 410 x.deletes.RLock() 411 defer x.deletes.RUnlock() 412 return x.isDeleted(br) 413 } 414 415 // The caller must hold x.deletes.mu for read. 
416 func (x *Index) isDeleted(br blob.Ref) bool { 417 deletes, ok := x.deletes.m[br] 418 if !ok { 419 return false 420 } 421 for _, v := range deletes { 422 if !x.isDeleted(v.deleter) { 423 return true 424 } 425 } 426 return false 427 } 428 429 // Used when the Index has no deletes cache (x.deletes is nil). 430 func (x *Index) isDeletedNoCache(br blob.Ref) bool { 431 var err error 432 it := x.queryPrefix(keyDeleted, br) 433 for it.Next() { 434 cl, ok := kvDeleted(it.Key()) 435 if !ok { 436 panic(fmt.Sprintf("Bogus keyDeleted entry key: want |\"deleted\"|<deleted blobref>|<reverse claimdate>|<deleter claim>|, got %q", it.Key())) 437 } 438 if !x.isDeletedNoCache(cl.BlobRef) { 439 closeIterator(it, &err) 440 if err != nil { 441 // TODO: Do better? 442 panic(fmt.Sprintf("Could not close iterator on keyDeleted: %v", err)) 443 } 444 return true 445 } 446 } 447 closeIterator(it, &err) 448 if err != nil { 449 // TODO: Do better? 450 panic(fmt.Sprintf("Could not close iterator on keyDeleted: %v", err)) 451 } 452 return false 453 } 454 455 // GetRecentPermanodes sends results to dest filtered by owner, limit, and 456 // before. A zero value for before will default to the current time. The 457 // results will have duplicates supressed, with most recent permanode 458 // returned. 459 // Note, permanodes more recent than before will still be fetched from the 460 // index then skipped. This means runtime scales linearly with the number of 461 // nodes more recent than before. 
462 func (x *Index) GetRecentPermanodes(dest chan<- camtypes.RecentPermanode, owner blob.Ref, limit int, before time.Time) (err error) { 463 defer close(dest) 464 465 keyId, err := x.KeyId(owner) 466 if err == sorted.ErrNotFound { 467 log.Printf("No recent permanodes because keyId for owner %v not found", owner) 468 return nil 469 } 470 if err != nil { 471 log.Printf("Error fetching keyId for owner %v: %v", owner, err) 472 return err 473 } 474 475 sent := 0 476 var seenPermanode dupSkipper 477 478 if before.IsZero() { 479 before = time.Now() 480 } 481 // TODO(bradfitz): handle before efficiently. don't use queryPrefix. 482 it := x.queryPrefix(keyRecentPermanode, keyId) 483 defer closeIterator(it, &err) 484 for it.Next() { 485 permaStr := it.Value() 486 parts := strings.SplitN(it.Key(), "|", 4) 487 if len(parts) != 4 { 488 continue 489 } 490 mTime, _ := time.Parse(time.RFC3339, unreverseTimeString(parts[2])) 491 permaRef, ok := blob.Parse(permaStr) 492 if !ok { 493 continue 494 } 495 if x.IsDeleted(permaRef) { 496 continue 497 } 498 if seenPermanode.Dup(permaStr) { 499 continue 500 } 501 // Skip entries with an mTime less than or equal to before. 502 if !mTime.Before(before) { 503 continue 504 } 505 dest <- camtypes.RecentPermanode{ 506 Permanode: permaRef, 507 Signer: owner, // TODO(bradfitz): kinda. usually. for now. 
508 LastModTime: mTime, 509 } 510 sent++ 511 if sent == limit { 512 break 513 } 514 } 515 return nil 516 } 517 518 func (x *Index) AppendClaims(dst []camtypes.Claim, permaNode blob.Ref, 519 signerFilter blob.Ref, 520 attrFilter string) ([]camtypes.Claim, error) { 521 if x.corpus != nil { 522 return x.corpus.AppendClaims(dst, permaNode, signerFilter, attrFilter) 523 } 524 var ( 525 keyId string 526 err error 527 it sorted.Iterator 528 ) 529 if signerFilter.Valid() { 530 keyId, err = x.KeyId(signerFilter) 531 if err == sorted.ErrNotFound { 532 return nil, nil 533 } 534 if err != nil { 535 return nil, err 536 } 537 it = x.queryPrefix(keyPermanodeClaim, permaNode, keyId) 538 } else { 539 it = x.queryPrefix(keyPermanodeClaim, permaNode) 540 } 541 defer closeIterator(it, &err) 542 543 // In the common case, an attribute filter is just a plain 544 // token ("camliContent") unescaped. If so, fast path that 545 // check to skip the row before we even split it. 546 var mustHave string 547 if attrFilter != "" && urle(attrFilter) == attrFilter { 548 mustHave = attrFilter 549 } 550 551 for it.Next() { 552 val := it.Value() 553 if mustHave != "" && !strings.Contains(val, mustHave) { 554 continue 555 } 556 cl, ok := kvClaim(it.Key(), val, blob.Parse) 557 if !ok { 558 continue 559 } 560 if x.IsDeleted(cl.BlobRef) { 561 continue 562 } 563 if attrFilter != "" && cl.Attr != attrFilter { 564 continue 565 } 566 if signerFilter.Valid() && cl.Signer != signerFilter { 567 continue 568 } 569 dst = append(dst, cl) 570 } 571 return dst, nil 572 } 573 574 func kvClaim(k, v string, blobParse func(string) (blob.Ref, bool)) (c camtypes.Claim, ok bool) { 575 const nKeyPart = 5 576 const nValPart = 4 577 var keya [nKeyPart]string 578 var vala [nValPart]string 579 keyPart := strutil.AppendSplitN(keya[:0], k, "|", -1) 580 valPart := strutil.AppendSplitN(vala[:0], v, "|", -1) 581 if len(keyPart) < nKeyPart || len(valPart) < nValPart { 582 return 583 } 584 signerRef, ok := blobParse(valPart[3]) 585 if 
!ok { 586 return 587 } 588 permaNode, ok := blobParse(keyPart[1]) 589 if !ok { 590 return 591 } 592 claimRef, ok := blobParse(keyPart[4]) 593 if !ok { 594 return 595 } 596 date, err := time.Parse(time.RFC3339, keyPart[3]) 597 if err != nil { 598 return 599 } 600 return camtypes.Claim{ 601 BlobRef: claimRef, 602 Signer: signerRef, 603 Permanode: permaNode, 604 Date: date, 605 Type: urld(valPart[0]), 606 Attr: urld(valPart[1]), 607 Value: urld(valPart[2]), 608 }, true 609 } 610 611 func (x *Index) GetBlobMeta(br blob.Ref) (camtypes.BlobMeta, error) { 612 if x.corpus != nil { 613 return x.corpus.GetBlobMeta(br) 614 } 615 key := "meta:" + br.String() 616 meta, err := x.s.Get(key) 617 if err == sorted.ErrNotFound { 618 err = os.ErrNotExist 619 } 620 if err != nil { 621 return camtypes.BlobMeta{}, err 622 } 623 pos := strings.Index(meta, "|") 624 if pos < 0 { 625 panic(fmt.Sprintf("Bogus index row for key %q: got value %q", key, meta)) 626 } 627 size, err := strconv.ParseUint(meta[:pos], 10, 32) 628 if err != nil { 629 return camtypes.BlobMeta{}, err 630 } 631 mime := meta[pos+1:] 632 return camtypes.BlobMeta{ 633 Ref: br, 634 Size: uint32(size), 635 CamliType: camliTypeFromMIME(mime), 636 }, nil 637 } 638 639 func (x *Index) KeyId(signer blob.Ref) (string, error) { 640 if x.corpus != nil { 641 return x.corpus.KeyId(signer) 642 } 643 return x.s.Get("signerkeyid:" + signer.String()) 644 } 645 646 func (x *Index) PermanodeOfSignerAttrValue(signer blob.Ref, attr, val string) (permaNode blob.Ref, err error) { 647 keyId, err := x.KeyId(signer) 648 if err == sorted.ErrNotFound { 649 return blob.Ref{}, os.ErrNotExist 650 } 651 if err != nil { 652 return blob.Ref{}, err 653 } 654 it := x.queryPrefix(keySignerAttrValue, keyId, attr, val) 655 defer closeIterator(it, &err) 656 for it.Next() { 657 permaRef, ok := blob.Parse(it.Value()) 658 if ok && !x.IsDeleted(permaRef) { 659 return permaRef, nil 660 } 661 } 662 return blob.Ref{}, os.ErrNotExist 663 } 664 665 // This is just like 
PermanodeOfSignerAttrValue except we return multiple and dup-suppress. 666 // If request.Query is "", it is not used in the prefix search. 667 func (x *Index) SearchPermanodesWithAttr(dest chan<- blob.Ref, request *camtypes.PermanodeByAttrRequest) (err error) { 668 defer close(dest) 669 if request.FuzzyMatch { 670 // TODO(bradfitz): remove this for now? figure out how to handle it generically? 671 return errors.New("TODO: SearchPermanodesWithAttr: generic indexer doesn't support FuzzyMatch on PermanodeByAttrRequest") 672 } 673 if request.Attribute == "" { 674 return errors.New("index: missing Attribute in SearchPermanodesWithAttr") 675 } 676 677 keyId, err := x.KeyId(request.Signer) 678 if err == sorted.ErrNotFound { 679 return nil 680 } 681 if err != nil { 682 return err 683 } 684 seen := make(map[string]bool) 685 var it sorted.Iterator 686 if request.Query == "" { 687 it = x.queryPrefix(keySignerAttrValue, keyId, request.Attribute) 688 } else { 689 it = x.queryPrefix(keySignerAttrValue, keyId, request.Attribute, request.Query) 690 } 691 defer closeIterator(it, &err) 692 for it.Next() { 693 cl, ok := kvSignerAttrValue(it.Key(), it.Value()) 694 if !ok { 695 continue 696 } 697 if x.IsDeleted(cl.BlobRef) { 698 continue 699 } 700 if x.IsDeleted(cl.Permanode) { 701 continue 702 } 703 pnstr := cl.Permanode.String() 704 if seen[pnstr] { 705 continue 706 } 707 seen[pnstr] = true 708 709 dest <- cl.Permanode 710 if len(seen) == request.MaxResults { 711 break 712 } 713 } 714 return nil 715 } 716 717 func kvSignerAttrValue(k, v string) (c camtypes.Claim, ok bool) { 718 // TODO(bradfitz): garbage 719 keyPart := strings.Split(k, "|") 720 valPart := strings.Split(v, "|") 721 if len(keyPart) != 6 || len(valPart) != 1 { 722 // TODO(mpl): use glog 723 log.Printf("bogus keySignerAttrValue index entry: %q = %q", k, v) 724 return 725 } 726 if keyPart[0] != "signerattrvalue" { 727 return 728 } 729 date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[4])) 730 if err != nil 
{ 731 log.Printf("bogus time in keySignerAttrValue index entry: %q", keyPart[4]) 732 return 733 } 734 claimRef, ok := blob.Parse(keyPart[5]) 735 if !ok { 736 log.Printf("bogus claim in keySignerAttrValue index entry: %q", keyPart[5]) 737 return 738 } 739 permaNode, ok := blob.Parse(valPart[0]) 740 if !ok { 741 log.Printf("bogus permanode in keySignerAttrValue index entry: %q", valPart[0]) 742 return 743 } 744 return camtypes.Claim{ 745 BlobRef: claimRef, 746 Permanode: permaNode, 747 Date: date, 748 Attr: urld(keyPart[2]), 749 Value: urld(keyPart[3]), 750 }, true 751 } 752 753 func (x *Index) PathsOfSignerTarget(signer, target blob.Ref) (paths []*camtypes.Path, err error) { 754 paths = []*camtypes.Path{} 755 keyId, err := x.KeyId(signer) 756 if err != nil { 757 if err == sorted.ErrNotFound { 758 err = nil 759 } 760 return 761 } 762 763 mostRecent := make(map[string]*camtypes.Path) 764 maxClaimDates := make(map[string]time.Time) 765 766 it := x.queryPrefix(keyPathBackward, keyId, target) 767 defer closeIterator(it, &err) 768 for it.Next() { 769 p, ok, active := kvPathBackward(it.Key(), it.Value()) 770 if !ok { 771 continue 772 } 773 if x.IsDeleted(p.Claim) { 774 continue 775 } 776 if x.IsDeleted(p.Base) { 777 continue 778 } 779 780 key := p.Base.String() + "/" + p.Suffix 781 if p.ClaimDate.After(maxClaimDates[key]) { 782 maxClaimDates[key] = p.ClaimDate 783 if active { 784 mostRecent[key] = &p 785 } else { 786 delete(mostRecent, key) 787 } 788 } 789 } 790 for _, v := range mostRecent { 791 paths = append(paths, v) 792 } 793 return paths, nil 794 } 795 796 func kvPathBackward(k, v string) (p camtypes.Path, ok bool, active bool) { 797 // TODO(bradfitz): garbage 798 keyPart := strings.Split(k, "|") 799 valPart := strings.Split(v, "|") 800 if len(keyPart) != 4 || len(valPart) != 4 { 801 // TODO(mpl): use glog 802 log.Printf("bogus keyPathBackward index entry: %q = %q", k, v) 803 return 804 } 805 if keyPart[0] != "signertargetpath" { 806 return 807 } 808 target, ok := 
blob.Parse(keyPart[2]) 809 if !ok { 810 log.Printf("bogus target in keyPathBackward index entry: %q", keyPart[2]) 811 return 812 } 813 claim, ok := blob.Parse(keyPart[3]) 814 if !ok { 815 log.Printf("bogus claim in keyPathBackward index entry: %q", keyPart[3]) 816 return 817 } 818 date, err := time.Parse(time.RFC3339, valPart[0]) 819 if err != nil { 820 log.Printf("bogus date in keyPathBackward index entry: %q", valPart[0]) 821 return 822 } 823 base, ok := blob.Parse(valPart[1]) 824 if !ok { 825 log.Printf("bogus base in keyPathBackward index entry: %q", valPart[1]) 826 return 827 } 828 if valPart[2] == "Y" { 829 active = true 830 } 831 return camtypes.Path{ 832 Claim: claim, 833 Base: base, 834 Target: target, 835 ClaimDate: date, 836 Suffix: urld(valPart[3]), 837 }, true, active 838 } 839 840 func (x *Index) PathsLookup(signer, base blob.Ref, suffix string) (paths []*camtypes.Path, err error) { 841 paths = []*camtypes.Path{} 842 keyId, err := x.KeyId(signer) 843 if err != nil { 844 if err == sorted.ErrNotFound { 845 err = nil 846 } 847 return 848 } 849 850 it := x.queryPrefix(keyPathForward, keyId, base, suffix) 851 defer closeIterator(it, &err) 852 for it.Next() { 853 p, ok, active := kvPathForward(it.Key(), it.Value()) 854 if !ok { 855 continue 856 } 857 if x.IsDeleted(p.Claim) { 858 continue 859 } 860 if x.IsDeleted(p.Target) { 861 continue 862 } 863 864 // TODO(bradfitz): investigate what's up with deleted 865 // forward path claims here. Needs docs with the 866 // interface too, and tests. 
867 _ = active 868 869 paths = append(paths, &p) 870 } 871 return 872 } 873 874 func kvPathForward(k, v string) (p camtypes.Path, ok bool, active bool) { 875 // TODO(bradfitz): garbage 876 keyPart := strings.Split(k, "|") 877 valPart := strings.Split(v, "|") 878 if len(keyPart) != 6 || len(valPart) != 2 { 879 // TODO(mpl): use glog 880 log.Printf("bogus keyPathForward index entry: %q = %q", k, v) 881 return 882 } 883 if keyPart[0] != "path" { 884 return 885 } 886 base, ok := blob.Parse(keyPart[2]) 887 if !ok { 888 log.Printf("bogus base in keyPathForward index entry: %q", keyPart[2]) 889 return 890 } 891 date, err := time.Parse(time.RFC3339, unreverseTimeString(keyPart[4])) 892 if err != nil { 893 log.Printf("bogus date in keyPathForward index entry: %q", keyPart[4]) 894 return 895 } 896 claim, ok := blob.Parse(keyPart[5]) 897 if !ok { 898 log.Printf("bogus claim in keyPathForward index entry: %q", keyPart[5]) 899 return 900 } 901 if valPart[0] == "Y" { 902 active = true 903 } 904 target, ok := blob.Parse(valPart[1]) 905 if !ok { 906 log.Printf("bogus target in keyPathForward index entry: %q", valPart[1]) 907 return 908 } 909 return camtypes.Path{ 910 Claim: claim, 911 Base: base, 912 Target: target, 913 ClaimDate: date, 914 Suffix: urld(keyPart[3]), 915 }, true, active 916 } 917 918 func (x *Index) PathLookup(signer, base blob.Ref, suffix string, at time.Time) (*camtypes.Path, error) { 919 paths, err := x.PathsLookup(signer, base, suffix) 920 if err != nil { 921 return nil, err 922 } 923 var ( 924 newest = int64(0) 925 atSeconds = int64(0) 926 best *camtypes.Path 927 ) 928 929 if !at.IsZero() { 930 atSeconds = at.Unix() 931 } 932 933 for _, path := range paths { 934 t := path.ClaimDate 935 secs := t.Unix() 936 if atSeconds != 0 && secs > atSeconds { 937 // Too new 938 continue 939 } 940 if newest > secs { 941 // Too old 942 continue 943 } 944 // Just right 945 newest, best = secs, path 946 } 947 if best == nil { 948 return nil, os.ErrNotExist 949 } 950 return 
best, nil 951 } 952 953 func (x *Index) ExistingFileSchemas(wholeRef blob.Ref) (schemaRefs []blob.Ref, err error) { 954 it := x.queryPrefix(keyWholeToFileRef, wholeRef) 955 defer closeIterator(it, &err) 956 for it.Next() { 957 keyPart := strings.Split(it.Key(), "|")[1:] 958 if len(keyPart) < 2 { 959 continue 960 } 961 ref, ok := blob.Parse(keyPart[1]) 962 if ok { 963 schemaRefs = append(schemaRefs, ref) 964 } 965 } 966 return schemaRefs, nil 967 } 968 969 func (x *Index) loadKey(key string, val *string, err *error, wg *sync.WaitGroup) { 970 defer wg.Done() 971 *val, *err = x.s.Get(key) 972 } 973 974 func (x *Index) GetFileInfo(fileRef blob.Ref) (camtypes.FileInfo, error) { 975 if x.corpus != nil { 976 return x.corpus.GetFileInfo(fileRef) 977 } 978 ikey := "fileinfo|" + fileRef.String() 979 tkey := "filetimes|" + fileRef.String() 980 wg := new(sync.WaitGroup) 981 wg.Add(2) 982 var iv, tv string // info value, time value 983 var ierr, terr error 984 go x.loadKey(ikey, &iv, &ierr, wg) 985 go x.loadKey(tkey, &tv, &terr, wg) 986 wg.Wait() 987 988 if ierr == sorted.ErrNotFound { 989 return camtypes.FileInfo{}, os.ErrNotExist 990 } 991 if ierr != nil { 992 return camtypes.FileInfo{}, ierr 993 } 994 valPart := strings.Split(iv, "|") 995 if len(valPart) < 3 { 996 log.Printf("index: bogus key %q = %q", ikey, iv) 997 return camtypes.FileInfo{}, os.ErrNotExist 998 } 999 size, err := strconv.ParseInt(valPart[0], 10, 64) 1000 if err != nil { 1001 log.Printf("index: bogus integer at position 0 in key %q = %q", ikey, iv) 1002 return camtypes.FileInfo{}, os.ErrNotExist 1003 } 1004 fileName := urld(valPart[1]) 1005 fi := camtypes.FileInfo{ 1006 Size: size, 1007 FileName: fileName, 1008 MIMEType: urld(valPart[2]), 1009 } 1010 1011 if tv != "" { 1012 times := strings.Split(urld(tv), ",") 1013 updateFileInfoTimes(&fi, times) 1014 } 1015 1016 return fi, nil 1017 } 1018 1019 func updateFileInfoTimes(fi *camtypes.FileInfo, times []string) { 1020 if len(times) == 0 { 1021 return 1022 } 
1023 fi.Time = types.ParseTime3339OrNil(times[0]) 1024 if len(times) == 2 { 1025 fi.ModTime = types.ParseTime3339OrNil(times[1]) 1026 } 1027 } 1028 1029 // v is "width|height" 1030 func kvImageInfo(v []byte) (ii camtypes.ImageInfo, ok bool) { 1031 pipei := bytes.IndexByte(v, '|') 1032 if pipei < 0 { 1033 return 1034 } 1035 w, err := strutil.ParseUintBytes(v[:pipei], 10, 16) 1036 if err != nil { 1037 return 1038 } 1039 h, err := strutil.ParseUintBytes(v[pipei+1:], 10, 16) 1040 if err != nil { 1041 return 1042 } 1043 ii.Width = uint16(w) 1044 ii.Height = uint16(h) 1045 return ii, true 1046 } 1047 1048 func (x *Index) GetImageInfo(fileRef blob.Ref) (camtypes.ImageInfo, error) { 1049 if x.corpus != nil { 1050 return x.corpus.GetImageInfo(fileRef) 1051 } 1052 // it might be that the key does not exist because image.DecodeConfig failed earlier 1053 // (because of unsupported JPEG features like progressive mode). 1054 key := keyImageSize.Key(fileRef.String()) 1055 v, err := x.s.Get(key) 1056 if err == sorted.ErrNotFound { 1057 err = os.ErrNotExist 1058 } 1059 if err != nil { 1060 return camtypes.ImageInfo{}, err 1061 } 1062 ii, ok := kvImageInfo([]byte(v)) 1063 if !ok { 1064 return camtypes.ImageInfo{}, fmt.Errorf("index: bogus key %q = %q", key, v) 1065 } 1066 return ii, nil 1067 } 1068 1069 func (x *Index) GetMediaTags(fileRef blob.Ref) (tags map[string]string, err error) { 1070 if x.corpus != nil { 1071 return x.corpus.GetMediaTags(fileRef) 1072 } 1073 it := x.queryPrefix(keyMediaTag, fileRef.String()) 1074 defer closeIterator(it, &err) 1075 for it.Next() { 1076 tags[it.Key()] = it.Value() 1077 } 1078 return tags, nil 1079 } 1080 1081 func (x *Index) EdgesTo(ref blob.Ref, opts *camtypes.EdgesToOpts) (edges []*camtypes.Edge, err error) { 1082 it := x.queryPrefix(keyEdgeBackward, ref) 1083 defer closeIterator(it, &err) 1084 permanodeParents := make(map[string]*camtypes.Edge) 1085 for it.Next() { 1086 edge, ok := kvEdgeBackward(it.Key(), it.Value()) 1087 if !ok { 1088 
continue 1089 } 1090 if x.IsDeleted(edge.From) { 1091 continue 1092 } 1093 if x.IsDeleted(edge.BlobRef) { 1094 continue 1095 } 1096 edge.To = ref 1097 if edge.FromType == "permanode" { 1098 permanodeParents[edge.From.String()] = edge 1099 } else { 1100 edges = append(edges, edge) 1101 } 1102 } 1103 for _, e := range permanodeParents { 1104 edges = append(edges, e) 1105 } 1106 return edges, nil 1107 } 1108 1109 func kvEdgeBackward(k, v string) (edge *camtypes.Edge, ok bool) { 1110 // TODO(bradfitz): garbage 1111 keyPart := strings.Split(k, "|") 1112 valPart := strings.Split(v, "|") 1113 if len(keyPart) != 4 || len(valPart) != 2 { 1114 // TODO(mpl): use glog 1115 log.Printf("bogus keyEdgeBackward index entry: %q = %q", k, v) 1116 return 1117 } 1118 if keyPart[0] != "edgeback" { 1119 return 1120 } 1121 parentRef, ok := blob.Parse(keyPart[2]) 1122 if !ok { 1123 log.Printf("bogus parent in keyEdgeBackward index entry: %q", keyPart[2]) 1124 return 1125 } 1126 blobRef, ok := blob.Parse(keyPart[3]) 1127 if !ok { 1128 log.Printf("bogus blobref in keyEdgeBackward index entry: %q", keyPart[3]) 1129 return 1130 } 1131 return &camtypes.Edge{ 1132 From: parentRef, 1133 FromType: valPart[0], 1134 FromTitle: valPart[1], 1135 BlobRef: blobRef, 1136 }, true 1137 } 1138 1139 // GetDirMembers sends on dest the children of the static directory dir. 
1140 func (x *Index) GetDirMembers(dir blob.Ref, dest chan<- blob.Ref, limit int) (err error) { 1141 defer close(dest) 1142 1143 sent := 0 1144 it := x.queryPrefix(keyStaticDirChild, dir.String()) 1145 defer closeIterator(it, &err) 1146 for it.Next() { 1147 keyPart := strings.Split(it.Key(), "|") 1148 if len(keyPart) != 3 { 1149 return fmt.Errorf("index: bogus key keyStaticDirChild = %q", it.Key()) 1150 } 1151 1152 child, ok := blob.Parse(keyPart[2]) 1153 if !ok { 1154 continue 1155 } 1156 dest <- child 1157 sent++ 1158 if sent == limit { 1159 break 1160 } 1161 } 1162 return nil 1163 } 1164 1165 func kvBlobMeta(k, v string) (bm camtypes.BlobMeta, ok bool) { 1166 refStr := k[len("meta:"):] 1167 br, ok := blob.Parse(refStr) 1168 if !ok { 1169 return 1170 } 1171 pipe := strings.Index(v, "|") 1172 if pipe < 0 { 1173 return 1174 } 1175 size, err := strconv.ParseUint(v[:pipe], 10, 32) 1176 if err != nil { 1177 return 1178 } 1179 return camtypes.BlobMeta{ 1180 Ref: br, 1181 Size: uint32(size), 1182 CamliType: camliTypeFromMIME(v[pipe+1:]), 1183 }, true 1184 } 1185 1186 func kvBlobMeta_bytes(k, v []byte) (bm camtypes.BlobMeta, ok bool) { 1187 ref := k[len("meta:"):] 1188 br, ok := blob.ParseBytes(ref) 1189 if !ok { 1190 return 1191 } 1192 pipe := bytes.IndexByte(v, '|') 1193 if pipe < 0 { 1194 return 1195 } 1196 size, err := strutil.ParseUintBytes(v[:pipe], 10, 32) 1197 if err != nil { 1198 return 1199 } 1200 return camtypes.BlobMeta{ 1201 Ref: br, 1202 Size: uint32(size), 1203 CamliType: camliTypeFromMIME_bytes(v[pipe+1:]), 1204 }, true 1205 } 1206 1207 func enumerateBlobMeta(s sorted.KeyValue, cb func(camtypes.BlobMeta) error) (err error) { 1208 it := queryPrefixString(s, "meta:") 1209 defer closeIterator(it, &err) 1210 for it.Next() { 1211 bm, ok := kvBlobMeta(it.Key(), it.Value()) 1212 if !ok { 1213 continue 1214 } 1215 if err := cb(bm); err != nil { 1216 return err 1217 } 1218 } 1219 return nil 1220 } 1221 1222 func enumerateSignerKeyId(s sorted.KeyValue, cb 
func(blob.Ref, string)) (err error) { 1223 const pfx = "signerkeyid:" 1224 it := queryPrefixString(s, pfx) 1225 defer closeIterator(it, &err) 1226 for it.Next() { 1227 if br, ok := blob.Parse(strings.TrimPrefix(it.Key(), pfx)); ok { 1228 cb(br, it.Value()) 1229 } 1230 } 1231 return 1232 } 1233 1234 // EnumerateBlobMeta sends all metadata about all known blobs to ch and then closes ch. 1235 func (x *Index) EnumerateBlobMeta(ctx *context.Context, ch chan<- camtypes.BlobMeta) (err error) { 1236 if x.corpus != nil { 1237 x.corpus.RLock() 1238 defer x.corpus.RUnlock() 1239 return x.corpus.EnumerateBlobMetaLocked(ctx, ch) 1240 } 1241 defer close(ch) 1242 return enumerateBlobMeta(x.s, func(bm camtypes.BlobMeta) error { 1243 select { 1244 case ch <- bm: 1245 case <-ctx.Done(): 1246 return context.ErrCanceled 1247 } 1248 return nil 1249 }) 1250 } 1251 1252 // Storage returns the index's underlying Storage implementation. 1253 func (x *Index) Storage() sorted.KeyValue { return x.s } 1254 1255 // Close closes the underlying sorted.KeyValue, if the storage has a Close method. 1256 // The return value is the return value of the underlying Close, or 1257 // nil otherwise. 1258 func (x *Index) Close() error { 1259 if cl, ok := x.s.(io.Closer); ok { 1260 return cl.Close() 1261 } 1262 close(x.tickleOoo) 1263 return nil 1264 } 1265 1266 // initNeededMaps initializes x.needs and x.neededBy on start-up. 
1267 func (x *Index) initNeededMaps() (err error) { 1268 x.deletes = newDeletionCache() 1269 it := x.queryPrefix(keyMissing) 1270 defer closeIterator(it, &err) 1271 for it.Next() { 1272 key := it.KeyBytes() 1273 pair := key[len("missing|"):] 1274 pipe := bytes.IndexByte(pair, '|') 1275 if pipe < 0 { 1276 return fmt.Errorf("Bogus missing key %q", key) 1277 } 1278 have, ok1 := blob.ParseBytes(pair[:pipe]) 1279 missing, ok2 := blob.ParseBytes(pair[pipe+1:]) 1280 if !ok1 || !ok2 { 1281 return fmt.Errorf("Bogus missing key %q", key) 1282 } 1283 x.noteNeededMemory(have, missing) 1284 } 1285 return 1286 } 1287 1288 func (x *Index) noteNeeded(have, missing blob.Ref) error { 1289 if err := x.s.Set(keyMissing.Key(have, missing), "1"); err != nil { 1290 return err 1291 } 1292 x.noteNeededMemory(have, missing) 1293 return nil 1294 } 1295 1296 func (x *Index) noteNeededMemory(have, missing blob.Ref) { 1297 x.mu.Lock() 1298 x.needs[have] = append(x.needs[have], missing) 1299 x.neededBy[missing] = append(x.neededBy[missing], have) 1300 x.mu.Unlock() 1301 } 1302 1303 const camliTypeMIMEPrefix = "application/json; camliType=" 1304 1305 var camliTypeMIMEPrefixBytes = []byte(camliTypeMIMEPrefix) 1306 1307 // "application/json; camliType=file" => "file" 1308 // "image/gif" => "" 1309 func camliTypeFromMIME(mime string) string { 1310 if v := strings.TrimPrefix(mime, camliTypeMIMEPrefix); v != mime { 1311 return v 1312 } 1313 return "" 1314 } 1315 1316 func camliTypeFromMIME_bytes(mime []byte) string { 1317 if v := bytes.TrimPrefix(mime, camliTypeMIMEPrefixBytes); len(v) != len(mime) { 1318 return strutil.StringFromBytes(v) 1319 } 1320 return "" 1321 } 1322 1323 // TODO(bradfitz): rename this? This is really about signer-attr-value 1324 // (PermanodeOfSignerAttrValue), and not about indexed attributes in general. 
//
// IsIndexedAttribute reports whether attr is one of "camliRoot",
// "camliImportRoot", "tag" or "title".
func IsIndexedAttribute(attr string) bool {
	return attr == "camliRoot" ||
		attr == "camliImportRoot" ||
		attr == "tag" ||
		attr == "title"
}

// IsBlobReferenceAttribute reports whether attr names an attribute whose
// value is a blob reference (currently only "camliMember"), and for which
// the indexers should therefore maintain inverted indexes describing
// parent/child-type relationships.
func IsBlobReferenceAttribute(attr string) bool {
	return attr == "camliMember"
}

// IsFulltextAttribute reports whether attr is "tag" or "title".
func IsFulltextAttribute(attr string) bool {
	return attr == "tag" || attr == "title"
}