github.com/timstclair/heapster@v0.20.0-alpha1/Godeps/_workspace/src/google.golang.org/appengine/search/search.go (about) 1 // Copyright 2012 Google Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package search 6 7 // TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage?? 8 // TODO: Index.GetAll (or Iterator.GetAll)? 9 // TODO: struct <-> protobuf tests. 10 // TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero 11 // time.Time)? _MAXIMUM_STRING_LENGTH? 12 13 import ( 14 "errors" 15 "fmt" 16 "math" 17 "reflect" 18 "regexp" 19 "strconv" 20 "strings" 21 "time" 22 "unicode/utf8" 23 24 "github.com/golang/protobuf/proto" 25 "golang.org/x/net/context" 26 27 "google.golang.org/appengine" 28 "google.golang.org/appengine/internal" 29 pb "google.golang.org/appengine/internal/search" 30 ) 31 32 var ( 33 // ErrInvalidDocumentType is returned when methods like Put, Get or Next 34 // are passed a dst or src argument of invalid type. 35 ErrInvalidDocumentType = errors.New("search: invalid document type") 36 37 // ErrNoSuchDocument is returned when no document was found for a given ID. 38 ErrNoSuchDocument = errors.New("search: no such document") 39 ) 40 41 // Atom is a document field whose contents are indexed as a single indivisible 42 // string. 43 type Atom string 44 45 // HTML is a document field whose contents are indexed as HTML. Only text nodes 46 // are indexed: "foo<b>bar" will be treated as "foobar". 47 type HTML string 48 49 // validIndexNameOrDocID is the Go equivalent of Python's 50 // _ValidateVisiblePrintableAsciiNotReserved. 51 func validIndexNameOrDocID(s string) bool { 52 if strings.HasPrefix(s, "!") { 53 return false 54 } 55 for _, c := range s { 56 if c < 0x21 || 0x7f <= c { 57 return false 58 } 59 } 60 return true 61 } 62 63 var ( 64 fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`) 65 languageRE = regexp.MustCompile(`^[a-z]{2}$`) 66 ) 67 68 // validFieldName is the Go equivalent of Python's _CheckFieldName. It checks 69 // the validity of both field and facet names. 70 func validFieldName(s string) bool { 71 return len(s) <= 500 && fieldNameRE.MatchString(s) 72 } 73 74 // validDocRank checks that the ranks is in the range [0, 2^31). 75 func validDocRank(r int) bool { 76 return 0 <= r && r <= (1<<31-1) 77 } 78 79 // validLanguage checks that a language looks like ISO 639-1. 80 func validLanguage(s string) bool { 81 return languageRE.MatchString(s) 82 } 83 84 // validFloat checks that f is in the range [-2147483647, 2147483647]. 85 func validFloat(f float64) bool { 86 return -(1<<31-1) <= f && f <= (1<<31-1) 87 } 88 89 // Index is an index of documents. 90 type Index struct { 91 spec pb.IndexSpec 92 } 93 94 // orderIDEpoch forms the basis for populating OrderId on documents. 95 var orderIDEpoch = time.Date(2011, 1, 1, 0, 0, 0, 0, time.UTC) 96 97 // Open opens the index with the given name. The index is created if it does 98 // not already exist. 99 // 100 // The name is a human-readable ASCII string. It must contain no whitespace 101 // characters and not start with "!". 102 func Open(name string) (*Index, error) { 103 if !validIndexNameOrDocID(name) { 104 return nil, fmt.Errorf("search: invalid index name %q", name) 105 } 106 return &Index{ 107 spec: pb.IndexSpec{ 108 Name: &name, 109 }, 110 }, nil 111 } 112 113 // Put saves src to the index. If id is empty, a new ID is allocated by the 114 // service and returned. If id is not empty, any existing index entry for that 115 // ID is replaced. 116 // 117 // The ID is a human-readable ASCII string. It must contain no whitespace 118 // characters and not start with "!". 119 // 120 // src must be a non-nil struct pointer or implement the FieldLoadSaver 121 // interface. 122 func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) { 123 d, err := saveDoc(src) 124 if err != nil { 125 return "", err 126 } 127 if id != "" { 128 if !validIndexNameOrDocID(id) { 129 return "", fmt.Errorf("search: invalid ID %q", id) 130 } 131 d.Id = proto.String(id) 132 } 133 req := &pb.IndexDocumentRequest{ 134 Params: &pb.IndexDocumentParams{ 135 Document: []*pb.Document{d}, 136 IndexSpec: &x.spec, 137 }, 138 } 139 res := &pb.IndexDocumentResponse{} 140 if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil { 141 return "", err 142 } 143 if len(res.Status) > 0 { 144 if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK { 145 return "", fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) 146 } 147 } 148 if len(res.Status) != 1 || len(res.DocId) != 1 { 149 return "", fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs)", 150 len(res.Status), len(res.DocId)) 151 } 152 return res.DocId[0], nil 153 } 154 155 // Get loads the document with the given ID into dst. 156 // 157 // The ID is a human-readable ASCII string. It must be non-empty, contain no 158 // whitespace characters and not start with "!". 159 // 160 // dst must be a non-nil struct pointer or implement the FieldLoadSaver 161 // interface. 162 // 163 // ErrFieldMismatch is returned when a field is to be loaded into a different 164 // type than the one it was stored from, or when a field is missing or 165 // unexported in the destination struct. ErrFieldMismatch is only returned if 166 // dst is a struct pointer. It is up to the callee to decide whether this error 167 // is fatal, recoverable, or ignorable. 168 func (x *Index) Get(c context.Context, id string, dst interface{}) error { 169 if id == "" || !validIndexNameOrDocID(id) { 170 return fmt.Errorf("search: invalid ID %q", id) 171 } 172 req := &pb.ListDocumentsRequest{ 173 Params: &pb.ListDocumentsParams{ 174 IndexSpec: &x.spec, 175 StartDocId: proto.String(id), 176 Limit: proto.Int32(1), 177 }, 178 } 179 res := &pb.ListDocumentsResponse{} 180 if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil { 181 return err 182 } 183 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 184 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 185 } 186 if len(res.Document) != 1 || res.Document[0].GetId() != id { 187 return ErrNoSuchDocument 188 } 189 return loadDoc(dst, res.Document[0], nil) 190 } 191 192 // Delete deletes a document from the index. 193 func (x *Index) Delete(c context.Context, id string) error { 194 req := &pb.DeleteDocumentRequest{ 195 Params: &pb.DeleteDocumentParams{ 196 DocId: []string{id}, 197 IndexSpec: &x.spec, 198 }, 199 } 200 res := &pb.DeleteDocumentResponse{} 201 if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil { 202 return err 203 } 204 if len(res.Status) != 1 { 205 return fmt.Errorf("search: internal error: wrong number of results (%d)", len(res.Status)) 206 } 207 if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK { 208 return fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) 209 } 210 return nil 211 } 212 213 // List lists all of the documents in an index. The documents are returned in 214 // increasing ID order. 215 func (x *Index) List(c context.Context, opts *ListOptions) *Iterator { 216 t := &Iterator{ 217 c: c, 218 index: x, 219 count: -1, 220 listInclusive: true, 221 more: moreList, 222 } 223 if opts != nil { 224 t.listStartID = opts.StartID 225 t.limit = opts.Limit 226 t.idsOnly = opts.IDsOnly 227 } 228 return t 229 } 230 231 func moreList(t *Iterator) error { 232 req := &pb.ListDocumentsRequest{ 233 Params: &pb.ListDocumentsParams{ 234 IndexSpec: &t.index.spec, 235 }, 236 } 237 if t.listStartID != "" { 238 req.Params.StartDocId = &t.listStartID 239 req.Params.IncludeStartDoc = &t.listInclusive 240 } 241 if t.limit > 0 { 242 req.Params.Limit = proto.Int32(int32(t.limit)) 243 } 244 if t.idsOnly { 245 req.Params.KeysOnly = &t.idsOnly 246 } 247 248 res := &pb.ListDocumentsResponse{} 249 if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil { 250 return err 251 } 252 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 253 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 254 } 255 t.listRes = res.Document 256 t.listStartID, t.listInclusive, t.more = "", false, nil 257 if len(res.Document) != 0 && t.limit <= 0 { 258 if id := res.Document[len(res.Document)-1].GetId(); id != "" { 259 t.listStartID, t.more = id, moreList 260 } 261 } 262 return nil 263 } 264 265 // ListOptions are the options for listing documents in an index. Passing a nil 266 // *ListOptions is equivalent to using the default values. 267 type ListOptions struct { 268 // StartID is the inclusive lower bound for the ID of the returned 269 // documents. The zero value means all documents will be returned. 270 StartID string 271 272 // Limit is the maximum number of documents to return. The zero value 273 // indicates no limit. 274 Limit int 275 276 // IDsOnly indicates that only document IDs should be returned for the list 277 // operation; no document fields are populated. 278 IDsOnly bool 279 } 280 281 // Search searches the index for the given query. 282 func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator { 283 t := &Iterator{ 284 c: c, 285 index: x, 286 searchQuery: query, 287 more: moreSearch, 288 } 289 if opts != nil { 290 if opts.Cursor != "" { 291 if opts.Offset != 0 { 292 return errIter("at most one of Cursor and Offset may be specified") 293 } 294 t.searchCursor = proto.String(string(opts.Cursor)) 295 } 296 t.limit = opts.Limit 297 t.fields = opts.Fields 298 t.idsOnly = opts.IDsOnly 299 t.sort = opts.Sort 300 t.exprs = opts.Expressions 301 t.refinements = opts.Refinements 302 t.facetOpts = opts.Facets 303 t.searchOffset = opts.Offset 304 } 305 return t 306 } 307 308 func moreSearch(t *Iterator) error { 309 // We use per-result (rather than single/per-page) cursors since this 310 // lets us return a Cursor for every iterator document. The two cursor 311 // types are largely interchangeable: a page cursor is the same as the 312 // last per-result cursor in a given search response. 313 req := &pb.SearchRequest{ 314 Params: &pb.SearchParams{ 315 IndexSpec: &t.index.spec, 316 Query: &t.searchQuery, 317 Cursor: t.searchCursor, 318 CursorType: pb.SearchParams_PER_RESULT.Enum(), 319 FieldSpec: &pb.FieldSpec{ 320 Name: t.fields, 321 }, 322 }, 323 } 324 if t.limit > 0 { 325 req.Params.Limit = proto.Int32(int32(t.limit)) 326 } 327 if t.searchOffset > 0 { 328 req.Params.Offset = proto.Int32(int32(t.searchOffset)) 329 t.searchOffset = 0 330 } 331 if t.idsOnly { 332 req.Params.KeysOnly = &t.idsOnly 333 } 334 if t.sort != nil { 335 if err := sortToProto(t.sort, req.Params); err != nil { 336 return err 337 } 338 } 339 if t.refinements != nil { 340 if err := refinementsToProto(t.refinements, req.Params); err != nil { 341 return err 342 } 343 } 344 for _, e := range t.exprs { 345 req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{ 346 Name: proto.String(e.Name), 347 Expression: proto.String(e.Expr), 348 }) 349 } 350 for _, f := range t.facetOpts { 351 if err := f.setParams(req.Params); err != nil { 352 return fmt.Errorf("bad FacetSearchOption: %v", err) 353 } 354 } 355 // Don't repeat facet search. 356 t.facetOpts = nil 357 358 res := &pb.SearchResponse{} 359 if err := internal.Call(t.c, "search", "Search", req, res); err != nil { 360 return err 361 } 362 if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { 363 return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) 364 } 365 t.searchRes = res.Result 366 if len(res.FacetResult) > 0 { 367 t.facetRes = res.FacetResult 368 } 369 t.count = int(*res.MatchedCount) 370 if t.limit > 0 { 371 t.more = nil 372 } else { 373 t.more = moreSearch 374 } 375 return nil 376 } 377 378 // SearchOptions are the options for searching an index. Passing a nil 379 // *SearchOptions is equivalent to using the default values. 380 type SearchOptions struct { 381 // Limit is the maximum number of documents to return. The zero value 382 // indicates no limit. 383 Limit int 384 385 // IDsOnly indicates that only document IDs should be returned for the search 386 // operation; no document fields are populated. 387 IDsOnly bool 388 389 // Sort controls the ordering of search results. 390 Sort *SortOptions 391 392 // Fields specifies which document fields to include in the results. If omitted, 393 // all document fields are returned. No more than 100 fields may be specified. 394 Fields []string 395 396 // Expressions specifies additional computed fields to add to each returned 397 // document. 398 Expressions []FieldExpression 399 400 // Facets controls what facet information is returned for these search results. 401 // If no options are specified, no facet results will be returned. 402 Facets []FacetSearchOption 403 404 // Refinements filters the returned documents by requiring them to contain facets 405 // with specific values. Refinements are applied in conjunction for facets with 406 // different names, and in disjunction otherwise. 407 Refinements []Facet 408 409 // Cursor causes the results to commence with the first document after 410 // the document associated with the cursor. 411 Cursor Cursor 412 413 // Offset specifies the number of documents to skip over before returning results. 414 // When specified, Cursor must be nil. 415 Offset int 416 } 417 418 // Cursor represents an iterator's position. 419 // 420 // The string value of a cursor is web-safe. It can be saved and restored 421 // for later use. 422 type Cursor string 423 424 // FieldExpression defines a custom expression to evaluate for each result. 425 type FieldExpression struct { 426 // Name is the name to use for the computed field. 427 Name string 428 429 // Expr is evaluated to provide a custom content snippet for each document. 430 // See https://cloud.google.com/appengine/docs/go/search/options for 431 // the supported expression syntax. 432 Expr string 433 } 434 435 // FacetSearchOption controls what facet information is returned in search results. 436 type FacetSearchOption interface { 437 setParams(*pb.SearchParams) error 438 } 439 440 // AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet 441 // discovery for the search. Automatic facet discovery looks for the facets 442 // which appear the most often in the aggregate in the matched documents. 443 // 444 // The maximum number of facets returned is controlled by facetLimit, and the 445 // maximum number of values per facet by facetLimit. A limit of zero indicates 446 // a default limit should be used. 447 func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption { 448 return &autoFacetOpt{facetLimit, valueLimit} 449 } 450 451 type autoFacetOpt struct { 452 facetLimit, valueLimit int 453 } 454 455 const defaultAutoFacetLimit = 10 // As per python runtime search.py. 456 457 func (o *autoFacetOpt) setParams(params *pb.SearchParams) error { 458 lim := int32(o.facetLimit) 459 if lim == 0 { 460 lim = defaultAutoFacetLimit 461 } 462 params.AutoDiscoverFacetCount = &lim 463 if o.valueLimit > 0 { 464 params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{ 465 ValueLimit: proto.Int32(int32(o.valueLimit)), 466 } 467 } 468 return nil 469 } 470 471 // FacetDiscovery returns a FacetSearchOption which selects a facet to be 472 // returned with the search results. By default, the most frequently 473 // occurring values for that facet will be returned. However, you can also 474 // specify a list of particular Atoms or specific Ranges to return. 475 func FacetDiscovery(name string, value ...interface{}) FacetSearchOption { 476 return &facetOpt{name, value} 477 } 478 479 type facetOpt struct { 480 name string 481 values []interface{} 482 } 483 484 func (o *facetOpt) setParams(params *pb.SearchParams) error { 485 req := &pb.FacetRequest{Name: &o.name} 486 params.IncludeFacet = append(params.IncludeFacet, req) 487 if len(o.values) == 0 { 488 return nil 489 } 490 vtype := reflect.TypeOf(o.values[0]) 491 reqParam := &pb.FacetRequestParam{} 492 for _, v := range o.values { 493 if reflect.TypeOf(v) != vtype { 494 return errors.New("values must all be Atom, or must all be Range") 495 } 496 switch v := v.(type) { 497 case Atom: 498 reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v)) 499 case Range: 500 rng, err := rangeToProto(v) 501 if err != nil { 502 return fmt.Errorf("invalid range: %v", err) 503 } 504 reqParam.Range = append(reqParam.Range, rng) 505 default: 506 return fmt.Errorf("unsupported value type %T", v) 507 } 508 } 509 req.Params = reqParam 510 return nil 511 } 512 513 // FacetDocumentDepth returns a FacetSearchOption which controls the number of 514 // documents to be evaluated with preparing facet results. 515 func FacetDocumentDepth(depth int) FacetSearchOption { 516 return facetDepthOpt(depth) 517 } 518 519 type facetDepthOpt int 520 521 func (o facetDepthOpt) setParams(params *pb.SearchParams) error { 522 params.FacetDepth = proto.Int32(int32(o)) 523 return nil 524 } 525 526 // FacetResult represents the number of times a particular facet and value 527 // appeared in the documents matching a search request. 528 type FacetResult struct { 529 Facet 530 531 // Count is the number of times this specific facet and value appeared in the 532 // matching documents. 533 Count int 534 } 535 536 // Range represents a numeric range with inclusive start and exclusive end. 537 // Start may be specified as math.Inf(-1) to indicate there is no minimum 538 // value, and End may similarly be specified as math.Inf(1); at least one of 539 // Start or End must be a finite number. 540 type Range struct { 541 Start, End float64 542 } 543 544 var ( 545 negInf = math.Inf(-1) 546 posInf = math.Inf(1) 547 ) 548 549 // AtLeast returns a Range matching any value greater than, or equal to, min. 550 func AtLeast(min float64) Range { 551 return Range{Start: min, End: posInf} 552 } 553 554 // LessThan returns a Range matching any value less than max. 555 func LessThan(max float64) Range { 556 return Range{Start: negInf, End: max} 557 } 558 559 // SortOptions control the ordering and scoring of search results. 560 type SortOptions struct { 561 // Expressions is a slice of expressions representing a multi-dimensional 562 // sort. 563 Expressions []SortExpression 564 565 // Scorer, when specified, will cause the documents to be scored according to 566 // search term frequency. 567 Scorer Scorer 568 569 // Limit is the maximum number of objects to score and/or sort. Limit cannot 570 // be more than 10,000. The zero value indicates a default limit. 571 Limit int 572 } 573 574 // SortExpression defines a single dimension for sorting a document. 575 type SortExpression struct { 576 // Expr is evaluated to provide a sorting value for each document. 577 // See https://cloud.google.com/appengine/docs/go/search/options for 578 // the supported expression syntax. 579 Expr string 580 581 // Reverse causes the documents to be sorted in ascending order. 582 Reverse bool 583 584 // The default value to use when no field is present or the expresion 585 // cannot be calculated for a document. For text sorts, Default must 586 // be of type string; for numeric sorts, float64. 587 Default interface{} 588 } 589 590 // A Scorer defines how a document is scored. 591 type Scorer interface { 592 toProto(*pb.ScorerSpec) 593 } 594 595 type enumScorer struct { 596 enum pb.ScorerSpec_Scorer 597 } 598 599 func (e enumScorer) toProto(spec *pb.ScorerSpec) { 600 spec.Scorer = e.enum.Enum() 601 } 602 603 var ( 604 // MatchScorer assigns a score based on term frequency in a document. 605 MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER} 606 607 // RescoringMatchScorer assigns a score based on the quality of the query 608 // match. It is similar to a MatchScorer but uses a more complex scoring 609 // algorithm based on match term frequency and other factors like field type. 610 // Please be aware that this algorithm is continually refined and can change 611 // over time without notice. This means that the ordering of search results 612 // that use this scorer can also change without notice. 613 RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER} 614 ) 615 616 func sortToProto(sort *SortOptions, params *pb.SearchParams) error { 617 for _, e := range sort.Expressions { 618 spec := &pb.SortSpec{ 619 SortExpression: proto.String(e.Expr), 620 } 621 if e.Reverse { 622 spec.SortDescending = proto.Bool(false) 623 } 624 if e.Default != nil { 625 switch d := e.Default.(type) { 626 case float64: 627 spec.DefaultValueNumeric = &d 628 case string: 629 spec.DefaultValueText = &d 630 default: 631 return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr) 632 } 633 } 634 params.SortSpec = append(params.SortSpec, spec) 635 } 636 637 spec := &pb.ScorerSpec{} 638 if sort.Limit > 0 { 639 spec.Limit = proto.Int32(int32(sort.Limit)) 640 params.ScorerSpec = spec 641 } 642 if sort.Scorer != nil { 643 sort.Scorer.toProto(spec) 644 params.ScorerSpec = spec 645 } 646 647 return nil 648 } 649 650 func refinementsToProto(refinements []Facet, params *pb.SearchParams) error { 651 for _, r := range refinements { 652 ref := &pb.FacetRefinement{ 653 Name: proto.String(r.Name), 654 } 655 switch v := r.Value.(type) { 656 case Atom: 657 ref.Value = proto.String(string(v)) 658 case Range: 659 rng, err := rangeToProto(v) 660 if err != nil { 661 return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err) 662 } 663 // Unfortunately there are two identical messages for identify Facet ranges. 664 ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End} 665 default: 666 return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v) 667 } 668 params.FacetRefinement = append(params.FacetRefinement, ref) 669 } 670 return nil 671 } 672 673 func rangeToProto(r Range) (*pb.FacetRange, error) { 674 rng := &pb.FacetRange{} 675 if r.Start != negInf { 676 if !validFloat(r.Start) { 677 return nil, errors.New("invalid value for Start") 678 } 679 rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64)) 680 } else if r.End == posInf { 681 return nil, errors.New("either Start or End must be finite") 682 } 683 if r.End != posInf { 684 if !validFloat(r.End) { 685 return nil, errors.New("invalid value for End") 686 } 687 rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64)) 688 } 689 return rng, nil 690 } 691 692 func protoToRange(rng *pb.FacetRefinement_Range) Range { 693 r := Range{Start: negInf, End: posInf} 694 if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil { 695 r.Start = x 696 } 697 if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil { 698 r.End = x 699 } 700 return r 701 } 702 703 // Iterator is the result of searching an index for a query or listing an 704 // index. 705 type Iterator struct { 706 c context.Context 707 index *Index 708 err error 709 710 listRes []*pb.Document 711 listStartID string 712 listInclusive bool 713 714 searchRes []*pb.SearchResult 715 facetRes []*pb.FacetResult 716 searchQuery string 717 searchCursor *string 718 searchOffset int 719 sort *SortOptions 720 721 fields []string 722 exprs []FieldExpression 723 refinements []Facet 724 facetOpts []FacetSearchOption 725 726 more func(*Iterator) error 727 728 count int 729 limit int // items left to return; 0 for unlimited. 730 idsOnly bool 731 } 732 733 // errIter returns an iterator that only returns the given error. 734 func errIter(err string) *Iterator { 735 return &Iterator{ 736 err: errors.New(err), 737 } 738 } 739 740 // Done is returned when a query iteration has completed. 741 var Done = errors.New("search: query has no more results") 742 743 // Count returns an approximation of the number of documents matched by the 744 // query. It is only valid to call for iterators returned by Search. 745 func (t *Iterator) Count() int { return t.count } 746 747 // fetchMore retrieves more results, if there are no errors or pending results. 748 func (t *Iterator) fetchMore() { 749 if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil { 750 t.err = t.more(t) 751 } 752 } 753 754 // Next returns the ID of the next result. When there are no more results, 755 // Done is returned as the error. 756 // 757 // dst must be a non-nil struct pointer, implement the FieldLoadSaver 758 // interface, or be a nil interface value. If a non-nil dst is provided, it 759 // will be filled with the indexed fields. dst is ignored if this iterator was 760 // created with an IDsOnly option. 761 func (t *Iterator) Next(dst interface{}) (string, error) { 762 t.fetchMore() 763 if t.err != nil { 764 return "", t.err 765 } 766 767 var doc *pb.Document 768 var exprs []*pb.Field 769 switch { 770 case len(t.listRes) != 0: 771 doc = t.listRes[0] 772 t.listRes = t.listRes[1:] 773 case len(t.searchRes) != 0: 774 doc = t.searchRes[0].Document 775 exprs = t.searchRes[0].Expression 776 t.searchCursor = t.searchRes[0].Cursor 777 t.searchRes = t.searchRes[1:] 778 default: 779 return "", Done 780 } 781 if doc == nil { 782 return "", errors.New("search: internal error: no document returned") 783 } 784 if !t.idsOnly && dst != nil { 785 if err := loadDoc(dst, doc, exprs); err != nil { 786 return "", err 787 } 788 } 789 return doc.GetId(), nil 790 } 791 792 // Cursor returns the cursor associated with the current document (that is, 793 // the document most recently returned by a call to Next). 794 // 795 // Passing this cursor in a future call to Search will cause those results 796 // to commence with the first document after the current document. 797 func (t *Iterator) Cursor() Cursor { 798 if t.searchCursor == nil { 799 return "" 800 } 801 return Cursor(*t.searchCursor) 802 } 803 804 // Facets returns the facets found within the search results, if any facets 805 // were requested in the SearchOptions. 806 func (t *Iterator) Facets() ([][]FacetResult, error) { 807 t.fetchMore() 808 if t.err != nil && t.err != Done { 809 return nil, t.err 810 } 811 812 var facets [][]FacetResult 813 for _, f := range t.facetRes { 814 fres := make([]FacetResult, 0, len(f.Value)) 815 for _, v := range f.Value { 816 ref := v.Refinement 817 facet := FacetResult{ 818 Facet: Facet{Name: ref.GetName()}, 819 Count: int(v.GetCount()), 820 } 821 if ref.Value != nil { 822 facet.Value = Atom(*ref.Value) 823 } else { 824 facet.Value = protoToRange(ref.Range) 825 } 826 fres = append(fres, facet) 827 } 828 facets = append(facets, fres) 829 } 830 return facets, nil 831 } 832 833 // saveDoc converts from a struct pointer or 834 // FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf. 835 func saveDoc(src interface{}) (*pb.Document, error) { 836 var err error 837 var fields []Field 838 var meta *DocumentMetadata 839 switch x := src.(type) { 840 case FieldLoadSaver: 841 fields, meta, err = x.Save() 842 default: 843 fields, err = SaveStruct(src) 844 } 845 if err != nil { 846 return nil, err 847 } 848 849 fieldsProto, err := fieldsToProto(fields) 850 if err != nil { 851 return nil, err 852 } 853 d := &pb.Document{ 854 Field: fieldsProto, 855 OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())), 856 } 857 if meta != nil { 858 if meta.Rank != 0 { 859 if !validDocRank(meta.Rank) { 860 return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank) 861 } 862 *d.OrderId = int32(meta.Rank) 863 } 864 if len(meta.Facets) > 0 { 865 facets, err := facetsToProto(meta.Facets) 866 if err != nil { 867 return nil, err 868 } 869 d.Facet = facets 870 } 871 } 872 return d, nil 873 } 874 875 func fieldsToProto(src []Field) ([]*pb.Field, error) { 876 // Maps to catch duplicate time or numeric fields. 877 timeFields, numericFields := make(map[string]bool), make(map[string]bool) 878 dst := make([]*pb.Field, 0, len(src)) 879 for _, f := range src { 880 if !validFieldName(f.Name) { 881 return nil, fmt.Errorf("search: invalid field name %q", f.Name) 882 } 883 fieldValue := &pb.FieldValue{} 884 switch x := f.Value.(type) { 885 case string: 886 fieldValue.Type = pb.FieldValue_TEXT.Enum() 887 fieldValue.StringValue = proto.String(x) 888 case Atom: 889 fieldValue.Type = pb.FieldValue_ATOM.Enum() 890 fieldValue.StringValue = proto.String(string(x)) 891 case HTML: 892 fieldValue.Type = pb.FieldValue_HTML.Enum() 893 fieldValue.StringValue = proto.String(string(x)) 894 case time.Time: 895 if timeFields[f.Name] { 896 return nil, fmt.Errorf("search: duplicate time field %q", f.Name) 897 } 898 timeFields[f.Name] = true 899 fieldValue.Type = pb.FieldValue_DATE.Enum() 900 fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10)) 901 case float64: 902 if numericFields[f.Name] { 903 return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name) 904 } 905 if !validFloat(x) { 906 return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x) 907 } 908 numericFields[f.Name] = true 909 fieldValue.Type = pb.FieldValue_NUMBER.Enum() 910 fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) 911 case appengine.GeoPoint: 912 if !x.Valid() { 913 return nil, fmt.Errorf( 914 "search: GeoPoint field %q with invalid value %v", 915 f.Name, x) 916 } 917 fieldValue.Type = pb.FieldValue_GEO.Enum() 918 fieldValue.Geo = &pb.FieldValue_Geo{ 919 Lat: proto.Float64(x.Lat), 920 Lng: proto.Float64(x.Lng), 921 } 922 default: 923 return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value)) 924 } 925 if f.Language != "" { 926 switch f.Value.(type) { 927 case string, HTML: 928 if !validLanguage(f.Language) { 929 return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language) 930 } 931 fieldValue.Language = proto.String(f.Language) 932 default: 933 return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value) 934 } 935 } 936 if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) { 937 return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p) 938 } 939 dst = append(dst, &pb.Field{ 940 Name: proto.String(f.Name), 941 Value: fieldValue, 942 }) 943 } 944 return dst, nil 945 } 946 947 func facetsToProto(src []Facet) ([]*pb.Facet, error) { 948 dst := make([]*pb.Facet, 0, len(src)) 949 for _, f := range src { 950 if !validFieldName(f.Name) { 951 return nil, fmt.Errorf("search: invalid facet name %q", f.Name) 952 } 953 facetValue := &pb.FacetValue{} 954 switch x := f.Value.(type) { 955 case Atom: 956 if !utf8.ValidString(string(x)) { 957 return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x) 958 } 959 facetValue.Type = pb.FacetValue_ATOM.Enum() 960 facetValue.StringValue = proto.String(string(x)) 961 case float64: 962 if !validFloat(x) { 963 return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x) 964 } 965 facetValue.Type = pb.FacetValue_NUMBER.Enum() 966 facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) 967 default: 968 return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value)) 969 } 970 dst = append(dst, &pb.Facet{ 971 Name: proto.String(f.Name), 972 Value: facetValue, 973 }) 974 } 975 return dst, nil 976 } 977 978 // loadDoc converts from protobufs to a struct pointer or 979 // FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's 980 // stored fields and facets, and any document metadata. An additional slice of 981 // fields, exprs, may optionally be provided to contain any derived expressions 982 // requested by the developer. 983 func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) { 984 fields, err := protoToFields(src.Field) 985 if err != nil { 986 return err 987 } 988 facets, err := protoToFacets(src.Facet) 989 if err != nil { 990 return err 991 } 992 if len(exprs) > 0 { 993 exprFields, err := protoToFields(exprs) 994 if err != nil { 995 return err 996 } 997 // Mark each field as derived. 998 for i := range exprFields { 999 exprFields[i].Derived = true 1000 } 1001 fields = append(fields, exprFields...) 1002 } 1003 meta := &DocumentMetadata{ 1004 Rank: int(src.GetOrderId()), 1005 Facets: facets, 1006 } 1007 switch x := dst.(type) { 1008 case FieldLoadSaver: 1009 return x.Load(fields, meta) 1010 default: 1011 return loadStructWithMeta(dst, fields, meta) 1012 } 1013 } 1014 1015 func protoToFields(fields []*pb.Field) ([]Field, error) { 1016 dst := make([]Field, 0, len(fields)) 1017 for _, field := range fields { 1018 fieldValue := field.GetValue() 1019 f := Field{ 1020 Name: field.GetName(), 1021 } 1022 switch fieldValue.GetType() { 1023 case pb.FieldValue_TEXT: 1024 f.Value = fieldValue.GetStringValue() 1025 f.Language = fieldValue.GetLanguage() 1026 case pb.FieldValue_ATOM: 1027 f.Value = Atom(fieldValue.GetStringValue()) 1028 case pb.FieldValue_HTML: 1029 f.Value = HTML(fieldValue.GetStringValue()) 1030 f.Language = fieldValue.GetLanguage() 1031 case pb.FieldValue_DATE: 1032 sv := fieldValue.GetStringValue() 1033 millis, err := strconv.ParseInt(sv, 10, 64) 1034 if err != nil { 1035 return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err) 1036 } 1037 f.Value = time.Unix(0, millis*1e6) 1038 case pb.FieldValue_NUMBER: 1039 sv := fieldValue.GetStringValue() 1040 x, err := strconv.ParseFloat(sv, 64) 1041 if err != nil { 1042 return nil, err 1043 } 1044 f.Value = x 1045 case pb.FieldValue_GEO: 1046 geoValue := fieldValue.GetGeo() 1047 geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()} 1048 if !geoPoint.Valid() { 1049 return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint) 1050 } 1051 f.Value = geoPoint 1052 default: 1053 return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType()) 1054 } 1055 dst = append(dst, f) 1056 } 1057 return dst, nil 1058 } 1059 1060 func protoToFacets(facets []*pb.Facet) ([]Facet, error) { 1061 if len(facets) == 0 { 1062 return nil, nil 1063 } 1064 dst := make([]Facet, 0, len(facets)) 1065 for _, facet := range facets { 1066 facetValue := facet.GetValue() 1067 f := Facet{ 1068 Name: facet.GetName(), 1069 } 1070 switch facetValue.GetType() { 1071 case pb.FacetValue_ATOM: 1072 f.Value = Atom(facetValue.GetStringValue()) 1073 case pb.FacetValue_NUMBER: 1074 sv := facetValue.GetStringValue() 1075 x, err := strconv.ParseFloat(sv, 64) 1076 if err != nil { 1077 return nil, err 1078 } 1079 f.Value = x 1080 default: 1081 return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType()) 1082 } 1083 dst = append(dst, f) 1084 } 1085 return dst, nil 1086 } 1087 1088 func namespaceMod(m proto.Message, namespace string) { 1089 set := func(s **string) { 1090 if *s == nil { 1091 *s = &namespace 1092 } 1093 } 1094 switch m := m.(type) { 1095 case *pb.IndexDocumentRequest: 1096 set(&m.Params.IndexSpec.Namespace) 1097 case *pb.ListDocumentsRequest: 1098 set(&m.Params.IndexSpec.Namespace) 1099 case *pb.DeleteDocumentRequest: 1100 set(&m.Params.IndexSpec.Namespace) 1101 case *pb.SearchRequest: 1102 set(&m.Params.IndexSpec.Namespace) 1103 } 1104 } 1105 1106 func init() { 1107 internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name) 1108 internal.NamespaceMods["search"] = namespaceMod 1109 }