github.com/timstclair/heapster@v0.20.0-alpha1/Godeps/_workspace/src/google.golang.org/appengine/search/search.go (about)

     1  // Copyright 2012 Google Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package search
     6  
     7  // TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage??
     8  // TODO: Index.GetAll (or Iterator.GetAll)?
     9  // TODO: struct <-> protobuf tests.
    10  // TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero
    11  // time.Time)? _MAXIMUM_STRING_LENGTH?
    12  
    13  import (
    14  	"errors"
    15  	"fmt"
    16  	"math"
    17  	"reflect"
    18  	"regexp"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  	"unicode/utf8"
    23  
    24  	"github.com/golang/protobuf/proto"
    25  	"golang.org/x/net/context"
    26  
    27  	"google.golang.org/appengine"
    28  	"google.golang.org/appengine/internal"
    29  	pb "google.golang.org/appengine/internal/search"
    30  )
    31  
var (
	// ErrInvalidDocumentType is returned when methods like Put, Get or Next
	// are passed a dst or src argument of invalid type.
	ErrInvalidDocumentType = errors.New("search: invalid document type")

	// ErrNoSuchDocument is returned when no document was found for a given ID.
	// Index.Get returns it when the lookup does not yield exactly one document
	// whose ID equals the requested ID.
	ErrNoSuchDocument = errors.New("search: no such document")
)
    40  
// Atom is a document field whose contents are indexed as a single indivisible
// string. Atom values are also accepted as facet value constraints (see
// FacetDiscovery) and as facet refinement values.
type Atom string
    44  
// HTML is a document field whose contents are indexed as HTML. Only text nodes
// are indexed: "foo<b>bar" will be treated as "foobar". When a document is
// saved, HTML values are sent to the service with the HTML field type.
type HTML string
    48  
    49  // validIndexNameOrDocID is the Go equivalent of Python's
    50  // _ValidateVisiblePrintableAsciiNotReserved.
    51  func validIndexNameOrDocID(s string) bool {
    52  	if strings.HasPrefix(s, "!") {
    53  		return false
    54  	}
    55  	for _, c := range s {
    56  		if c < 0x21 || 0x7f <= c {
    57  			return false
    58  		}
    59  	}
    60  	return true
    61  }
    62  
var (
	// fieldNameRE matches names that start with an ASCII letter followed by
	// any number of ASCII letters, digits or underscores.
	fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`)
	// languageRE matches exactly two lowercase ASCII letters.
	languageRE  = regexp.MustCompile(`^[a-z]{2}$`)
)
    67  
    68  // validFieldName is the Go equivalent of Python's _CheckFieldName. It checks
    69  // the validity of both field and facet names.
    70  func validFieldName(s string) bool {
    71  	return len(s) <= 500 && fieldNameRE.MatchString(s)
    72  }
    73  
    74  // validDocRank checks that the ranks is in the range [0, 2^31).
    75  func validDocRank(r int) bool {
    76  	return 0 <= r && r <= (1<<31-1)
    77  }
    78  
    79  // validLanguage checks that a language looks like ISO 639-1.
    80  func validLanguage(s string) bool {
    81  	return languageRE.MatchString(s)
    82  }
    83  
    84  // validFloat checks that f is in the range [-2147483647, 2147483647].
    85  func validFloat(f float64) bool {
    86  	return -(1<<31-1) <= f && f <= (1<<31-1)
    87  }
    88  
// Index is an index of documents. Use Open to obtain one.
type Index struct {
	// spec identifies the index; a pointer to it is attached to every
	// request this package sends on behalf of the Index.
	spec pb.IndexSpec
}
    93  
// orderIDEpoch forms the basis for populating OrderId on documents: saveDoc
// sets a document's default OrderId to the number of seconds elapsed since
// this instant.
var orderIDEpoch = time.Date(2011, 1, 1, 0, 0, 0, 0, time.UTC)
    96  
    97  // Open opens the index with the given name. The index is created if it does
    98  // not already exist.
    99  //
   100  // The name is a human-readable ASCII string. It must contain no whitespace
   101  // characters and not start with "!".
   102  func Open(name string) (*Index, error) {
   103  	if !validIndexNameOrDocID(name) {
   104  		return nil, fmt.Errorf("search: invalid index name %q", name)
   105  	}
   106  	return &Index{
   107  		spec: pb.IndexSpec{
   108  			Name: &name,
   109  		},
   110  	}, nil
   111  }
   112  
   113  // Put saves src to the index. If id is empty, a new ID is allocated by the
   114  // service and returned. If id is not empty, any existing index entry for that
   115  // ID is replaced.
   116  //
   117  // The ID is a human-readable ASCII string. It must contain no whitespace
   118  // characters and not start with "!".
   119  //
   120  // src must be a non-nil struct pointer or implement the FieldLoadSaver
   121  // interface.
   122  func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) {
   123  	d, err := saveDoc(src)
   124  	if err != nil {
   125  		return "", err
   126  	}
   127  	if id != "" {
   128  		if !validIndexNameOrDocID(id) {
   129  			return "", fmt.Errorf("search: invalid ID %q", id)
   130  		}
   131  		d.Id = proto.String(id)
   132  	}
   133  	req := &pb.IndexDocumentRequest{
   134  		Params: &pb.IndexDocumentParams{
   135  			Document:  []*pb.Document{d},
   136  			IndexSpec: &x.spec,
   137  		},
   138  	}
   139  	res := &pb.IndexDocumentResponse{}
   140  	if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil {
   141  		return "", err
   142  	}
   143  	if len(res.Status) > 0 {
   144  		if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK {
   145  			return "", fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
   146  		}
   147  	}
   148  	if len(res.Status) != 1 || len(res.DocId) != 1 {
   149  		return "", fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs)",
   150  			len(res.Status), len(res.DocId))
   151  	}
   152  	return res.DocId[0], nil
   153  }
   154  
   155  // Get loads the document with the given ID into dst.
   156  //
   157  // The ID is a human-readable ASCII string. It must be non-empty, contain no
   158  // whitespace characters and not start with "!".
   159  //
   160  // dst must be a non-nil struct pointer or implement the FieldLoadSaver
   161  // interface.
   162  //
   163  // ErrFieldMismatch is returned when a field is to be loaded into a different
   164  // type than the one it was stored from, or when a field is missing or
   165  // unexported in the destination struct. ErrFieldMismatch is only returned if
   166  // dst is a struct pointer. It is up to the callee to decide whether this error
   167  // is fatal, recoverable, or ignorable.
   168  func (x *Index) Get(c context.Context, id string, dst interface{}) error {
   169  	if id == "" || !validIndexNameOrDocID(id) {
   170  		return fmt.Errorf("search: invalid ID %q", id)
   171  	}
   172  	req := &pb.ListDocumentsRequest{
   173  		Params: &pb.ListDocumentsParams{
   174  			IndexSpec:  &x.spec,
   175  			StartDocId: proto.String(id),
   176  			Limit:      proto.Int32(1),
   177  		},
   178  	}
   179  	res := &pb.ListDocumentsResponse{}
   180  	if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil {
   181  		return err
   182  	}
   183  	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
   184  		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
   185  	}
   186  	if len(res.Document) != 1 || res.Document[0].GetId() != id {
   187  		return ErrNoSuchDocument
   188  	}
   189  	return loadDoc(dst, res.Document[0], nil)
   190  }
   191  
   192  // Delete deletes a document from the index.
   193  func (x *Index) Delete(c context.Context, id string) error {
   194  	req := &pb.DeleteDocumentRequest{
   195  		Params: &pb.DeleteDocumentParams{
   196  			DocId:     []string{id},
   197  			IndexSpec: &x.spec,
   198  		},
   199  	}
   200  	res := &pb.DeleteDocumentResponse{}
   201  	if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil {
   202  		return err
   203  	}
   204  	if len(res.Status) != 1 {
   205  		return fmt.Errorf("search: internal error: wrong number of results (%d)", len(res.Status))
   206  	}
   207  	if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK {
   208  		return fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
   209  	}
   210  	return nil
   211  }
   212  
   213  // List lists all of the documents in an index. The documents are returned in
   214  // increasing ID order.
   215  func (x *Index) List(c context.Context, opts *ListOptions) *Iterator {
   216  	t := &Iterator{
   217  		c:             c,
   218  		index:         x,
   219  		count:         -1,
   220  		listInclusive: true,
   221  		more:          moreList,
   222  	}
   223  	if opts != nil {
   224  		t.listStartID = opts.StartID
   225  		t.limit = opts.Limit
   226  		t.idsOnly = opts.IDsOnly
   227  	}
   228  	return t
   229  }
   230  
// moreList is the Iterator.more function for iterators created by List. It
// issues one ListDocuments request, stores the returned documents in
// t.listRes, and decides whether a further request may be made later.
func moreList(t *Iterator) error {
	req := &pb.ListDocumentsRequest{
		Params: &pb.ListDocumentsParams{
			IndexSpec: &t.index.spec,
		},
	}
	// The start ID (and whether it is inclusive) is only sent when one is set.
	if t.listStartID != "" {
		req.Params.StartDocId = &t.listStartID
		req.Params.IncludeStartDoc = &t.listInclusive
	}
	if t.limit > 0 {
		req.Params.Limit = proto.Int32(int32(t.limit))
	}
	if t.idsOnly {
		req.Params.KeysOnly = &t.idsOnly
	}

	res := &pb.ListDocumentsResponse{}
	if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil {
		return err
	}
	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
	}
	t.listRes = res.Document
	// By default this was the final request. Any follow-up request starts
	// after (exclusive of) the last document already received.
	t.listStartID, t.listInclusive, t.more = "", false, nil
	// Continue paging only when no explicit limit was given and the response
	// was non-empty; a positive limit therefore results in a single request.
	if len(res.Document) != 0 && t.limit <= 0 {
		if id := res.Document[len(res.Document)-1].GetId(); id != "" {
			t.listStartID, t.more = id, moreList
		}
	}
	return nil
}
   264  
// ListOptions are the options for listing documents in an index. Passing a nil
// *ListOptions is equivalent to using the default values.
type ListOptions struct {
	// StartID is the inclusive lower bound for the ID of the returned
	// documents. The zero value means all documents will be returned.
	StartID string

	// Limit is the maximum number of documents to return. The zero value
	// indicates no limit. When Limit is positive, the iterator issues a
	// single request carrying this limit.
	Limit int

	// IDsOnly indicates that only document IDs should be returned for the list
	// operation; no document fields are populated.
	IDsOnly bool
}
   280  
   281  // Search searches the index for the given query.
   282  func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator {
   283  	t := &Iterator{
   284  		c:           c,
   285  		index:       x,
   286  		searchQuery: query,
   287  		more:        moreSearch,
   288  	}
   289  	if opts != nil {
   290  		if opts.Cursor != "" {
   291  			if opts.Offset != 0 {
   292  				return errIter("at most one of Cursor and Offset may be specified")
   293  			}
   294  			t.searchCursor = proto.String(string(opts.Cursor))
   295  		}
   296  		t.limit = opts.Limit
   297  		t.fields = opts.Fields
   298  		t.idsOnly = opts.IDsOnly
   299  		t.sort = opts.Sort
   300  		t.exprs = opts.Expressions
   301  		t.refinements = opts.Refinements
   302  		t.facetOpts = opts.Facets
   303  		t.searchOffset = opts.Offset
   304  	}
   305  	return t
   306  }
   307  
// moreSearch is the Iterator.more function for iterators created by Search.
// It builds a Search request from the iterator's state, issues it, and stores
// the results, facet results and matched count on the iterator.
func moreSearch(t *Iterator) error {
	// We use per-result (rather than single/per-page) cursors since this
	// lets us return a Cursor for every iterator document. The two cursor
	// types are largely interchangeable: a page cursor is the same as the
	// last per-result cursor in a given search response.
	req := &pb.SearchRequest{
		Params: &pb.SearchParams{
			IndexSpec:  &t.index.spec,
			Query:      &t.searchQuery,
			Cursor:     t.searchCursor,
			CursorType: pb.SearchParams_PER_RESULT.Enum(),
			FieldSpec: &pb.FieldSpec{
				Name: t.fields,
			},
		},
	}
	if t.limit > 0 {
		req.Params.Limit = proto.Int32(int32(t.limit))
	}
	// The offset is sent once and then cleared on the iterator, so it only
	// applies to the first request.
	if t.searchOffset > 0 {
		req.Params.Offset = proto.Int32(int32(t.searchOffset))
		t.searchOffset = 0
	}
	if t.idsOnly {
		req.Params.KeysOnly = &t.idsOnly
	}
	if t.sort != nil {
		if err := sortToProto(t.sort, req.Params); err != nil {
			return err
		}
	}
	if t.refinements != nil {
		if err := refinementsToProto(t.refinements, req.Params); err != nil {
			return err
		}
	}
	for _, e := range t.exprs {
		req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{
			Name:       proto.String(e.Name),
			Expression: proto.String(e.Expr),
		})
	}
	for _, f := range t.facetOpts {
		if err := f.setParams(req.Params); err != nil {
			return fmt.Errorf("bad FacetSearchOption: %v", err)
		}
	}
	// Don't repeat facet search.
	t.facetOpts = nil

	res := &pb.SearchResponse{}
	if err := internal.Call(t.c, "search", "Search", req, res); err != nil {
		return err
	}
	if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
		return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
	}
	t.searchRes = res.Result
	// Facet results are only overwritten when this response carries some, so
	// results from an earlier request are retained otherwise.
	if len(res.FacetResult) > 0 {
		t.facetRes = res.FacetResult
	}
	// NOTE(review): MatchedCount is dereferenced directly; this assumes the
	// service always populates it on an OK response.
	t.count = int(*res.MatchedCount)
	// A positive limit means a single request; otherwise further requests
	// may be made, continuing from t.searchCursor.
	if t.limit > 0 {
		t.more = nil
	} else {
		t.more = moreSearch
	}
	return nil
}
   377  
// SearchOptions are the options for searching an index. Passing a nil
// *SearchOptions is equivalent to using the default values.
type SearchOptions struct {
	// Limit is the maximum number of documents to return. The zero value
	// indicates no limit.
	Limit int

	// IDsOnly indicates that only document IDs should be returned for the search
	// operation; no document fields are populated.
	IDsOnly bool

	// Sort controls the ordering of search results.
	Sort *SortOptions

	// Fields specifies which document fields to include in the results. If omitted,
	// all document fields are returned. No more than 100 fields may be specified.
	Fields []string

	// Expressions specifies additional computed fields to add to each returned
	// document.
	Expressions []FieldExpression

	// Facets controls what facet information is returned for these search results.
	// If no options are specified, no facet results will be returned.
	Facets []FacetSearchOption

	// Refinements filters the returned documents by requiring them to contain facets
	// with specific values. Refinements are applied in conjunction for facets with
	// different names, and in disjunction otherwise.
	Refinements []Facet

	// Cursor causes the results to commence with the first document after
	// the document associated with the cursor. When specified, Offset must
	// be zero.
	Cursor Cursor

	// Offset specifies the number of documents to skip over before returning results.
	// When specified, Cursor must be empty.
	Offset int
}
   417  
// Cursor represents an iterator's position.
//
// The string value of a cursor is web-safe. It can be saved and restored
// for later use. The empty Cursor means that no cursor is set.
type Cursor string
   423  
// FieldExpression defines a custom expression to evaluate for each result.
// Each FieldExpression in SearchOptions.Expressions is sent to the service as
// a named expression in the request's field spec.
type FieldExpression struct {
	// Name is the name to use for the computed field.
	Name string

	// Expr is evaluated to provide a custom content snippet for each document.
	// See https://cloud.google.com/appengine/docs/go/search/options for
	// the supported expression syntax.
	Expr string
}
   434  
// FacetSearchOption controls what facet information is returned in search results.
// Values are created with AutoFacetDiscovery, FacetDiscovery and
// FacetDocumentDepth.
type FacetSearchOption interface {
	// setParams applies the option to the outgoing search parameters and
	// returns an error if the option is invalid.
	setParams(*pb.SearchParams) error
}
   439  
   440  // AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet
   441  // discovery for the search. Automatic facet discovery looks for the facets
   442  // which appear the most often in the aggregate in the matched documents.
   443  //
   444  // The maximum number of facets returned is controlled by facetLimit, and the
   445  // maximum number of values per facet by facetLimit. A limit of zero indicates
   446  // a default limit should be used.
   447  func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption {
   448  	return &autoFacetOpt{facetLimit, valueLimit}
   449  }
   450  
// autoFacetOpt is the FacetSearchOption returned by AutoFacetDiscovery. It
// holds the requested facet-count and values-per-facet limits.
type autoFacetOpt struct {
	facetLimit, valueLimit int
}
   454  
// defaultAutoFacetLimit is the facet count requested when AutoFacetDiscovery
// is given a facetLimit of zero.
const defaultAutoFacetLimit = 10 // As per python runtime search.py.
   456  
   457  func (o *autoFacetOpt) setParams(params *pb.SearchParams) error {
   458  	lim := int32(o.facetLimit)
   459  	if lim == 0 {
   460  		lim = defaultAutoFacetLimit
   461  	}
   462  	params.AutoDiscoverFacetCount = &lim
   463  	if o.valueLimit > 0 {
   464  		params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{
   465  			ValueLimit: proto.Int32(int32(o.valueLimit)),
   466  		}
   467  	}
   468  	return nil
   469  }
   470  
   471  // FacetDiscovery returns a FacetSearchOption which selects a facet to be
   472  // returned with the search results. By default, the most frequently
   473  // occurring values for that facet will be returned. However, you can also
   474  // specify a list of particular Atoms or specific Ranges to return.
   475  func FacetDiscovery(name string, value ...interface{}) FacetSearchOption {
   476  	return &facetOpt{name, value}
   477  }
   478  
// facetOpt is the FacetSearchOption returned by FacetDiscovery.
type facetOpt struct {
	// name is the name of the facet to return.
	name   string
	// values optionally restricts the facet to specific Atom values or
	// Ranges; all elements must share one type.
	values []interface{}
}
   483  
// setParams adds a facet request for o.name to params. When values were
// supplied they must all be Atoms or all be Ranges; they are attached to the
// request as value constraints or ranges respectively.
func (o *facetOpt) setParams(params *pb.SearchParams) error {
	req := &pb.FacetRequest{Name: &o.name}
	// The request is appended before the values are validated, so params
	// retains it (without Params set) even when an error is returned below.
	params.IncludeFacet = append(params.IncludeFacet, req)
	if len(o.values) == 0 {
		return nil
	}
	// Every value must have the same dynamic type as the first one.
	vtype := reflect.TypeOf(o.values[0])
	reqParam := &pb.FacetRequestParam{}
	for _, v := range o.values {
		if reflect.TypeOf(v) != vtype {
			return errors.New("values must all be Atom, or must all be Range")
		}
		switch v := v.(type) {
		case Atom:
			reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v))
		case Range:
			rng, err := rangeToProto(v)
			if err != nil {
				return fmt.Errorf("invalid range: %v", err)
			}
			reqParam.Range = append(reqParam.Range, rng)
		default:
			return fmt.Errorf("unsupported value type %T", v)
		}
	}
	req.Params = reqParam
	return nil
}
   512  
   513  // FacetDocumentDepth returns a FacetSearchOption which controls the number of
   514  // documents to be evaluated with preparing facet results.
   515  func FacetDocumentDepth(depth int) FacetSearchOption {
   516  	return facetDepthOpt(depth)
   517  }
   518  
// facetDepthOpt is the FacetSearchOption returned by FacetDocumentDepth; its
// value is the requested facet depth.
type facetDepthOpt int
   520  
   521  func (o facetDepthOpt) setParams(params *pb.SearchParams) error {
   522  	params.FacetDepth = proto.Int32(int32(o))
   523  	return nil
   524  }
   525  
// FacetResult represents the number of times a particular facet and value
// appeared in the documents matching a search request. Values are produced by
// Iterator.Facets.
type FacetResult struct {
	// Facet carries the facet name and its value; Iterator.Facets sets the
	// value to either an Atom or a Range.
	Facet

	// Count is the number of times this specific facet and value appeared in the
	// matching documents.
	Count int
}
   535  
// Range represents a numeric range with inclusive start and exclusive end.
// Start may be specified as math.Inf(-1) to indicate there is no minimum
// value, and End may similarly be specified as math.Inf(1); at least one of
// Start or End must be a finite number. Finite bounds must additionally lie
// within [-2147483647, 2147483647] to be accepted in a request.
type Range struct {
	Start, End float64
}
   543  
// negInf and posInf are the sentinel values that mark an unbounded Range
// start and end respectively.
var (
	negInf = math.Inf(-1)
	posInf = math.Inf(1)
)
   548  
   549  // AtLeast returns a Range matching any value greater than, or equal to, min.
   550  func AtLeast(min float64) Range {
   551  	return Range{Start: min, End: posInf}
   552  }
   553  
   554  // LessThan returns a Range matching any value less than max.
   555  func LessThan(max float64) Range {
   556  	return Range{Start: negInf, End: max}
   557  }
   558  
// SortOptions control the ordering and scoring of search results.
type SortOptions struct {
	// Expressions is a slice of expressions representing a multi-dimensional
	// sort.
	Expressions []SortExpression

	// Scorer, when specified, will cause the documents to be scored according to
	// search term frequency. See MatchScorer and RescoringMatchScorer.
	Scorer Scorer

	// Limit is the maximum number of objects to score and/or sort. Limit cannot
	// be more than 10,000. The zero value indicates a default limit.
	Limit int
}
   573  
// SortExpression defines a single dimension for sorting a document.
type SortExpression struct {
	// Expr is evaluated to provide a sorting value for each document.
	// See https://cloud.google.com/appengine/docs/go/search/options for
	// the supported expression syntax.
	Expr string

	// Reverse causes the documents to be sorted in ascending order.
	Reverse bool

	// The default value to use when no field is present or the expression
	// cannot be calculated for a document. For text sorts, Default must
	// be of type string; for numeric sorts, float64. Any other non-nil type
	// causes the search to fail with an error.
	Default interface{}
}
   589  
// A Scorer defines how a document is scored. The available scorers are
// MatchScorer and RescoringMatchScorer.
type Scorer interface {
	// toProto records the scorer in the given scorer spec.
	toProto(*pb.ScorerSpec)
}
   594  
// enumScorer is a Scorer that is fully described by a single scorer enum
// value.
type enumScorer struct {
	enum pb.ScorerSpec_Scorer
}
   598  
   599  func (e enumScorer) toProto(spec *pb.ScorerSpec) {
   600  	spec.Scorer = e.enum.Enum()
   601  }
   602  
// The Scorer values that can be assigned to SortOptions.Scorer.
var (
	// MatchScorer assigns a score based on term frequency in a document.
	MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER}

	// RescoringMatchScorer assigns a score based on the quality of the query
	// match. It is similar to a MatchScorer but uses a more complex scoring
	// algorithm based on match term frequency and other factors like field type.
	// Please be aware that this algorithm is continually refined and can change
	// over time without notice. This means that the ordering of search results
	// that use this scorer can also change without notice.
	RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER}
)
   615  
// sortToProto translates sort into the sort specs and scorer spec of params.
// It returns an error if an expression's Default is neither a float64 nor a
// string.
func sortToProto(sort *SortOptions, params *pb.SearchParams) error {
	for _, e := range sort.Expressions {
		spec := &pb.SortSpec{
			SortExpression: proto.String(e.Expr),
		}
		// SortDescending is only set (to false) for reversed expressions;
		// otherwise it is left unset.
		if e.Reverse {
			spec.SortDescending = proto.Bool(false)
		}
		if e.Default != nil {
			switch d := e.Default.(type) {
			case float64:
				spec.DefaultValueNumeric = &d
			case string:
				spec.DefaultValueText = &d
			default:
				return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr)
			}
		}
		params.SortSpec = append(params.SortSpec, spec)
	}

	// The scorer spec is only attached to params when a limit or a scorer
	// was actually specified.
	spec := &pb.ScorerSpec{}
	if sort.Limit > 0 {
		spec.Limit = proto.Int32(int32(sort.Limit))
		params.ScorerSpec = spec
	}
	if sort.Scorer != nil {
		sort.Scorer.toProto(spec)
		params.ScorerSpec = spec
	}

	return nil
}
   649  
   650  func refinementsToProto(refinements []Facet, params *pb.SearchParams) error {
   651  	for _, r := range refinements {
   652  		ref := &pb.FacetRefinement{
   653  			Name: proto.String(r.Name),
   654  		}
   655  		switch v := r.Value.(type) {
   656  		case Atom:
   657  			ref.Value = proto.String(string(v))
   658  		case Range:
   659  			rng, err := rangeToProto(v)
   660  			if err != nil {
   661  				return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err)
   662  			}
   663  			// Unfortunately there are two identical messages for identify Facet ranges.
   664  			ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End}
   665  		default:
   666  			return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v)
   667  		}
   668  		params.FacetRefinement = append(params.FacetRefinement, ref)
   669  	}
   670  	return nil
   671  }
   672  
   673  func rangeToProto(r Range) (*pb.FacetRange, error) {
   674  	rng := &pb.FacetRange{}
   675  	if r.Start != negInf {
   676  		if !validFloat(r.Start) {
   677  			return nil, errors.New("invalid value for Start")
   678  		}
   679  		rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64))
   680  	} else if r.End == posInf {
   681  		return nil, errors.New("either Start or End must be finite")
   682  	}
   683  	if r.End != posInf {
   684  		if !validFloat(r.End) {
   685  			return nil, errors.New("invalid value for End")
   686  		}
   687  		rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64))
   688  	}
   689  	return rng, nil
   690  }
   691  
   692  func protoToRange(rng *pb.FacetRefinement_Range) Range {
   693  	r := Range{Start: negInf, End: posInf}
   694  	if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil {
   695  		r.Start = x
   696  	}
   697  	if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil {
   698  		r.End = x
   699  	}
   700  	return r
   701  }
   702  
// Iterator is the result of searching an index for a query or listing an
// index.
type Iterator struct {
	c     context.Context
	index *Index
	// err, once set, is returned by every subsequent call to Next.
	err   error

	// State for iterators created by List.
	listRes       []*pb.Document // fetched documents not yet returned by Next
	listStartID   string         // ID at which the next list request starts
	listInclusive bool           // whether listStartID itself is included

	// State for iterators created by Search.
	searchRes    []*pb.SearchResult // fetched results not yet returned by Next
	facetRes     []*pb.FacetResult  // facet results from a search response
	searchQuery  string
	searchCursor *string // cursor of the most recently returned result, or the initial cursor
	searchOffset int     // offset for the first request; cleared once sent
	sort         *SortOptions

	fields      []string
	exprs       []FieldExpression
	refinements []Facet
	facetOpts   []FacetSearchOption // cleared after the first search request

	// more fetches the next batch of results; nil when nothing more can be
	// fetched.
	more func(*Iterator) error

	count   int // matched-document count reported by Count; -1 for List iterators
	limit   int // items left to return; 0 for unlimited.
	idsOnly bool
}
   732  
   733  // errIter returns an iterator that only returns the given error.
   734  func errIter(err string) *Iterator {
   735  	return &Iterator{
   736  		err: errors.New(err),
   737  	}
   738  }
   739  
// Done is returned when a query iteration has completed. Iterator.Next
// returns it once there are no more results.
var Done = errors.New("search: query has no more results")
   742  
   743  // Count returns an approximation of the number of documents matched by the
   744  // query. It is only valid to call for iterators returned by Search.
   745  func (t *Iterator) Count() int { return t.count }
   746  
   747  // fetchMore retrieves more results, if there are no errors or pending results.
   748  func (t *Iterator) fetchMore() {
   749  	if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil {
   750  		t.err = t.more(t)
   751  	}
   752  }
   753  
   754  // Next returns the ID of the next result. When there are no more results,
   755  // Done is returned as the error.
   756  //
   757  // dst must be a non-nil struct pointer, implement the FieldLoadSaver
   758  // interface, or be a nil interface value. If a non-nil dst is provided, it
   759  // will be filled with the indexed fields. dst is ignored if this iterator was
   760  // created with an IDsOnly option.
   761  func (t *Iterator) Next(dst interface{}) (string, error) {
   762  	t.fetchMore()
   763  	if t.err != nil {
   764  		return "", t.err
   765  	}
   766  
   767  	var doc *pb.Document
   768  	var exprs []*pb.Field
   769  	switch {
   770  	case len(t.listRes) != 0:
   771  		doc = t.listRes[0]
   772  		t.listRes = t.listRes[1:]
   773  	case len(t.searchRes) != 0:
   774  		doc = t.searchRes[0].Document
   775  		exprs = t.searchRes[0].Expression
   776  		t.searchCursor = t.searchRes[0].Cursor
   777  		t.searchRes = t.searchRes[1:]
   778  	default:
   779  		return "", Done
   780  	}
   781  	if doc == nil {
   782  		return "", errors.New("search: internal error: no document returned")
   783  	}
   784  	if !t.idsOnly && dst != nil {
   785  		if err := loadDoc(dst, doc, exprs); err != nil {
   786  			return "", err
   787  		}
   788  	}
   789  	return doc.GetId(), nil
   790  }
   791  
   792  // Cursor returns the cursor associated with the current document (that is,
   793  // the document most recently returned by a call to Next).
   794  //
   795  // Passing this cursor in a future call to Search will cause those results
   796  // to commence with the first document after the current document.
   797  func (t *Iterator) Cursor() Cursor {
   798  	if t.searchCursor == nil {
   799  		return ""
   800  	}
   801  	return Cursor(*t.searchCursor)
   802  }
   803  
   804  // Facets returns the facets found within the search results, if any facets
   805  // were requested in the SearchOptions.
   806  func (t *Iterator) Facets() ([][]FacetResult, error) {
   807  	t.fetchMore()
   808  	if t.err != nil && t.err != Done {
   809  		return nil, t.err
   810  	}
   811  
   812  	var facets [][]FacetResult
   813  	for _, f := range t.facetRes {
   814  		fres := make([]FacetResult, 0, len(f.Value))
   815  		for _, v := range f.Value {
   816  			ref := v.Refinement
   817  			facet := FacetResult{
   818  				Facet: Facet{Name: ref.GetName()},
   819  				Count: int(v.GetCount()),
   820  			}
   821  			if ref.Value != nil {
   822  				facet.Value = Atom(*ref.Value)
   823  			} else {
   824  				facet.Value = protoToRange(ref.Range)
   825  			}
   826  			fres = append(fres, facet)
   827  		}
   828  		facets = append(facets, fres)
   829  	}
   830  	return facets, nil
   831  }
   832  
// saveDoc converts from a struct pointer or
// FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf.
//
// The document's OrderId defaults to the number of seconds since
// orderIDEpoch and is replaced by the metadata rank when that rank is
// non-zero. An error is returned if the fields cannot be obtained or
// converted, if the rank is out of range, or if the facets are invalid.
func saveDoc(src interface{}) (*pb.Document, error) {
	var err error
	var fields []Field
	var meta *DocumentMetadata
	// Only a FieldLoadSaver can supply metadata; for any other src the
	// fields come from SaveStruct and meta stays nil.
	switch x := src.(type) {
	case FieldLoadSaver:
		fields, meta, err = x.Save()
	default:
		fields, err = SaveStruct(src)
	}
	if err != nil {
		return nil, err
	}

	fieldsProto, err := fieldsToProto(fields)
	if err != nil {
		return nil, err
	}
	d := &pb.Document{
		Field:   fieldsProto,
		OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())),
	}
	if meta != nil {
		// A rank of zero is treated as unset and keeps the time-based
		// default OrderId.
		if meta.Rank != 0 {
			if !validDocRank(meta.Rank) {
				return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank)
			}
			*d.OrderId = int32(meta.Rank)
		}
		if len(meta.Facets) > 0 {
			facets, err := facetsToProto(meta.Facets)
			if err != nil {
				return nil, err
			}
			d.Facet = facets
		}
	}
	return d, nil
}
   874  
   875  func fieldsToProto(src []Field) ([]*pb.Field, error) {
   876  	// Maps to catch duplicate time or numeric fields.
   877  	timeFields, numericFields := make(map[string]bool), make(map[string]bool)
   878  	dst := make([]*pb.Field, 0, len(src))
   879  	for _, f := range src {
   880  		if !validFieldName(f.Name) {
   881  			return nil, fmt.Errorf("search: invalid field name %q", f.Name)
   882  		}
   883  		fieldValue := &pb.FieldValue{}
   884  		switch x := f.Value.(type) {
   885  		case string:
   886  			fieldValue.Type = pb.FieldValue_TEXT.Enum()
   887  			fieldValue.StringValue = proto.String(x)
   888  		case Atom:
   889  			fieldValue.Type = pb.FieldValue_ATOM.Enum()
   890  			fieldValue.StringValue = proto.String(string(x))
   891  		case HTML:
   892  			fieldValue.Type = pb.FieldValue_HTML.Enum()
   893  			fieldValue.StringValue = proto.String(string(x))
   894  		case time.Time:
   895  			if timeFields[f.Name] {
   896  				return nil, fmt.Errorf("search: duplicate time field %q", f.Name)
   897  			}
   898  			timeFields[f.Name] = true
   899  			fieldValue.Type = pb.FieldValue_DATE.Enum()
   900  			fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10))
   901  		case float64:
   902  			if numericFields[f.Name] {
   903  				return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name)
   904  			}
   905  			if !validFloat(x) {
   906  				return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x)
   907  			}
   908  			numericFields[f.Name] = true
   909  			fieldValue.Type = pb.FieldValue_NUMBER.Enum()
   910  			fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
   911  		case appengine.GeoPoint:
   912  			if !x.Valid() {
   913  				return nil, fmt.Errorf(
   914  					"search: GeoPoint field %q with invalid value %v",
   915  					f.Name, x)
   916  			}
   917  			fieldValue.Type = pb.FieldValue_GEO.Enum()
   918  			fieldValue.Geo = &pb.FieldValue_Geo{
   919  				Lat: proto.Float64(x.Lat),
   920  				Lng: proto.Float64(x.Lng),
   921  			}
   922  		default:
   923  			return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value))
   924  		}
   925  		if f.Language != "" {
   926  			switch f.Value.(type) {
   927  			case string, HTML:
   928  				if !validLanguage(f.Language) {
   929  					return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language)
   930  				}
   931  				fieldValue.Language = proto.String(f.Language)
   932  			default:
   933  				return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value)
   934  			}
   935  		}
   936  		if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) {
   937  			return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p)
   938  		}
   939  		dst = append(dst, &pb.Field{
   940  			Name:  proto.String(f.Name),
   941  			Value: fieldValue,
   942  		})
   943  	}
   944  	return dst, nil
   945  }
   946  
   947  func facetsToProto(src []Facet) ([]*pb.Facet, error) {
   948  	dst := make([]*pb.Facet, 0, len(src))
   949  	for _, f := range src {
   950  		if !validFieldName(f.Name) {
   951  			return nil, fmt.Errorf("search: invalid facet name %q", f.Name)
   952  		}
   953  		facetValue := &pb.FacetValue{}
   954  		switch x := f.Value.(type) {
   955  		case Atom:
   956  			if !utf8.ValidString(string(x)) {
   957  				return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x)
   958  			}
   959  			facetValue.Type = pb.FacetValue_ATOM.Enum()
   960  			facetValue.StringValue = proto.String(string(x))
   961  		case float64:
   962  			if !validFloat(x) {
   963  				return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x)
   964  			}
   965  			facetValue.Type = pb.FacetValue_NUMBER.Enum()
   966  			facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
   967  		default:
   968  			return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value))
   969  		}
   970  		dst = append(dst, &pb.Facet{
   971  			Name:  proto.String(f.Name),
   972  			Value: facetValue,
   973  		})
   974  	}
   975  	return dst, nil
   976  }
   977  
   978  // loadDoc converts from protobufs to a struct pointer or
   979  // FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's
   980  // stored fields and facets, and any document metadata.  An additional slice of
   981  // fields, exprs, may optionally be provided to contain any derived expressions
   982  // requested by the developer.
   983  func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) {
   984  	fields, err := protoToFields(src.Field)
   985  	if err != nil {
   986  		return err
   987  	}
   988  	facets, err := protoToFacets(src.Facet)
   989  	if err != nil {
   990  		return err
   991  	}
   992  	if len(exprs) > 0 {
   993  		exprFields, err := protoToFields(exprs)
   994  		if err != nil {
   995  			return err
   996  		}
   997  		// Mark each field as derived.
   998  		for i := range exprFields {
   999  			exprFields[i].Derived = true
  1000  		}
  1001  		fields = append(fields, exprFields...)
  1002  	}
  1003  	meta := &DocumentMetadata{
  1004  		Rank:   int(src.GetOrderId()),
  1005  		Facets: facets,
  1006  	}
  1007  	switch x := dst.(type) {
  1008  	case FieldLoadSaver:
  1009  		return x.Load(fields, meta)
  1010  	default:
  1011  		return loadStructWithMeta(dst, fields, meta)
  1012  	}
  1013  }
  1014  
  1015  func protoToFields(fields []*pb.Field) ([]Field, error) {
  1016  	dst := make([]Field, 0, len(fields))
  1017  	for _, field := range fields {
  1018  		fieldValue := field.GetValue()
  1019  		f := Field{
  1020  			Name: field.GetName(),
  1021  		}
  1022  		switch fieldValue.GetType() {
  1023  		case pb.FieldValue_TEXT:
  1024  			f.Value = fieldValue.GetStringValue()
  1025  			f.Language = fieldValue.GetLanguage()
  1026  		case pb.FieldValue_ATOM:
  1027  			f.Value = Atom(fieldValue.GetStringValue())
  1028  		case pb.FieldValue_HTML:
  1029  			f.Value = HTML(fieldValue.GetStringValue())
  1030  			f.Language = fieldValue.GetLanguage()
  1031  		case pb.FieldValue_DATE:
  1032  			sv := fieldValue.GetStringValue()
  1033  			millis, err := strconv.ParseInt(sv, 10, 64)
  1034  			if err != nil {
  1035  				return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err)
  1036  			}
  1037  			f.Value = time.Unix(0, millis*1e6)
  1038  		case pb.FieldValue_NUMBER:
  1039  			sv := fieldValue.GetStringValue()
  1040  			x, err := strconv.ParseFloat(sv, 64)
  1041  			if err != nil {
  1042  				return nil, err
  1043  			}
  1044  			f.Value = x
  1045  		case pb.FieldValue_GEO:
  1046  			geoValue := fieldValue.GetGeo()
  1047  			geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()}
  1048  			if !geoPoint.Valid() {
  1049  				return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint)
  1050  			}
  1051  			f.Value = geoPoint
  1052  		default:
  1053  			return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType())
  1054  		}
  1055  		dst = append(dst, f)
  1056  	}
  1057  	return dst, nil
  1058  }
  1059  
  1060  func protoToFacets(facets []*pb.Facet) ([]Facet, error) {
  1061  	if len(facets) == 0 {
  1062  		return nil, nil
  1063  	}
  1064  	dst := make([]Facet, 0, len(facets))
  1065  	for _, facet := range facets {
  1066  		facetValue := facet.GetValue()
  1067  		f := Facet{
  1068  			Name: facet.GetName(),
  1069  		}
  1070  		switch facetValue.GetType() {
  1071  		case pb.FacetValue_ATOM:
  1072  			f.Value = Atom(facetValue.GetStringValue())
  1073  		case pb.FacetValue_NUMBER:
  1074  			sv := facetValue.GetStringValue()
  1075  			x, err := strconv.ParseFloat(sv, 64)
  1076  			if err != nil {
  1077  				return nil, err
  1078  			}
  1079  			f.Value = x
  1080  		default:
  1081  			return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType())
  1082  		}
  1083  		dst = append(dst, f)
  1084  	}
  1085  	return dst, nil
  1086  }
  1087  
  1088  func namespaceMod(m proto.Message, namespace string) {
  1089  	set := func(s **string) {
  1090  		if *s == nil {
  1091  			*s = &namespace
  1092  		}
  1093  	}
  1094  	switch m := m.(type) {
  1095  	case *pb.IndexDocumentRequest:
  1096  		set(&m.Params.IndexSpec.Namespace)
  1097  	case *pb.ListDocumentsRequest:
  1098  		set(&m.Params.IndexSpec.Namespace)
  1099  	case *pb.DeleteDocumentRequest:
  1100  		set(&m.Params.IndexSpec.Namespace)
  1101  	case *pb.SearchRequest:
  1102  		set(&m.Params.IndexSpec.Namespace)
  1103  	}
  1104  }
  1105  
  1106  func init() {
  1107  	internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name)
  1108  	internal.NamespaceMods["search"] = namespaceMod
  1109  }