code.gitea.io/gitea@v1.22.3/modules/indexer/issues/meilisearch/meilisearch.go (about)

     1  // Copyright 2023 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package meilisearch
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  
    13  	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
    14  	inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
    15  	"code.gitea.io/gitea/modules/indexer/issues/internal"
    16  
    17  	"github.com/meilisearch/meilisearch-go"
    18  )
    19  
const (
	// issueIndexerLatestVersion is the index version handed to
	// inner_meilisearch.NewIndexer when the issue indexer is created.
	issueIndexerLatestVersion = 3

	// maxTotalHits is used both as the index's Pagination.MaxTotalHits setting
	// and as the upper bound passed to ParsePaginator when searching.
	// TODO: make this configurable if necessary
	maxTotalHits = 10000
)
    26  
// ErrMalformedResponse is returned when a search hit is not a JSON object or
// its "id" field is not a number. It is never expected, as we initialize the
// indexer ourselves and so define the types.
var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")

// Compile-time check that *Indexer satisfies internal.Indexer.
var _ internal.Indexer = &Indexer{}
    31  
// Indexer implements Indexer interface
//
// NewIndexer stores the same inner_meilisearch.Indexer value in both fields:
// inner gives the methods of this package access to the meilisearch client and
// the versioned index name, while the embedded indexer_internal.Indexer
// promotes only the methods of that narrower interface.
type Indexer struct {
	// inner is the concrete meilisearch indexer used by Index, Delete and Search.
	inner                    *inner_meilisearch.Indexer
	indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
}
    37  
    38  // NewIndexer creates a new meilisearch indexer
    39  func NewIndexer(url, apiKey, indexerName string) *Indexer {
    40  	settings := &meilisearch.Settings{
    41  		// The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
    42  		// So even if we specify the sort order, it could not be respected because the priority of "sort" is so low.
    43  		// So we need to specify the ranking rules to make sure the sort order is respected.
    44  		// See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
    45  		RankingRules: []string{"sort", // make sure "sort" has the highest priority
    46  			"words", "typo", "proximity", "attribute", "exactness"},
    47  
    48  		SearchableAttributes: []string{
    49  			"title",
    50  			"content",
    51  			"comments",
    52  		},
    53  		DisplayedAttributes: []string{
    54  			"id",
    55  			"title",
    56  			"content",
    57  			"comments",
    58  		},
    59  		FilterableAttributes: []string{
    60  			"repo_id",
    61  			"is_public",
    62  			"is_pull",
    63  			"is_closed",
    64  			"label_ids",
    65  			"no_label",
    66  			"milestone_id",
    67  			"project_id",
    68  			"project_board_id",
    69  			"poster_id",
    70  			"assignee_id",
    71  			"mention_ids",
    72  			"reviewed_ids",
    73  			"review_requested_ids",
    74  			"subscriber_ids",
    75  			"updated_unix",
    76  		},
    77  		SortableAttributes: []string{
    78  			"updated_unix",
    79  			"created_unix",
    80  			"deadline_unix",
    81  			"comment_count",
    82  			"id",
    83  		},
    84  		Pagination: &meilisearch.Pagination{
    85  			MaxTotalHits: maxTotalHits,
    86  		},
    87  	}
    88  
    89  	inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
    90  	indexer := &Indexer{
    91  		inner:   inner,
    92  		Indexer: inner,
    93  	}
    94  	return indexer
    95  }
    96  
    97  // Index will save the index data
    98  func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
    99  	if len(issues) == 0 {
   100  		return nil
   101  	}
   102  	for _, issue := range issues {
   103  		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue)
   104  		if err != nil {
   105  			return err
   106  		}
   107  	}
   108  	// TODO: bulk send index data
   109  	return nil
   110  }
   111  
   112  // Delete deletes indexes by ids
   113  func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
   114  	if len(ids) == 0 {
   115  		return nil
   116  	}
   117  
   118  	for _, id := range ids {
   119  		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10))
   120  		if err != nil {
   121  			return err
   122  		}
   123  	}
   124  	// TODO: bulk send deletes
   125  	return nil
   126  }
   127  
   128  // Search searches for issues by given conditions.
   129  // Returns the matching issue IDs
   130  func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
   131  	query := inner_meilisearch.FilterAnd{}
   132  
   133  	if len(options.RepoIDs) > 0 {
   134  		q := &inner_meilisearch.FilterOr{}
   135  		q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
   136  		if options.AllPublic {
   137  			q.Or(inner_meilisearch.NewFilterEq("is_public", true))
   138  		}
   139  		query.And(q)
   140  	}
   141  
   142  	if options.IsPull.Has() {
   143  		query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value()))
   144  	}
   145  	if options.IsClosed.Has() {
   146  		query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value()))
   147  	}
   148  
   149  	if options.NoLabelOnly {
   150  		query.And(inner_meilisearch.NewFilterEq("no_label", true))
   151  	} else {
   152  		if len(options.IncludedLabelIDs) > 0 {
   153  			q := &inner_meilisearch.FilterAnd{}
   154  			for _, labelID := range options.IncludedLabelIDs {
   155  				q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
   156  			}
   157  			query.And(q)
   158  		} else if len(options.IncludedAnyLabelIDs) > 0 {
   159  			query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
   160  		}
   161  		if len(options.ExcludedLabelIDs) > 0 {
   162  			q := &inner_meilisearch.FilterAnd{}
   163  			for _, labelID := range options.ExcludedLabelIDs {
   164  				q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
   165  			}
   166  			query.And(q)
   167  		}
   168  	}
   169  
   170  	if len(options.MilestoneIDs) > 0 {
   171  		query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
   172  	}
   173  
   174  	if options.ProjectID.Has() {
   175  		query.And(inner_meilisearch.NewFilterEq("project_id", options.ProjectID.Value()))
   176  	}
   177  	if options.ProjectBoardID.Has() {
   178  		query.And(inner_meilisearch.NewFilterEq("project_board_id", options.ProjectBoardID.Value()))
   179  	}
   180  
   181  	if options.PosterID.Has() {
   182  		query.And(inner_meilisearch.NewFilterEq("poster_id", options.PosterID.Value()))
   183  	}
   184  
   185  	if options.AssigneeID.Has() {
   186  		query.And(inner_meilisearch.NewFilterEq("assignee_id", options.AssigneeID.Value()))
   187  	}
   188  
   189  	if options.MentionID.Has() {
   190  		query.And(inner_meilisearch.NewFilterEq("mention_ids", options.MentionID.Value()))
   191  	}
   192  
   193  	if options.ReviewedID.Has() {
   194  		query.And(inner_meilisearch.NewFilterEq("reviewed_ids", options.ReviewedID.Value()))
   195  	}
   196  	if options.ReviewRequestedID.Has() {
   197  		query.And(inner_meilisearch.NewFilterEq("review_requested_ids", options.ReviewRequestedID.Value()))
   198  	}
   199  
   200  	if options.SubscriberID.Has() {
   201  		query.And(inner_meilisearch.NewFilterEq("subscriber_ids", options.SubscriberID.Value()))
   202  	}
   203  
   204  	if options.UpdatedAfterUnix.Has() {
   205  		query.And(inner_meilisearch.NewFilterGte("updated_unix", options.UpdatedAfterUnix.Value()))
   206  	}
   207  	if options.UpdatedBeforeUnix.Has() {
   208  		query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value()))
   209  	}
   210  
   211  	if options.SortBy == "" {
   212  		options.SortBy = internal.SortByCreatedAsc
   213  	}
   214  	sortBy := []string{
   215  		parseSortBy(options.SortBy),
   216  		"id:desc",
   217  	}
   218  
   219  	skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
   220  
   221  	counting := limit == 0
   222  	if counting {
   223  		// If set limit to 0, it will be 20 by default, and -1 is not allowed.
   224  		// See https://www.meilisearch.com/docs/reference/api/search#limit
   225  		// So set limit to 1 to make the cost as low as possible, then clear the result before returning.
   226  		limit = 1
   227  	}
   228  
   229  	keyword := options.Keyword
   230  	if !options.IsFuzzyKeyword {
   231  		// to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s)
   232  		// https://www.meilisearch.com/docs/reference/api/search#phrase-search
   233  		keyword = doubleQuoteKeyword(keyword)
   234  	}
   235  
   236  	searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
   237  		Filter:           query.Statement(),
   238  		Limit:            int64(limit),
   239  		Offset:           int64(skip),
   240  		Sort:             sortBy,
   241  		MatchingStrategy: "all",
   242  	})
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	if counting {
   248  		searchRes.Hits = nil
   249  	}
   250  
   251  	hits, err := convertHits(searchRes)
   252  	if err != nil {
   253  		return nil, err
   254  	}
   255  
   256  	return &internal.SearchResult{
   257  		Total: searchRes.EstimatedTotalHits,
   258  		Hits:  hits,
   259  	}, nil
   260  }
   261  
   262  func parseSortBy(sortBy internal.SortBy) string {
   263  	field := strings.TrimPrefix(string(sortBy), "-")
   264  	if strings.HasPrefix(string(sortBy), "-") {
   265  		return field + ":desc"
   266  	}
   267  	return field + ":asc"
   268  }
   269  
   270  func doubleQuoteKeyword(k string) string {
   271  	kp := strings.Split(k, " ")
   272  	parts := 0
   273  	for i := range kp {
   274  		part := strings.Trim(kp[i], "\"")
   275  		if part != "" {
   276  			kp[parts] = fmt.Sprintf(`"%s"`, part)
   277  			parts++
   278  		}
   279  	}
   280  	return strings.Join(kp[:parts], " ")
   281  }
   282  
   283  func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
   284  	hits := make([]internal.Match, 0, len(searchRes.Hits))
   285  	for _, hit := range searchRes.Hits {
   286  		hit, ok := hit.(map[string]any)
   287  		if !ok {
   288  			return nil, ErrMalformedResponse
   289  		}
   290  
   291  		issueID, ok := hit["id"].(float64)
   292  		if !ok {
   293  			return nil, ErrMalformedResponse
   294  		}
   295  
   296  		hits = append(hits, internal.Match{
   297  			ID: int64(issueID),
   298  		})
   299  	}
   300  	return hits, nil
   301  }