code.gitea.io/gitea@v1.22.3/modules/indexer/issues/elasticsearch/elasticsearch.go (about)

     1  // Copyright 2019 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package elasticsearch
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"code.gitea.io/gitea/modules/graceful"
    13  	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
    14  	inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
    15  	"code.gitea.io/gitea/modules/indexer/issues/internal"
    16  
    17  	"github.com/olivere/elastic/v7"
    18  )
    19  
    20  const (
    21  	issueIndexerLatestVersion = 1
    22  	// multi-match-types, currently only 2 types are used
    23  	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
    24  	esMultiMatchTypeBestFields   = "best_fields"
    25  	esMultiMatchTypePhrasePrefix = "phrase_prefix"
    26  )
    27  
    28  var _ internal.Indexer = &Indexer{}
    29  
    30  // Indexer implements Indexer interface
    31  type Indexer struct {
    32  	inner                    *inner_elasticsearch.Indexer
    33  	indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
    34  }
    35  
    36  // NewIndexer creates a new elasticsearch indexer
    37  func NewIndexer(url, indexerName string) *Indexer {
    38  	inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
    39  	indexer := &Indexer{
    40  		inner:   inner,
    41  		Indexer: inner,
    42  	}
    43  	return indexer
    44  }
    45  
    46  const (
    47  	defaultMapping = `
    48  {
    49  	"mappings": {
    50  		"properties": {
    51  			"id": { "type": "integer", "index": true },
    52  			"repo_id": { "type": "integer", "index": true },
    53  			"is_public": { "type": "boolean", "index": true },
    54  
    55  			"title": {  "type": "text", "index": true },
    56  			"content": { "type": "text", "index": true },
    57  			"comments": { "type" : "text", "index": true },
    58  
    59  			"is_pull": { "type": "boolean", "index": true },
    60  			"is_closed": { "type": "boolean", "index": true },
    61  			"label_ids": { "type": "integer", "index": true },
    62  			"no_label": { "type": "boolean", "index": true },
    63  			"milestone_id": { "type": "integer", "index": true },
    64  			"project_id": { "type": "integer", "index": true },
    65  			"project_board_id": { "type": "integer", "index": true },
    66  			"poster_id": { "type": "integer", "index": true },
    67  			"assignee_id": { "type": "integer", "index": true },
    68  			"mention_ids": { "type": "integer", "index": true },
    69  			"reviewed_ids": { "type": "integer", "index": true },
    70  			"review_requested_ids": { "type": "integer", "index": true },
    71  			"subscriber_ids": { "type": "integer", "index": true },
    72  			"updated_unix": { "type": "integer", "index": true },
    73  
    74  			"created_unix": { "type": "integer", "index": true },
    75  			"deadline_unix": { "type": "integer", "index": true },
    76  			"comment_count": { "type": "integer", "index": true }
    77  		}
    78  	}
    79  }
    80  `
    81  )
    82  
    83  // Index will save the index data
    84  func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
    85  	if len(issues) == 0 {
    86  		return nil
    87  	} else if len(issues) == 1 {
    88  		issue := issues[0]
    89  		_, err := b.inner.Client.Index().
    90  			Index(b.inner.VersionedIndexName()).
    91  			Id(fmt.Sprintf("%d", issue.ID)).
    92  			BodyJson(issue).
    93  			Do(ctx)
    94  		return err
    95  	}
    96  
    97  	reqs := make([]elastic.BulkableRequest, 0)
    98  	for _, issue := range issues {
    99  		reqs = append(reqs,
   100  			elastic.NewBulkIndexRequest().
   101  				Index(b.inner.VersionedIndexName()).
   102  				Id(fmt.Sprintf("%d", issue.ID)).
   103  				Doc(issue),
   104  		)
   105  	}
   106  
   107  	_, err := b.inner.Client.Bulk().
   108  		Index(b.inner.VersionedIndexName()).
   109  		Add(reqs...).
   110  		Do(graceful.GetManager().HammerContext())
   111  	return err
   112  }
   113  
   114  // Delete deletes indexes by ids
   115  func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
   116  	if len(ids) == 0 {
   117  		return nil
   118  	} else if len(ids) == 1 {
   119  		_, err := b.inner.Client.Delete().
   120  			Index(b.inner.VersionedIndexName()).
   121  			Id(fmt.Sprintf("%d", ids[0])).
   122  			Do(ctx)
   123  		return err
   124  	}
   125  
   126  	reqs := make([]elastic.BulkableRequest, 0)
   127  	for _, id := range ids {
   128  		reqs = append(reqs,
   129  			elastic.NewBulkDeleteRequest().
   130  				Index(b.inner.VersionedIndexName()).
   131  				Id(fmt.Sprintf("%d", id)),
   132  		)
   133  	}
   134  
   135  	_, err := b.inner.Client.Bulk().
   136  		Index(b.inner.VersionedIndexName()).
   137  		Add(reqs...).
   138  		Do(graceful.GetManager().HammerContext())
   139  	return err
   140  }
   141  
   142  // Search searches for issues by given conditions.
   143  // Returns the matching issue IDs
   144  func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
   145  	query := elastic.NewBoolQuery()
   146  
   147  	if options.Keyword != "" {
   148  		searchType := esMultiMatchTypePhrasePrefix
   149  		if options.IsFuzzyKeyword {
   150  			searchType = esMultiMatchTypeBestFields
   151  		}
   152  
   153  		query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
   154  	}
   155  
   156  	if len(options.RepoIDs) > 0 {
   157  		q := elastic.NewBoolQuery()
   158  		q.Should(elastic.NewTermsQuery("repo_id", toAnySlice(options.RepoIDs)...))
   159  		if options.AllPublic {
   160  			q.Should(elastic.NewTermQuery("is_public", true))
   161  		}
   162  		query.Must(q)
   163  	}
   164  
   165  	if options.IsPull.Has() {
   166  		query.Must(elastic.NewTermQuery("is_pull", options.IsPull.Value()))
   167  	}
   168  	if options.IsClosed.Has() {
   169  		query.Must(elastic.NewTermQuery("is_closed", options.IsClosed.Value()))
   170  	}
   171  
   172  	if options.NoLabelOnly {
   173  		query.Must(elastic.NewTermQuery("no_label", true))
   174  	} else {
   175  		if len(options.IncludedLabelIDs) > 0 {
   176  			q := elastic.NewBoolQuery()
   177  			for _, labelID := range options.IncludedLabelIDs {
   178  				q.Must(elastic.NewTermQuery("label_ids", labelID))
   179  			}
   180  			query.Must(q)
   181  		} else if len(options.IncludedAnyLabelIDs) > 0 {
   182  			query.Must(elastic.NewTermsQuery("label_ids", toAnySlice(options.IncludedAnyLabelIDs)...))
   183  		}
   184  		if len(options.ExcludedLabelIDs) > 0 {
   185  			q := elastic.NewBoolQuery()
   186  			for _, labelID := range options.ExcludedLabelIDs {
   187  				q.MustNot(elastic.NewTermQuery("label_ids", labelID))
   188  			}
   189  			query.Must(q)
   190  		}
   191  	}
   192  
   193  	if len(options.MilestoneIDs) > 0 {
   194  		query.Must(elastic.NewTermsQuery("milestone_id", toAnySlice(options.MilestoneIDs)...))
   195  	}
   196  
   197  	if options.ProjectID.Has() {
   198  		query.Must(elastic.NewTermQuery("project_id", options.ProjectID.Value()))
   199  	}
   200  	if options.ProjectBoardID.Has() {
   201  		query.Must(elastic.NewTermQuery("project_board_id", options.ProjectBoardID.Value()))
   202  	}
   203  
   204  	if options.PosterID.Has() {
   205  		query.Must(elastic.NewTermQuery("poster_id", options.PosterID.Value()))
   206  	}
   207  
   208  	if options.AssigneeID.Has() {
   209  		query.Must(elastic.NewTermQuery("assignee_id", options.AssigneeID.Value()))
   210  	}
   211  
   212  	if options.MentionID.Has() {
   213  		query.Must(elastic.NewTermQuery("mention_ids", options.MentionID.Value()))
   214  	}
   215  
   216  	if options.ReviewedID.Has() {
   217  		query.Must(elastic.NewTermQuery("reviewed_ids", options.ReviewedID.Value()))
   218  	}
   219  	if options.ReviewRequestedID.Has() {
   220  		query.Must(elastic.NewTermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
   221  	}
   222  
   223  	if options.SubscriberID.Has() {
   224  		query.Must(elastic.NewTermQuery("subscriber_ids", options.SubscriberID.Value()))
   225  	}
   226  
   227  	if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
   228  		q := elastic.NewRangeQuery("updated_unix")
   229  		if options.UpdatedAfterUnix.Has() {
   230  			q.Gte(options.UpdatedAfterUnix.Value())
   231  		}
   232  		if options.UpdatedBeforeUnix.Has() {
   233  			q.Lte(options.UpdatedBeforeUnix.Value())
   234  		}
   235  		query.Must(q)
   236  	}
   237  
   238  	if options.SortBy == "" {
   239  		options.SortBy = internal.SortByCreatedAsc
   240  	}
   241  	sortBy := []elastic.Sorter{
   242  		parseSortBy(options.SortBy),
   243  		elastic.NewFieldSort("id").Desc(),
   244  	}
   245  
   246  	// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
   247  	// TODO: make it configurable since it's configurable in elasticsearch
   248  	const maxPageSize = 10000
   249  
   250  	skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
   251  	searchResult, err := b.inner.Client.Search().
   252  		Index(b.inner.VersionedIndexName()).
   253  		Query(query).
   254  		SortBy(sortBy...).
   255  		From(skip).Size(limit).
   256  		Do(ctx)
   257  	if err != nil {
   258  		return nil, err
   259  	}
   260  
   261  	hits := make([]internal.Match, 0, limit)
   262  	for _, hit := range searchResult.Hits.Hits {
   263  		id, _ := strconv.ParseInt(hit.Id, 10, 64)
   264  		hits = append(hits, internal.Match{
   265  			ID: id,
   266  		})
   267  	}
   268  
   269  	return &internal.SearchResult{
   270  		Total: searchResult.TotalHits(),
   271  		Hits:  hits,
   272  	}, nil
   273  }
   274  
   275  func toAnySlice[T any](s []T) []any {
   276  	ret := make([]any, 0, len(s))
   277  	for _, item := range s {
   278  		ret = append(ret, item)
   279  	}
   280  	return ret
   281  }
   282  
   283  func parseSortBy(sortBy internal.SortBy) elastic.Sorter {
   284  	field := strings.TrimPrefix(string(sortBy), "-")
   285  	ret := elastic.NewFieldSort(field)
   286  	if strings.HasPrefix(string(sortBy), "-") {
   287  		ret.Desc()
   288  	}
   289  	return ret
   290  }