code.gitea.io/gitea@v1.19.3/modules/indexer/issues/bleve.go (about)

     1  // Copyright 2018 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package issues
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"strconv"
    11  
    12  	gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve"
    13  	"code.gitea.io/gitea/modules/log"
    14  	"code.gitea.io/gitea/modules/util"
    15  
    16  	"github.com/blevesearch/bleve/v2"
    17  	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
    18  	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
    19  	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
    20  	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
    21  	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
    22  	"github.com/blevesearch/bleve/v2/index/upsidedown"
    23  	"github.com/blevesearch/bleve/v2/mapping"
    24  	"github.com/blevesearch/bleve/v2/search/query"
    25  	"github.com/ethantkoenig/rupture"
    26  )
    27  
// Names and schema version shared by the bleve issue indexer in this file.
const (
	// issueIndexerAnalyzer is the name under which createIssueIndexer registers
	// the custom analyzer; Search passes the same name to its phrase queries.
	issueIndexerAnalyzer      = "issueIndexer"
	// issueIndexerDocType is the document type reported by
	// BleveIndexerData.Type and the name the document mapping is registered
	// under in createIssueIndexer.
	issueIndexerDocType       = "issueIndexerDocType"
	// issueIndexerLatestVersion is the metadata version Init hands to
	// openIndexer and createIssueIndexer. openIndexer deletes any on-disk index
	// whose recorded version is lower than this value.
	issueIndexerLatestVersion = 2
)
    33  
    34  // indexerID a bleve-compatible unique identifier for an integer id
    35  func indexerID(id int64) string {
    36  	return strconv.FormatInt(id, 36)
    37  }
    38  
    39  // idOfIndexerID the integer id associated with an indexer id
    40  func idOfIndexerID(indexerID string) (int64, error) {
    41  	id, err := strconv.ParseInt(indexerID, 36, 64)
    42  	if err != nil {
    43  		return 0, fmt.Errorf("Unexpected indexer ID %s: %w", indexerID, err)
    44  	}
    45  	return id, nil
    46  }
    47  
    48  // numericEqualityQuery a numeric equality query for the given value and field
    49  func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
    50  	f := float64(value)
    51  	tru := true
    52  	q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
    53  	q.SetField(field)
    54  	return q
    55  }
    56  
    57  func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
    58  	q := bleve.NewMatchPhraseQuery(matchPhrase)
    59  	q.FieldVal = field
    60  	q.Analyzer = analyzer
    61  	return q
    62  }
    63  
    64  const unicodeNormalizeName = "unicodeNormalize"
    65  
    66  func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
    67  	return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{
    68  		"type": unicodenorm.Name,
    69  		"form": unicodenorm.NFC,
    70  	})
    71  }
    72  
// maxBatchSize is the size argument Index and Delete pass to
// gitea_bleve.NewFlushingBatch.
// NOTE(review): presumably the number of queued operations after which the
// batch is flushed automatically - confirm against NewFlushingBatch.
const maxBatchSize = 16
    74  
    75  // openIndexer open the index at the specified path, checking for metadata
    76  // updates and bleve version updates.  If index needs to be created (or
    77  // re-created), returns (nil, nil)
    78  func openIndexer(path string, latestVersion int) (bleve.Index, error) {
    79  	_, err := os.Stat(path)
    80  	if err != nil && os.IsNotExist(err) {
    81  		return nil, nil
    82  	} else if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	metadata, err := rupture.ReadIndexMetadata(path)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	if metadata.Version < latestVersion {
    91  		// the indexer is using a previous version, so we should delete it and
    92  		// re-populate
    93  		return nil, util.RemoveAll(path)
    94  	}
    95  
    96  	index, err := bleve.Open(path)
    97  	if err != nil && err == upsidedown.IncompatibleVersion {
    98  		// the indexer was built with a previous version of bleve, so we should
    99  		// delete it and re-populate
   100  		return nil, util.RemoveAll(path)
   101  	} else if err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	return index, nil
   106  }
   107  
   108  // BleveIndexerData an update to the issue indexer
   109  type BleveIndexerData IndexerData
   110  
   111  // Type returns the document type, for bleve's mapping.Classifier interface.
   112  func (i *BleveIndexerData) Type() string {
   113  	return issueIndexerDocType
   114  }
   115  
   116  // createIssueIndexer create an issue indexer if one does not already exist
   117  func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) {
   118  	mapping := bleve.NewIndexMapping()
   119  	docMapping := bleve.NewDocumentMapping()
   120  
   121  	numericFieldMapping := bleve.NewNumericFieldMapping()
   122  	numericFieldMapping.IncludeInAll = false
   123  	docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
   124  
   125  	textFieldMapping := bleve.NewTextFieldMapping()
   126  	textFieldMapping.Store = false
   127  	textFieldMapping.IncludeInAll = false
   128  	docMapping.AddFieldMappingsAt("Title", textFieldMapping)
   129  	docMapping.AddFieldMappingsAt("Content", textFieldMapping)
   130  	docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
   131  
   132  	if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
   133  		return nil, err
   134  	} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{
   135  		"type":          custom.Name,
   136  		"char_filters":  []string{},
   137  		"tokenizer":     unicode.Name,
   138  		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
   139  	}); err != nil {
   140  		return nil, err
   141  	}
   142  
   143  	mapping.DefaultAnalyzer = issueIndexerAnalyzer
   144  	mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
   145  	mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
   146  
   147  	index, err := bleve.New(path, mapping)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{
   153  		Version: latestVersion,
   154  	}); err != nil {
   155  		return nil, err
   156  	}
   157  	return index, nil
   158  }
   159  
   160  var _ Indexer = &BleveIndexer{}
   161  
   162  // BleveIndexer implements Indexer interface
   163  type BleveIndexer struct {
   164  	indexDir string
   165  	indexer  bleve.Index
   166  }
   167  
   168  // NewBleveIndexer creates a new bleve local indexer
   169  func NewBleveIndexer(indexDir string) *BleveIndexer {
   170  	return &BleveIndexer{
   171  		indexDir: indexDir,
   172  	}
   173  }
   174  
   175  // Init will initialize the indexer
   176  func (b *BleveIndexer) Init() (bool, error) {
   177  	var err error
   178  	b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion)
   179  	if err != nil {
   180  		return false, err
   181  	}
   182  	if b.indexer != nil {
   183  		return true, nil
   184  	}
   185  
   186  	b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion)
   187  	return false, err
   188  }
   189  
   190  // SetAvailabilityChangeCallback does nothing
   191  func (b *BleveIndexer) SetAvailabilityChangeCallback(callback func(bool)) {
   192  }
   193  
   194  // Ping does nothing
   195  func (b *BleveIndexer) Ping() bool {
   196  	return true
   197  }
   198  
   199  // Close will close the bleve indexer
   200  func (b *BleveIndexer) Close() {
   201  	if b.indexer != nil {
   202  		if err := b.indexer.Close(); err != nil {
   203  			log.Error("Error whilst closing indexer: %v", err)
   204  		}
   205  	}
   206  }
   207  
   208  // Index will save the index data
   209  func (b *BleveIndexer) Index(issues []*IndexerData) error {
   210  	batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize)
   211  	for _, issue := range issues {
   212  		if err := batch.Index(indexerID(issue.ID), struct {
   213  			RepoID   int64
   214  			Title    string
   215  			Content  string
   216  			Comments []string
   217  		}{
   218  			RepoID:   issue.RepoID,
   219  			Title:    issue.Title,
   220  			Content:  issue.Content,
   221  			Comments: issue.Comments,
   222  		}); err != nil {
   223  			return err
   224  		}
   225  	}
   226  	return batch.Flush()
   227  }
   228  
   229  // Delete deletes indexes by ids
   230  func (b *BleveIndexer) Delete(ids ...int64) error {
   231  	batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize)
   232  	for _, id := range ids {
   233  		if err := batch.Delete(indexerID(id)); err != nil {
   234  			return err
   235  		}
   236  	}
   237  	return batch.Flush()
   238  }
   239  
   240  // Search searches for issues by given conditions.
   241  // Returns the matching issue IDs
   242  func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) {
   243  	var repoQueriesP []*query.NumericRangeQuery
   244  	for _, repoID := range repoIDs {
   245  		repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
   246  	}
   247  	repoQueries := make([]query.Query, len(repoQueriesP))
   248  	for i, v := range repoQueriesP {
   249  		repoQueries[i] = query.Query(v)
   250  	}
   251  
   252  	indexerQuery := bleve.NewConjunctionQuery(
   253  		bleve.NewDisjunctionQuery(repoQueries...),
   254  		bleve.NewDisjunctionQuery(
   255  			newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
   256  			newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
   257  			newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
   258  		))
   259  	search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
   260  	search.SortBy([]string{"-_score"})
   261  
   262  	result, err := b.indexer.SearchInContext(ctx, search)
   263  	if err != nil {
   264  		return nil, err
   265  	}
   266  
   267  	ret := SearchResult{
   268  		Hits: make([]Match, 0, len(result.Hits)),
   269  	}
   270  	for _, hit := range result.Hits {
   271  		id, err := idOfIndexerID(hit.ID)
   272  		if err != nil {
   273  			return nil, err
   274  		}
   275  		ret.Hits = append(ret.Hits, Match{
   276  			ID: id,
   277  		})
   278  	}
   279  	return &ret, nil
   280  }