code.gitea.io/gitea@v1.19.3/modules/indexer/issues/bleve.go (about) 1 // Copyright 2018 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package issues 5 6 import ( 7 "context" 8 "fmt" 9 "os" 10 "strconv" 11 12 gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" 13 "code.gitea.io/gitea/modules/log" 14 "code.gitea.io/gitea/modules/util" 15 16 "github.com/blevesearch/bleve/v2" 17 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 18 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 19 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 20 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 21 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 22 "github.com/blevesearch/bleve/v2/index/upsidedown" 23 "github.com/blevesearch/bleve/v2/mapping" 24 "github.com/blevesearch/bleve/v2/search/query" 25 "github.com/ethantkoenig/rupture" 26 ) 27 28 const ( 29 issueIndexerAnalyzer = "issueIndexer" 30 issueIndexerDocType = "issueIndexerDocType" 31 issueIndexerLatestVersion = 2 32 ) 33 34 // indexerID a bleve-compatible unique identifier for an integer id 35 func indexerID(id int64) string { 36 return strconv.FormatInt(id, 36) 37 } 38 39 // idOfIndexerID the integer id associated with an indexer id 40 func idOfIndexerID(indexerID string) (int64, error) { 41 id, err := strconv.ParseInt(indexerID, 36, 64) 42 if err != nil { 43 return 0, fmt.Errorf("Unexpected indexer ID %s: %w", indexerID, err) 44 } 45 return id, nil 46 } 47 48 // numericEqualityQuery a numeric equality query for the given value and field 49 func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery { 50 f := float64(value) 51 tru := true 52 q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) 53 q.SetField(field) 54 return q 55 } 56 57 func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { 58 q := bleve.NewMatchPhraseQuery(matchPhrase) 59 q.FieldVal = field 60 q.Analyzer = analyzer 61 return q 62 } 63 64 const unicodeNormalizeName = "unicodeNormalize" 65 66 func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { 67 return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{ 68 "type": unicodenorm.Name, 69 "form": unicodenorm.NFC, 70 }) 71 } 72 73 const maxBatchSize = 16 74 75 // openIndexer open the index at the specified path, checking for metadata 76 // updates and bleve version updates. If index needs to be created (or 77 // re-created), returns (nil, nil) 78 func openIndexer(path string, latestVersion int) (bleve.Index, error) { 79 _, err := os.Stat(path) 80 if err != nil && os.IsNotExist(err) { 81 return nil, nil 82 } else if err != nil { 83 return nil, err 84 } 85 86 metadata, err := rupture.ReadIndexMetadata(path) 87 if err != nil { 88 return nil, err 89 } 90 if metadata.Version < latestVersion { 91 // the indexer is using a previous version, so we should delete it and 92 // re-populate 93 return nil, util.RemoveAll(path) 94 } 95 96 index, err := bleve.Open(path) 97 if err != nil && err == upsidedown.IncompatibleVersion { 98 // the indexer was built with a previous version of bleve, so we should 99 // delete it and re-populate 100 return nil, util.RemoveAll(path) 101 } else if err != nil { 102 return nil, err 103 } 104 105 return index, nil 106 } 107 108 // BleveIndexerData an update to the issue indexer 109 type BleveIndexerData IndexerData 110 111 // Type returns the document type, for bleve's mapping.Classifier interface. 112 func (i *BleveIndexerData) Type() string { 113 return issueIndexerDocType 114 } 115 116 // createIssueIndexer create an issue indexer if one does not already exist 117 func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) { 118 mapping := bleve.NewIndexMapping() 119 docMapping := bleve.NewDocumentMapping() 120 121 numericFieldMapping := bleve.NewNumericFieldMapping() 122 numericFieldMapping.IncludeInAll = false 123 docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping) 124 125 textFieldMapping := bleve.NewTextFieldMapping() 126 textFieldMapping.Store = false 127 textFieldMapping.IncludeInAll = false 128 docMapping.AddFieldMappingsAt("Title", textFieldMapping) 129 docMapping.AddFieldMappingsAt("Content", textFieldMapping) 130 docMapping.AddFieldMappingsAt("Comments", textFieldMapping) 131 132 if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { 133 return nil, err 134 } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ 135 "type": custom.Name, 136 "char_filters": []string{}, 137 "tokenizer": unicode.Name, 138 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 139 }); err != nil { 140 return nil, err 141 } 142 143 mapping.DefaultAnalyzer = issueIndexerAnalyzer 144 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 145 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 146 147 index, err := bleve.New(path, mapping) 148 if err != nil { 149 return nil, err 150 } 151 152 if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{ 153 Version: latestVersion, 154 }); err != nil { 155 return nil, err 156 } 157 return index, nil 158 } 159 160 var _ Indexer = &BleveIndexer{} 161 162 // BleveIndexer implements Indexer interface 163 type BleveIndexer struct { 164 indexDir string 165 indexer bleve.Index 166 } 167 168 // NewBleveIndexer creates a new bleve local indexer 169 func NewBleveIndexer(indexDir string) *BleveIndexer { 170 return &BleveIndexer{ 171 indexDir: indexDir, 172 } 173 } 174 175 // Init will initialize the indexer 176 func (b *BleveIndexer) Init() (bool, error) { 177 var err error 178 b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion) 179 if err != nil { 180 return false, err 181 } 182 if b.indexer != nil { 183 return true, nil 184 } 185 186 b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion) 187 return false, err 188 } 189 190 // SetAvailabilityChangeCallback does nothing 191 func (b *BleveIndexer) SetAvailabilityChangeCallback(callback func(bool)) { 192 } 193 194 // Ping does nothing 195 func (b *BleveIndexer) Ping() bool { 196 return true 197 } 198 199 // Close will close the bleve indexer 200 func (b *BleveIndexer) Close() { 201 if b.indexer != nil { 202 if err := b.indexer.Close(); err != nil { 203 log.Error("Error whilst closing indexer: %v", err) 204 } 205 } 206 } 207 208 // Index will save the index data 209 func (b *BleveIndexer) Index(issues []*IndexerData) error { 210 batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) 211 for _, issue := range issues { 212 if err := batch.Index(indexerID(issue.ID), struct { 213 RepoID int64 214 Title string 215 Content string 216 Comments []string 217 }{ 218 RepoID: issue.RepoID, 219 Title: issue.Title, 220 Content: issue.Content, 221 Comments: issue.Comments, 222 }); err != nil { 223 return err 224 } 225 } 226 return batch.Flush() 227 } 228 229 // Delete deletes indexes by ids 230 func (b *BleveIndexer) Delete(ids ...int64) error { 231 batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) 232 for _, id := range ids { 233 if err := batch.Delete(indexerID(id)); err != nil { 234 return err 235 } 236 } 237 return batch.Flush() 238 } 239 240 // Search searches for issues by given conditions. 241 // Returns the matching issue IDs 242 func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { 243 var repoQueriesP []*query.NumericRangeQuery 244 for _, repoID := range repoIDs { 245 repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID")) 246 } 247 repoQueries := make([]query.Query, len(repoQueriesP)) 248 for i, v := range repoQueriesP { 249 repoQueries[i] = query.Query(v) 250 } 251 252 indexerQuery := bleve.NewConjunctionQuery( 253 bleve.NewDisjunctionQuery(repoQueries...), 254 bleve.NewDisjunctionQuery( 255 newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer), 256 newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer), 257 newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer), 258 )) 259 search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false) 260 search.SortBy([]string{"-_score"}) 261 262 result, err := b.indexer.SearchInContext(ctx, search) 263 if err != nil { 264 return nil, err 265 } 266 267 ret := SearchResult{ 268 Hits: make([]Match, 0, len(result.Hits)), 269 } 270 for _, hit := range result.Hits { 271 id, err := idOfIndexerID(hit.ID) 272 if err != nil { 273 return nil, err 274 } 275 ret.Hits = append(ret.Hits, Match{ 276 ID: id, 277 }) 278 } 279 return &ret, nil 280 }