code.gitea.io/gitea@v1.22.3/modules/indexer/issues/bleve/bleve.go (about) 1 // Copyright 2018 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package bleve 5 6 import ( 7 "context" 8 9 indexer_internal "code.gitea.io/gitea/modules/indexer/internal" 10 inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" 11 "code.gitea.io/gitea/modules/indexer/issues/internal" 12 13 "github.com/blevesearch/bleve/v2" 14 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 15 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 16 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 17 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 18 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 19 "github.com/blevesearch/bleve/v2/mapping" 20 "github.com/blevesearch/bleve/v2/search/query" 21 ) 22 23 const ( 24 issueIndexerAnalyzer = "issueIndexer" 25 issueIndexerDocType = "issueIndexerDocType" 26 issueIndexerLatestVersion = 4 27 ) 28 29 const unicodeNormalizeName = "unicodeNormalize" 30 31 func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { 32 return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 33 "type": unicodenorm.Name, 34 "form": unicodenorm.NFC, 35 }) 36 } 37 38 const maxBatchSize = 16 39 40 // IndexerData an update to the issue indexer 41 type IndexerData internal.IndexerData 42 43 // Type returns the document type, for bleve's mapping.Classifier interface. 44 func (i *IndexerData) Type() string { 45 return issueIndexerDocType 46 } 47 48 // generateIssueIndexMapping generates the bleve index mapping for issues 49 func generateIssueIndexMapping() (mapping.IndexMapping, error) { 50 mapping := bleve.NewIndexMapping() 51 docMapping := bleve.NewDocumentMapping() 52 53 numericFieldMapping := bleve.NewNumericFieldMapping() 54 numericFieldMapping.Store = false 55 numericFieldMapping.IncludeInAll = false 56 docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping) 57 58 textFieldMapping := bleve.NewTextFieldMapping() 59 textFieldMapping.Store = false 60 textFieldMapping.IncludeInAll = false 61 62 boolFieldMapping := bleve.NewBooleanFieldMapping() 63 boolFieldMapping.Store = false 64 boolFieldMapping.IncludeInAll = false 65 66 numberFieldMapping := bleve.NewNumericFieldMapping() 67 numberFieldMapping.Store = false 68 numberFieldMapping.IncludeInAll = false 69 70 docMapping.AddFieldMappingsAt("is_public", boolFieldMapping) 71 72 docMapping.AddFieldMappingsAt("title", textFieldMapping) 73 docMapping.AddFieldMappingsAt("content", textFieldMapping) 74 docMapping.AddFieldMappingsAt("comments", textFieldMapping) 75 76 docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping) 77 docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping) 78 docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping) 79 docMapping.AddFieldMappingsAt("no_label", boolFieldMapping) 80 docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping) 81 docMapping.AddFieldMappingsAt("project_id", numberFieldMapping) 82 docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping) 83 docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping) 84 docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping) 85 docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping) 86 docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping) 87 docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping) 88 docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping) 89 docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping) 90 91 docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping) 92 docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping) 93 docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping) 94 95 if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { 96 return nil, err 97 } else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 98 "type": custom.Name, 99 "char_filters": []string{}, 100 "tokenizer": unicode.Name, 101 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 102 }); err != nil { 103 return nil, err 104 } 105 106 mapping.DefaultAnalyzer = issueIndexerAnalyzer 107 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 108 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 109 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs 110 111 return mapping, nil 112 } 113 114 var _ internal.Indexer = &Indexer{} 115 116 // Indexer implements Indexer interface 117 type Indexer struct { 118 inner *inner_bleve.Indexer 119 indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much 120 } 121 122 // NewIndexer creates a new bleve local indexer 123 func NewIndexer(indexDir string) *Indexer { 124 inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) 125 return &Indexer{ 126 Indexer: inner, 127 inner: inner, 128 } 129 } 130 131 // Index will save the index data 132 func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error { 133 batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) 134 for _, issue := range issues { 135 if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil { 136 return err 137 } 138 } 139 return batch.Flush() 140 } 141 142 // Delete deletes indexes by ids 143 func (b *Indexer) Delete(_ context.Context, ids ...int64) error { 144 batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) 145 for _, id := range ids { 146 if err := batch.Delete(indexer_internal.Base36(id)); err != nil { 147 return err 148 } 149 } 150 return batch.Flush() 151 } 152 153 // Search searches for issues by given conditions. 154 // Returns the matching issue IDs 155 func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { 156 var queries []query.Query 157 158 if options.Keyword != "" { 159 fuzziness := 0 160 if options.IsFuzzyKeyword { 161 fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) 162 } 163 164 queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ 165 inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), 166 inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), 167 inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), 168 }...)) 169 } 170 171 if len(options.RepoIDs) > 0 || options.AllPublic { 172 var repoQueries []query.Query 173 for _, repoID := range options.RepoIDs { 174 repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id")) 175 } 176 if options.AllPublic { 177 repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public")) 178 } 179 queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...)) 180 } 181 182 if options.IsPull.Has() { 183 queries = append(queries, inner_bleve.BoolFieldQuery(options.IsPull.Value(), "is_pull")) 184 } 185 if options.IsClosed.Has() { 186 queries = append(queries, inner_bleve.BoolFieldQuery(options.IsClosed.Value(), "is_closed")) 187 } 188 189 if options.NoLabelOnly { 190 queries = append(queries, inner_bleve.BoolFieldQuery(true, "no_label")) 191 } else { 192 if len(options.IncludedLabelIDs) > 0 { 193 var includeQueries []query.Query 194 for _, labelID := range options.IncludedLabelIDs { 195 includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids")) 196 } 197 queries = append(queries, bleve.NewConjunctionQuery(includeQueries...)) 198 } else if len(options.IncludedAnyLabelIDs) > 0 { 199 var includeQueries []query.Query 200 for _, labelID := range options.IncludedAnyLabelIDs { 201 includeQueries = append(includeQueries, inner_bleve.NumericEqualityQuery(labelID, "label_ids")) 202 } 203 queries = append(queries, bleve.NewDisjunctionQuery(includeQueries...)) 204 } 205 if len(options.ExcludedLabelIDs) > 0 { 206 var excludeQueries []query.Query 207 for _, labelID := range options.ExcludedLabelIDs { 208 q := bleve.NewBooleanQuery() 209 q.AddMustNot(inner_bleve.NumericEqualityQuery(labelID, "label_ids")) 210 excludeQueries = append(excludeQueries, q) 211 } 212 queries = append(queries, bleve.NewConjunctionQuery(excludeQueries...)) 213 } 214 } 215 216 if len(options.MilestoneIDs) > 0 { 217 var milestoneQueries []query.Query 218 for _, milestoneID := range options.MilestoneIDs { 219 milestoneQueries = append(milestoneQueries, inner_bleve.NumericEqualityQuery(milestoneID, "milestone_id")) 220 } 221 queries = append(queries, bleve.NewDisjunctionQuery(milestoneQueries...)) 222 } 223 224 if options.ProjectID.Has() { 225 queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectID.Value(), "project_id")) 226 } 227 if options.ProjectBoardID.Has() { 228 queries = append(queries, inner_bleve.NumericEqualityQuery(options.ProjectBoardID.Value(), "project_board_id")) 229 } 230 231 if options.PosterID.Has() { 232 queries = append(queries, inner_bleve.NumericEqualityQuery(options.PosterID.Value(), "poster_id")) 233 } 234 235 if options.AssigneeID.Has() { 236 queries = append(queries, inner_bleve.NumericEqualityQuery(options.AssigneeID.Value(), "assignee_id")) 237 } 238 239 if options.MentionID.Has() { 240 queries = append(queries, inner_bleve.NumericEqualityQuery(options.MentionID.Value(), "mention_ids")) 241 } 242 243 if options.ReviewedID.Has() { 244 queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewedID.Value(), "reviewed_ids")) 245 } 246 if options.ReviewRequestedID.Has() { 247 queries = append(queries, inner_bleve.NumericEqualityQuery(options.ReviewRequestedID.Value(), "review_requested_ids")) 248 } 249 250 if options.SubscriberID.Has() { 251 queries = append(queries, inner_bleve.NumericEqualityQuery(options.SubscriberID.Value(), "subscriber_ids")) 252 } 253 254 if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() { 255 queries = append(queries, inner_bleve.NumericRangeInclusiveQuery( 256 options.UpdatedAfterUnix, 257 options.UpdatedBeforeUnix, 258 "updated_unix")) 259 } 260 261 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) 262 if len(queries) == 0 { 263 indexerQuery = bleve.NewMatchAllQuery() 264 } 265 266 skip, limit := indexer_internal.ParsePaginator(options.Paginator) 267 search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false) 268 269 if options.SortBy == "" { 270 options.SortBy = internal.SortByCreatedAsc 271 } 272 273 search.SortBy([]string{string(options.SortBy), "-_id"}) 274 275 result, err := b.inner.Indexer.SearchInContext(ctx, search) 276 if err != nil { 277 return nil, err 278 } 279 280 ret := &internal.SearchResult{ 281 Total: int64(result.Total), 282 Hits: make([]internal.Match, 0, len(result.Hits)), 283 } 284 for _, hit := range result.Hits { 285 id, err := indexer_internal.ParseBase36(hit.ID) 286 if err != nil { 287 return nil, err 288 } 289 ret.Hits = append(ret.Hits, internal.Match{ 290 ID: id, 291 }) 292 } 293 return ret, nil 294 }