code.gitea.io/gitea@v1.22.3/modules/indexer/internal/bleve/util.go (about) 1 // Copyright 2023 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package bleve 5 6 import ( 7 "errors" 8 "os" 9 10 "code.gitea.io/gitea/modules/log" 11 "code.gitea.io/gitea/modules/util" 12 13 "github.com/blevesearch/bleve/v2" 14 "github.com/blevesearch/bleve/v2/index/upsidedown" 15 "github.com/ethantkoenig/rupture" 16 ) 17 18 // openIndexer open the index at the specified path, checking for metadata 19 // updates and bleve version updates. If index needs to be created (or 20 // re-created), returns (nil, nil) 21 func openIndexer(path string, latestVersion int) (bleve.Index, int, error) { 22 _, err := os.Stat(path) 23 if err != nil && os.IsNotExist(err) { 24 return nil, 0, nil 25 } else if err != nil { 26 return nil, 0, err 27 } 28 29 metadata, err := rupture.ReadIndexMetadata(path) 30 if err != nil { 31 return nil, 0, err 32 } 33 if metadata.Version < latestVersion { 34 // the indexer is using a previous version, so we should delete it and 35 // re-populate 36 return nil, metadata.Version, util.RemoveAll(path) 37 } 38 39 index, err := bleve.Open(path) 40 if err != nil { 41 if errors.Is(err, upsidedown.IncompatibleVersion) { 42 log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding") 43 return nil, 0, util.RemoveAll(path) 44 } 45 return nil, 0, err 46 } 47 48 return index, 0, nil 49 } 50 51 func GuessFuzzinessByKeyword(s string) int { 52 // according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2 53 // magic number 4 was chosen to determine the levenshtein distance per each character of a keyword 54 // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot. 55 for _, r := range s { 56 if r >= 128 { 57 return 0 58 } 59 } 60 return min(2, len(s)/4) 61 }