github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/config.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "runtime" 16 "strings" 17 18 "github.com/pkg/errors" 19 "github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords" 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/schema" 22 "github.com/weaviate/weaviate/usecases/config" 23 ) 24 25 var _NUMCPU = runtime.NumCPU() 26 27 func ValidateConfig(conf *models.InvertedIndexConfig) error { 28 if conf.CleanupIntervalSeconds < 0 { 29 return errors.Errorf("cleanup interval seconds must be > 0") 30 } 31 32 err := validateBM25Config(conf.Bm25) 33 if err != nil { 34 return err 35 } 36 37 err = validateStopwordConfig(conf.Stopwords) 38 if err != nil { 39 return err 40 } 41 42 return nil 43 } 44 45 func ConfigFromModel(iicm *models.InvertedIndexConfig) schema.InvertedIndexConfig { 46 var conf schema.InvertedIndexConfig 47 48 conf.IndexTimestamps = iicm.IndexTimestamps 49 conf.IndexNullState = iicm.IndexNullState 50 conf.IndexPropertyLength = iicm.IndexPropertyLength 51 52 if iicm.Bm25 == nil { 53 conf.BM25.K1 = float64(config.DefaultBM25k1) 54 conf.BM25.B = float64(config.DefaultBM25b) 55 } else { 56 conf.BM25.K1 = float64(iicm.Bm25.K1) 57 conf.BM25.B = float64(iicm.Bm25.B) 58 } 59 60 if iicm.Stopwords == nil { 61 conf.Stopwords = models.StopwordConfig{ 62 Preset: stopwords.EnglishPreset, 63 } 64 } else { 65 conf.Stopwords.Preset = iicm.Stopwords.Preset 66 conf.Stopwords.Additions = iicm.Stopwords.Additions 67 conf.Stopwords.Removals = iicm.Stopwords.Removals 68 } 69 70 return conf 71 } 72 73 func validateBM25Config(conf *models.BM25Config) error { 74 if conf == nil { 75 return nil 76 } 77 78 if conf.K1 < 0 { 79 return errors.Errorf("BM25.k1 must be >= 0") 80 } 81 if conf.B < 0 || conf.B > 1 { 82 return errors.Errorf("BM25.b must be <= 0 and <= 1") 83 } 84 85 return nil 86 } 87 88 func validateStopwordConfig(conf *models.StopwordConfig) error { 89 if conf == nil { 90 conf = &models.StopwordConfig{} 91 } 92 93 if conf.Preset == "" { 94 conf.Preset = stopwords.EnglishPreset 95 } 96 97 if _, ok := stopwords.Presets[conf.Preset]; !ok { 98 return errors.Errorf("stopwordPreset '%s' does not exist", conf.Preset) 99 } 100 101 err := validateStopwordAdditionsRemovals(conf) 102 if err != nil { 103 return err 104 } 105 106 return nil 107 } 108 109 func validateStopwordAdditionsRemovals(conf *models.StopwordConfig) error { 110 // the same stopword cannot exist 111 // in both additions and removals 112 foundAdditions := make(map[string]int) 113 114 for idx, add := range conf.Additions { 115 if strings.TrimSpace(add) == "" { 116 return errors.Errorf("cannot use whitespace in stopword.additions") 117 } 118 119 // save the index of the addition since it 120 // is readily available here. we will need 121 // this below when trimming additions that 122 // already exist in the selected preset 123 foundAdditions[add] = idx 124 } 125 126 for _, rem := range conf.Removals { 127 if strings.TrimSpace(rem) == "" { 128 return errors.Errorf("cannot use whitespace in stopword.removals") 129 } 130 131 if _, ok := foundAdditions[rem]; ok { 132 return errors.Errorf( 133 "found '%s' in both stopwords.additions and stopwords.removals", rem) 134 } 135 } 136 137 removeStopwordAdditionsIfInPreset(conf, foundAdditions) 138 return nil 139 } 140 141 func removeStopwordAdditionsIfInPreset(conf *models.StopwordConfig, foundAdditions map[string]int) { 142 presets := stopwords.Presets[conf.Preset] 143 144 // if any of the elements in stopwords.additions 145 // already exist in the preset, mark it as to 146 // be removed 147 indicesToRemove := make(map[int]bool) 148 for _, preset := range presets { 149 if idx, ok := foundAdditions[preset]; ok { 150 indicesToRemove[idx] = true 151 } 152 } 153 154 if len(indicesToRemove) == 0 { 155 return 156 } 157 158 // take remaining additions, build new list 159 var trimmedAdditions []string 160 for idx, add := range conf.Additions { 161 if _, ok := indicesToRemove[idx]; !ok { 162 trimmedAdditions = append(trimmedAdditions, add) 163 } 164 } 165 conf.Additions = trimmedAdditions 166 }