github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/chat/search/utils.go (about) 1 package search 2 3 import ( 4 "context" 5 "fmt" 6 "regexp" 7 "strings" 8 9 "github.com/araddon/dateparse" 10 mapset "github.com/deckarep/golang-set" 11 "github.com/keybase/client/go/chat/globals" 12 "github.com/keybase/client/go/chat/utils" 13 "github.com/keybase/client/go/protocol/chat1" 14 "github.com/keybase/client/go/protocol/gregor1" 15 porterstemmer "github.com/keybase/go-porterstemmer" 16 ) 17 18 // Split on whitespace, punctuation, code and quote markdown separators 19 var splitExpr = regexp.MustCompile(`[\s\.,\?!]`) 20 21 // Strip the following separators to create tokens 22 var stripSeps = []string{ 23 // groupings 24 "<", ">", 25 "\\(", "\\)", 26 "\\[", "\\]", 27 "\\{", "\\}", 28 "\"", 29 "'", 30 // phone number delimiter 31 "-", 32 // mentions 33 "@", 34 "#", 35 // markdown 36 "\\*", 37 "_", 38 "~", 39 "`", 40 } 41 var stripExpr = regexp.MustCompile(strings.Join(stripSeps, "|")) 42 43 func prefixes(token string) (res []string) { 44 if len(token) < MinTokenLength { 45 return nil 46 } 47 for i := range token { 48 if i < MinTokenLength { 49 continue 50 } 51 // Skip any prefixes longer than `maxPrefixLength` to limit the index size. 52 if i > maxPrefixLength { 53 break 54 } 55 res = append(res, token[:i]) 56 } 57 return res 58 } 59 60 type tokenMap map[string]map[string]chat1.EmptyStruct 61 62 // getIndexTokens splits the content of the given message on whitespace and 63 // special characters returning a map of tokens to aliases normalized to lowercase. 64 func tokenize(msgText string) tokenMap { 65 if msgText == "" { 66 return nil 67 } 68 69 // split the message text up on basic punctuation/spaces 70 tokens := splitExpr.Split(msgText, -1) 71 tokenMap := tokenMap{} 72 for _, token := range tokens { 73 if len(token) < MinTokenLength { 74 continue 75 } 76 77 token = strings.ToLower(token) 78 if _, ok := tokenMap[token]; !ok { 79 tokenMap[token] = map[string]chat1.EmptyStruct{} 80 } 81 82 // strip separators to raw tokens which we count as an alias to the 83 // original token 84 stripped := stripExpr.Split(token, -1) 85 for _, s := range stripped { 86 if s == "" { 87 continue 88 } 89 tokenMap[token][s] = chat1.EmptyStruct{} 90 91 // add the stem as an alias 92 stemmed := porterstemmer.StemWithoutLowerCasing([]rune(s)) 93 tokenMap[token][string(stemmed)] = chat1.EmptyStruct{} 94 95 // calculate prefixes to alias to the token 96 for _, prefix := range prefixes(s) { 97 tokenMap[token][prefix] = chat1.EmptyStruct{} 98 } 99 } 100 // drop the original token from the set of aliases 101 delete(tokenMap[token], token) 102 } 103 return tokenMap 104 } 105 106 func tokensFromMsg(msg chat1.MessageUnboxed) tokenMap { 107 return tokenize(msg.SearchableText()) 108 } 109 110 func msgIDsFromSet(set mapset.Set) []chat1.MessageID { 111 if set == nil { 112 return nil 113 } 114 msgIDSlice := []chat1.MessageID{} 115 for _, el := range set.ToSlice() { 116 msgID, ok := el.(chat1.MessageID) 117 if ok { 118 msgIDSlice = append(msgIDSlice, msgID) 119 } 120 } 121 return msgIDSlice 122 } 123 124 func searchMatches(msg chat1.MessageUnboxed, queryRe *regexp.Regexp) (validMatches []chat1.ChatSearchMatch) { 125 msgText := msg.SearchableText() 126 matches := queryRe.FindAllStringIndex(msgText, -1) 127 for _, m := range matches { 128 if len(m) != 2 { 129 // sanity check but regex package should always return a two 130 // element slice 131 continue 132 } 133 startIndex := m[0] 134 endIndex := m[1] 135 if startIndex != endIndex { 136 validMatches = append(validMatches, chat1.ChatSearchMatch{ 137 StartIndex: startIndex, 138 EndIndex: endIndex, 139 Match: msgText[startIndex:endIndex], 140 }) 141 } 142 } 143 return validMatches 144 } 145 146 // Order messages ascending by ID for presentation 147 func getUIMsgs(ctx context.Context, g *globals.Context, convID chat1.ConversationID, 148 uid gregor1.UID, msgs []chat1.MessageUnboxed) (uiMsgs []chat1.UIMessage) { 149 for i := len(msgs) - 1; i >= 0; i-- { 150 msg := msgs[i] 151 uiMsg := utils.PresentMessageUnboxed(ctx, g, msg, uid, convID) 152 uiMsgs = append(uiMsgs, uiMsg) 153 } 154 return uiMsgs 155 } 156 157 const beforeFilter = "before:" 158 const afterFilter = "after:" 159 const fromFilter = "from:" 160 const toFilter = "to:" 161 162 var senderRegex = regexp.MustCompile(fmt.Sprintf( 163 "(%s|%s)(@?[a-z0-9][a-z0-9_]+)", fromFilter, toFilter)) 164 var dateRangeRegex = regexp.MustCompile(fmt.Sprintf( 165 `(%s|%s)(\d{1,4}[-/\.]+\d{1,2}[-/\.]+\d{1,4})`, beforeFilter, afterFilter)) 166 167 func UpgradeSearchOptsFromQuery(query string, opts chat1.SearchOpts, username string) (string, chat1.SearchOpts) { 168 query = strings.Trim(query, " ") 169 var hasQueryOpts bool 170 171 // To/From 172 matches := senderRegex.FindAllStringSubmatch(query, 2) 173 for _, match := range matches { 174 // [fullMatch, filter, sender] 175 if len(match) != 3 { 176 continue 177 } 178 hasQueryOpts = true 179 query = strings.TrimSpace(strings.ReplaceAll(query, match[0], "")) 180 sender := strings.TrimSpace(strings.ReplaceAll(match[2], "@", "")) 181 if sender == "me" { 182 sender = username 183 } 184 switch match[1] { 185 case fromFilter: 186 opts.SentBy = sender 187 case toFilter: 188 opts.SentTo = sender 189 } 190 } 191 if opts.SentTo == username { 192 opts.MatchMentions = true 193 } 194 195 matches = dateRangeRegex.FindAllStringSubmatch(query, 2) 196 for _, match := range matches { 197 // [fullMatch, filter, dateRange] 198 if len(match) != 3 { 199 continue 200 } 201 hasQueryOpts = true 202 query = strings.TrimSpace(strings.Replace(query, match[0], "", 1)) 203 time, err := dateparse.ParseAny(strings.TrimSpace(match[2])) 204 if err != nil { 205 continue 206 } 207 208 gtime := gregor1.ToTime(time) 209 switch match[1] { 210 case beforeFilter: 211 opts.SentBefore = gtime 212 case afterFilter: 213 opts.SentAfter = gtime 214 } 215 } 216 217 if hasQueryOpts && len(query) == 0 { 218 query = "/.*/" 219 } 220 // IsRegex 221 if len(query) > 2 && query[0] == '/' && query[len(query)-1] == '/' { 222 query = query[1 : len(query)-1] 223 opts.IsRegex = true 224 } 225 return query, opts 226 } 227 228 func MinMaxIDs(conv chat1.Conversation) (min, max chat1.MessageID) { 229 // lowest msgID we care about 230 min = conv.GetMaxDeletedUpTo() 231 if min == 0 { 232 min = 1 233 } 234 // highest msgID we care about 235 max = conv.GetMaxMessageID() 236 return min, max 237 }