vitess.io/vitess@v0.16.2/web/vtadmin/src/util/tokenize.ts

/**
 * Copyright 2021 The Vitess Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

export interface Token {
    matches: string[];
    token: string;
    type: string;
}

/**
 * `tokenize` is a tiny, simple tokenizer that parses tokens from the `input` string
 * based on the regexes in `patterns`.
 *
 * At the time of writing, `tokenize` is only used as the tokenizer behind `tokenizeSearch`.
 * Isolating the tokenization logic in its own function, however, is a useful separation
 * of concerns should we want to swap in a different implementation.
 *
 * Since `tokenize` is regex-based, it is by no means as robust as a "real"
 * query syntax. If or when we need more complex parsing capabilities,
 * it would be worth investigating a true parser (like peg.js), combined with a well-known
 * query syntax (like Apache Lucene).
 *
 * Mostly lifted from https://gist.github.com/borgar/451393/7698c95178898c9466214867b46acb2ab2f56d68.
 */
const tokenize = (input: string, patterns: { [k: string]: RegExp }): Token[] => {
    const tokens: Token[] = [];
    let s = input;

    while (s) {
        let t = null;
        let m = s.length;

        for (const key in patterns) {
            const r = patterns[key].exec(s);
            // Try to choose the best match if there are several,
            // where "best" is the closest to the current starting point.
            if (r && r.index < m) {
                t = {
                    token: r[0],
                    type: key,
                    matches: r.slice(1),
                };
                m = r.index;
            }
        }

        if (t) {
            tokens.push(t);
        }

        // Advance past the matched token, or past the unmatched prefix
        // (or to the end of the string if nothing matched at all).
        s = s.slice(m + (t ? t.token.length : 0));
    }

    return tokens;
};

export enum SearchTokenTypes {
    EXACT = 'exact',
    FUZZY = 'fuzzy',
    KEY_VALUE = 'keyValue',
}

export type SearchToken = ExactSearchToken | FuzzySearchToken | KeyValueSearchToken;

export interface ExactSearchToken {
    type: SearchTokenTypes.EXACT;
    value: string;
}

export interface FuzzySearchToken {
    type: SearchTokenTypes.FUZZY;
    value: string;
}

export interface KeyValueSearchToken {
    type: SearchTokenTypes.KEY_VALUE;
    key: string;
    value: string;
}

/**
 * `tokenizeSearch` parses tokens from search strings, such as those used to filter
 * lists of tablets and other nouns.
 */
export const tokenizeSearch = (input: string): SearchToken[] => {
    return tokenize(input, {
        keyValue: /(\w+):([^\s"]+)/,
        exact: /"([^\s"]+)"/,
        fuzzy: /([^\s"]+)/,
    }).reduce((acc, token) => {
        switch (token.type) {
            case SearchTokenTypes.EXACT:
                acc.push({ type: token.type, value: token.matches[0] });
                break;
            case SearchTokenTypes.FUZZY:
                acc.push({ type: token.type, value: token.matches[0] });
                break;
            case SearchTokenTypes.KEY_VALUE:
                acc.push({ type: token.type, key: token.matches[0], value: token.matches[1] });
                break;
        }
        return acc;
    }, [] as SearchToken[]);
};
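
// Illustrative sketch (not part of the original file): `tokenize` scans the
// input left to right and, on each pass, keeps whichever pattern matches
// closest to the current start of the string. The pattern names (`verb`,
// `path`) and the sample input below are hypothetical.
//
//   tokenize('GET /users/42', {
//       verb: /^(GET|POST|PUT|DELETE)/,
//       path: /(\/[\w\/]+)/,
//   });
//   // => [
//   //   { token: 'GET', type: 'verb', matches: ['GET'] },
//   //   { token: '/users/42', type: 'path', matches: ['/users/42'] },
//   // ]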
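
// Illustrative sketch (not part of the original file): `tokenizeSearch` maps a
// free-form query onto the three `SearchToken` variants. The sample query is
// hypothetical; tracing it through the patterns above yields:
//
//   tokenizeSearch('hello key:value "world"');
//   // => [
//   //   { type: 'fuzzy', value: 'hello' },
//   //   { type: 'keyValue', key: 'key', value: 'value' },
//   //   { type: 'exact', value: 'world' },
//   // ]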