vitess.io/vitess@v0.16.2/web/vtadmin/src/util/tokenize.ts (about)

     1  /**
     2   * Copyright 2021 The Vitess Authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
/**
 * `Token` is a single token parsed out of an input string by `tokenize`.
 */
export interface Token {
    /** Capture groups from the regex match (the match array minus the full match at index 0). */
    matches: string[];
    /** The full matched text. */
    token: string;
    /** The key of the pattern (in the `patterns` map) that produced this token. */
    type: string;
}
    22  
    23  /**
    24   * `tokenize` is a tiny, simple tokenizer that parses tokens from the `input` string
    25   * based on the regexes in `patterns`.
    26   *
    27   * At the time of writing, `tokenize` is only used as the tokenizer behind `tokenizeSearch`.
    28   * Isolating tokenization logic in its own function, however, is a useful separation of concerns
    29   * should we want to swap in a different implementation.
    30   *
    31   * Since `tokenize` is regex-based, it is by no means as robust as using a "real"
    32   * query syntax. If or when we need more complex parsing capabilities,
    33   * it would be worth investigating a true parser (like peg.js), combined with a well-known
    34   * query syntax (like Apache Lucene).
    35   *
    36   * Mostly lifted from https://gist.github.com/borgar/451393/7698c95178898c9466214867b46acb2ab2f56d68.
    37   */
    38  const tokenize = (input: string, patterns: { [k: string]: RegExp }): Token[] => {
    39      const tokens: Token[] = [];
    40      let s = input;
    41  
    42      while (s) {
    43          let t = null;
    44          let m = s.length;
    45  
    46          for (const key in patterns) {
    47              const r = patterns[key].exec(s);
    48              // Try to choose the best match if there are several,
    49              // where "best" is the closest to the current starting point
    50              if (r && r.index < m) {
    51                  t = {
    52                      token: r[0],
    53                      type: key,
    54                      matches: r.slice(1),
    55                  };
    56                  m = r.index;
    57              }
    58          }
    59  
    60          if (t) {
    61              tokens.push(t);
    62          }
    63  
    64          s = s.substr(m + (t ? t.token.length : 0));
    65      }
    66  
    67      return tokens;
    68  };
    69  
/**
 * `SearchTokenTypes` enumerates the kinds of tokens recognized by `tokenizeSearch`.
 * The string values correspond to the pattern keys passed to `tokenize`.
 */
export enum SearchTokenTypes {
    /** A quoted term, e.g. `"zone1"`. */
    EXACT = 'exact',
    /** A bare, unquoted term, e.g. `zone1`. */
    FUZZY = 'fuzzy',
    /** A `key:value` pair, e.g. `keyspace:commerce`. */
    KEY_VALUE = 'keyValue',
}
    75  
/** Union of all token shapes produced by `tokenizeSearch`, discriminated on `type`. */
export type SearchToken = ExactSearchToken | FuzzySearchToken | KeyValueSearchToken;

/** A quoted search term, e.g. `"zone1"`. `value` excludes the surrounding quotes. */
export interface ExactSearchToken {
    type: SearchTokenTypes.EXACT;
    value: string;
}

/** An unquoted search term, e.g. `zone1`. */
export interface FuzzySearchToken {
    type: SearchTokenTypes.FUZZY;
    value: string;
}

/** A `key:value` search term, e.g. `keyspace:commerce`. */
export interface KeyValueSearchToken {
    type: SearchTokenTypes.KEY_VALUE;
    key: string;
    value: string;
}
    92  
    93  /**
    94   * `tokenizeSearch` parses tokens from search strings, such as those used to filter
    95   * lists of tablets and other nouns.
    96   */
    97  export const tokenizeSearch = (input: string): SearchToken[] => {
    98      return tokenize(input, {
    99          keyValue: /(\w+):([^\s"]+)/,
   100          exact: /"([^\s"]+)"/,
   101          fuzzy: /([^\s"]+)/,
   102      }).reduce((acc, token) => {
   103          switch (token.type) {
   104              case SearchTokenTypes.EXACT:
   105                  acc.push({ type: token.type, value: token.matches[0] });
   106                  break;
   107              case SearchTokenTypes.FUZZY:
   108                  acc.push({ type: token.type, value: token.matches[0] });
   109                  break;
   110              case SearchTokenTypes.KEY_VALUE:
   111                  acc.push({ type: token.type, key: token.matches[0], value: token.matches[1] });
   112                  break;
   113          }
   114          return acc;
   115      }, [] as SearchToken[]);
   116  };