github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/allkeywords/main.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // all-keywords generates sql/lexbase/keywords.go from sql.y.
    12  //
    13  // It is generically structured with Go templates to allow for quick
    14  // prototyping of different code generation structures for keyword token
    15  // lookup. Previous attempts:
    16  //
    17  // Using github.com/cespare/mph to generate a perfect hash function. Was 10%
    18  // slower. Also attempted to populate the mph.Table with a sparse array where
    19  // the index correlated to the token id. This generated such a large array
    20  // (~65k entries) that the mph package never returned from its Build call.
    21  //
    22  // A `KeywordsTokens = map[string]int32` map from string -> token id.
    23  package main
    24  
    25  import (
    26  	"bufio"
    27  	"log"
    28  	"os"
    29  	"regexp"
    30  	"sort"
    31  	"strings"
    32  	"text/template"
    33  )
    34  
    35  func main() {
    36  	blockRE := regexp.MustCompile(`^.*_keyword:`)
    37  	keywordRE := regexp.MustCompile(`[A-Z].*`)
    38  
    39  	// keyword indicates whether we are currently in a block prefixed by blockRE.
    40  	keyword := false
    41  	category := ""
    42  	scanner := bufio.NewScanner(os.Stdin)
    43  	type entry struct {
    44  		Keyword, Ident, Category string
    45  	}
    46  	var data []entry
    47  	// Look for lines that start with "XXX_keyword:" and record the category. For
    48  	// subsequent non-empty lines, all words are keywords so add them to our
    49  	// data list. An empty line indicates the end of the keyword section, so
    50  	// stop recording.
    51  	for scanner.Scan() {
    52  		line := scanner.Text()
    53  		if match := blockRE.FindString(line); match != "" {
    54  			keyword = true
    55  			category = categories[match]
    56  			if category == "" {
    57  				log.Fatal("unknown keyword type:", match)
    58  			}
    59  		} else if line == "" {
    60  			keyword = false
    61  		} else if match = keywordRE.FindString(line); keyword && match != "" {
    62  			data = append(data, entry{
    63  				Keyword:  strings.ToLower(match),
    64  				Ident:    match,
    65  				Category: category,
    66  			})
    67  		}
    68  	}
    69  	if err := scanner.Err(); err != nil {
    70  		log.Fatal("reading standard input:", err)
    71  	}
    72  
    73  	// Some output variables need their output to be sorted for deterministic
    74  	// output.
    75  	sort.Slice(data, func(i, j int) bool {
    76  		return data[i].Ident < data[j].Ident
    77  	})
    78  
    79  	// Just panic if the template isn't parseable.
    80  	if err := template.Must(template.New("").Parse(tmpl)).Execute(os.Stdout, data); err != nil {
    81  		log.Fatal(err)
    82  	}
    83  }
    84  
    85  // Category codes are for pg_get_keywords, see
    86  // src/backend/utils/adt/misc.c in pg's sources.
    87  var categories = map[string]string{
    88  	"col_name_keyword:":                     "C",
    89  	"unreserved_keyword:":                   "U",
    90  	"type_func_name_keyword:":               "T",
    91  	"type_func_name_no_crdb_extra_keyword:": "T",
    92  	"type_func_name_crdb_extra_keyword:":    "T",
    93  	"reserved_keyword:":                     "R",
    94  	"cockroachdb_extra_reserved_keyword:":   "R",
    95  }
    96  
// tmpl is the text/template source of the generated lexbase keywords file.
// main executes it with the sorted slice of keyword entries; each entry
// supplies .Keyword (lower-case name), .Ident (token identifier) and
// .Category (category code). The output declares three things:
//
//   - KeywordsCategories, a map from keyword to category code;
//   - KeywordNames, a slice of the keywords in the order of the input slice;
//   - GetKeywordID, a switch that returns the token identifier for a keyword
//     and IDENT for any other string.
//
// The "-}}" trim markers on the range/end actions drop the newline and
// indentation that follow them, so that whitespace is not emitted.
const tmpl = `// Code generated by pkg/sql/lexbase/allkeywords. DO NOT EDIT.

package lexbase

var KeywordsCategories = map[string]string{
{{range . -}}
	"{{.Keyword}}": "{{.Category}}",
{{end -}}
}

// KeywordNames contains all keywords sorted, so that pg_get_keywords returns
// deterministic results.
var KeywordNames = []string{
{{range . -}}
	"{{.Keyword}}",
{{end -}}
}

// GetKeywordID returns the lex id of the SQL keyword k or IDENT if k is
// not a keyword.
func GetKeywordID(k string) int32 {
	// The previous implementation generated a map that did a string ->
	// id lookup. Various ideas were benchmarked and the implementation below
	// was the fastest of those, between 3% and 10% faster (at parsing, so the
	// scanning speedup is even more) than the map implementation.
	switch k {
	{{range . -}}
	case "{{.Keyword}}": return {{.Ident}}
	{{end -}}
	default: return IDENT
	}
}
`