github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/lexbase/allkeywords/main.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // all-keywords generates sql/lexbase/keywords.go from sql.y. 12 // 13 // It is generically structured with Go templates to allow for quick 14 // prototyping of different code generation structures for keyword token 15 // lookup. Previous attempts: 16 // 17 // Using github.com/cespare/mph to generate a perfect hash function. Was 10% 18 // slower. Also attempted to populate the mph.Table with a sparse array where 19 // the index correlated to the token id. This generated such a large array 20 // (~65k entries) that the mph package never returned from its Build call. 21 // 22 // A `KeywordsTokens = map[string]int32` map from string -> token id. 23 package main 24 25 import ( 26 "bufio" 27 "log" 28 "os" 29 "regexp" 30 "sort" 31 "strings" 32 "text/template" 33 ) 34 35 func main() { 36 blockRE := regexp.MustCompile(`^.*_keyword:`) 37 keywordRE := regexp.MustCompile(`[A-Z].*`) 38 39 // keyword indicates whether we are currently in a block prefixed by blockRE. 40 keyword := false 41 category := "" 42 scanner := bufio.NewScanner(os.Stdin) 43 type entry struct { 44 Keyword, Ident, Category string 45 } 46 var data []entry 47 // Look for lines that start with "XXX_keyword:" and record the category. For 48 // subsequent non-empty lines, all words are keywords so add them to our 49 // data list. An empty line indicates the end of the keyword section, so 50 // stop recording. 51 for scanner.Scan() { 52 line := scanner.Text() 53 if match := blockRE.FindString(line); match != "" { 54 keyword = true 55 category = categories[match] 56 if category == "" { 57 log.Fatal("unknown keyword type:", match) 58 } 59 } else if line == "" { 60 keyword = false 61 } else if match = keywordRE.FindString(line); keyword && match != "" { 62 data = append(data, entry{ 63 Keyword: strings.ToLower(match), 64 Ident: match, 65 Category: category, 66 }) 67 } 68 } 69 if err := scanner.Err(); err != nil { 70 log.Fatal("reading standard input:", err) 71 } 72 73 // Some output variables need their output to be sorted for deterministic 74 // output. 75 sort.Slice(data, func(i, j int) bool { 76 return data[i].Ident < data[j].Ident 77 }) 78 79 // Just panic if the template isn't parseable. 80 if err := template.Must(template.New("").Parse(tmpl)).Execute(os.Stdout, data); err != nil { 81 log.Fatal(err) 82 } 83 } 84 85 // Category codes are for pg_get_keywords, see 86 // src/backend/utils/adt/misc.c in pg's sources. 87 var categories = map[string]string{ 88 "col_name_keyword:": "C", 89 "unreserved_keyword:": "U", 90 "type_func_name_keyword:": "T", 91 "type_func_name_no_crdb_extra_keyword:": "T", 92 "type_func_name_crdb_extra_keyword:": "T", 93 "reserved_keyword:": "R", 94 "cockroachdb_extra_reserved_keyword:": "R", 95 } 96 97 const tmpl = `// Code generated by pkg/sql/lexbase/allkeywords. DO NOT EDIT. 98 99 package lexbase 100 101 var KeywordsCategories = map[string]string{ 102 {{range . -}} 103 "{{.Keyword}}": "{{.Category}}", 104 {{end -}} 105 } 106 107 // KeywordNames contains all keywords sorted, so that pg_get_keywords returns 108 // deterministic results. 109 var KeywordNames = []string{ 110 {{range . -}} 111 "{{.Keyword}}", 112 {{end -}} 113 } 114 115 // GetKeywordID returns the lex id of the SQL keyword k or IDENT if k is 116 // not a keyword. 117 func GetKeywordID(k string) int32 { 118 // The previous implementation generated a map that did a string -> 119 // id lookup. Various ideas were benchmarked and the implementation below 120 // was the fastest of those, between 3% and 10% faster (at parsing, so the 121 // scanning speedup is even more) than the map implementation. 122 switch k { 123 {{range . -}} 124 case "{{.Keyword}}": return {{.Ident}} 125 {{end -}} 126 default: return IDENT 127 } 128 } 129 `