github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/lex/all_keywords.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // +build all-keywords 12 13 // all-keywords generates sql/lex/keywords.go from sql.y. 14 // 15 // It is generically structured with Go templates to allow for quick 16 // prototyping of different code generation structures for keyword token 17 // lookup. Previous attempts: 18 // 19 // Using github.com/cespare/mph to generate a perfect hash function. Was 10% 20 // slower. Also attempted to populate the mph.Table with a sparse array where 21 // the index correlated to the token id. This generated such a large array 22 // (~65k entries) that the mph package never returned from its Build call. 23 // 24 // A `KeywordsTokens = map[string]int32` map from string -> token id. 25 package main 26 27 import ( 28 "bufio" 29 "log" 30 "os" 31 "regexp" 32 "sort" 33 "strings" 34 "text/template" 35 ) 36 37 func main() { 38 blockRE := regexp.MustCompile(`^.*_keyword:`) 39 keywordRE := regexp.MustCompile(`[A-Z].*`) 40 41 // keyword indicates whether we are currently in a block prefixed by blockRE. 42 keyword := false 43 category := "" 44 scanner := bufio.NewScanner(os.Stdin) 45 type entry struct { 46 Keyword, Ident, Category string 47 } 48 var data []entry 49 // Look for lines that start with "XXX_keyword:" and record the category. For 50 // subsequent non-empty lines, all words are keywords so add them to our 51 // data list. An empty line indicates the end of the keyword section, so 52 // stop recording. 53 for scanner.Scan() { 54 line := scanner.Text() 55 if match := blockRE.FindString(line); match != "" { 56 keyword = true 57 category = categories[match] 58 if category == "" { 59 log.Fatal("unknown keyword type:", match) 60 } 61 } else if line == "" { 62 keyword = false 63 } else if match = keywordRE.FindString(line); keyword && match != "" { 64 data = append(data, entry{ 65 Keyword: strings.ToLower(match), 66 Ident: match, 67 Category: category, 68 }) 69 } 70 } 71 if err := scanner.Err(); err != nil { 72 log.Fatal("reading standard input:", err) 73 } 74 75 // Some output variables need their output to be sorted for deterministic 76 // output. 77 sort.Slice(data, func(i, j int) bool { 78 return data[i].Ident < data[j].Ident 79 }) 80 81 // Just panic if the template isn't parseable. 82 if err := template.Must(template.New("").Parse(tmpl)).Execute(os.Stdout, data); err != nil { 83 log.Fatal(err) 84 } 85 } 86 87 // Category codes are for pg_get_keywords, see 88 // src/backend/utils/adt/misc.c in pg's sources. 89 var categories = map[string]string{ 90 "col_name_keyword:": "C", 91 "unreserved_keyword:": "U", 92 "type_func_name_keyword:": "T", 93 "type_func_name_no_crdb_extra_keyword:": "T", 94 "type_func_name_crdb_extra_keyword:": "T", 95 "reserved_keyword:": "R", 96 "cockroachdb_extra_reserved_keyword:": "R", 97 } 98 99 const tmpl = `// Code generated by cmd/all-keywords. DO NOT EDIT. 100 101 package lex 102 103 var KeywordsCategories = map[string]string{ 104 {{range . -}} 105 "{{.Keyword}}": "{{.Category}}", 106 {{end -}} 107 } 108 109 // KeywordNames contains all keywords sorted, so that pg_get_keywords returns 110 // deterministic results. 111 var KeywordNames = []string{ 112 {{range . -}} 113 "{{.Keyword}}", 114 {{end -}} 115 } 116 117 // GetKeywordID returns the lex id of the SQL keyword k or IDENT if k is 118 // not a keyword. 119 func GetKeywordID(k string) int32 { 120 // The previous implementation generated a map that did a string -> 121 // id lookup. Various ideas were benchmarked and the implementation below 122 // was the fastest of those, between 3% and 10% faster (at parsing, so the 123 // scanning speedup is even more) than the map implementation. 124 switch k { 125 {{range . -}} 126 case "{{.Keyword}}": return {{.Ident}} 127 {{end -}} 128 default: return IDENT 129 } 130 } 131 `