github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/lex/all_keywords.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // +build all-keywords
    12  
    13  // all-keywords generates sql/lex/keywords.go from sql.y.
    14  //
    15  // It is generically structured with Go templates to allow for quick
    16  // prototyping of different code generation structures for keyword token
    17  // lookup. Previous attempts:
    18  //
    19  // Using github.com/cespare/mph to generate a perfect hash function. Was 10%
    20  // slower. Also attempted to populate the mph.Table with a sparse array where
    21  // the index correlated to the token id. This generated such a large array
    22  // (~65k entries) that the mph package never returned from its Build call.
    23  //
    24  // A `KeywordsTokens = map[string]int32` map from string -> token id.
    25  package main
    26  
    27  import (
    28  	"bufio"
    29  	"log"
    30  	"os"
    31  	"regexp"
    32  	"sort"
    33  	"strings"
    34  	"text/template"
    35  )
    36  
    37  func main() {
    38  	blockRE := regexp.MustCompile(`^.*_keyword:`)
    39  	keywordRE := regexp.MustCompile(`[A-Z].*`)
    40  
    41  	// keyword indicates whether we are currently in a block prefixed by blockRE.
    42  	keyword := false
    43  	category := ""
    44  	scanner := bufio.NewScanner(os.Stdin)
    45  	type entry struct {
    46  		Keyword, Ident, Category string
    47  	}
    48  	var data []entry
    49  	// Look for lines that start with "XXX_keyword:" and record the category. For
    50  	// subsequent non-empty lines, all words are keywords so add them to our
    51  	// data list. An empty line indicates the end of the keyword section, so
    52  	// stop recording.
    53  	for scanner.Scan() {
    54  		line := scanner.Text()
    55  		if match := blockRE.FindString(line); match != "" {
    56  			keyword = true
    57  			category = categories[match]
    58  			if category == "" {
    59  				log.Fatal("unknown keyword type:", match)
    60  			}
    61  		} else if line == "" {
    62  			keyword = false
    63  		} else if match = keywordRE.FindString(line); keyword && match != "" {
    64  			data = append(data, entry{
    65  				Keyword:  strings.ToLower(match),
    66  				Ident:    match,
    67  				Category: category,
    68  			})
    69  		}
    70  	}
    71  	if err := scanner.Err(); err != nil {
    72  		log.Fatal("reading standard input:", err)
    73  	}
    74  
    75  	// Some output variables need their output to be sorted for deterministic
    76  	// output.
    77  	sort.Slice(data, func(i, j int) bool {
    78  		return data[i].Ident < data[j].Ident
    79  	})
    80  
    81  	// Just panic if the template isn't parseable.
    82  	if err := template.Must(template.New("").Parse(tmpl)).Execute(os.Stdout, data); err != nil {
    83  		log.Fatal(err)
    84  	}
    85  }
    86  
    87  // Category codes are for pg_get_keywords, see
    88  // src/backend/utils/adt/misc.c in pg's sources.
    89  var categories = map[string]string{
    90  	"col_name_keyword:":                     "C",
    91  	"unreserved_keyword:":                   "U",
    92  	"type_func_name_keyword:":               "T",
    93  	"type_func_name_no_crdb_extra_keyword:": "T",
    94  	"type_func_name_crdb_extra_keyword:":    "T",
    95  	"reserved_keyword:":                     "R",
    96  	"cockroachdb_extra_reserved_keyword:":   "R",
    97  }
    98  
// tmpl is the text/template source that main renders to standard output to
// produce the generated Go file for package lex. It is executed with the
// sorted list of keyword entries and ranges over that list three times, to
// emit:
//
//   - KeywordsCategories, a map from lower-cased keyword to category code;
//   - KeywordNames, a slice of the lower-cased keywords in entry order;
//   - GetKeywordID, a switch from lower-cased keyword to the identifier
//     captured from the grammar, with IDENT as the default.
//
// The " -}}" markers trim the whitespace (newline included) that follows each
// range/end action so the loops do not emit extra blank lines.
const tmpl = `// Code generated by cmd/all-keywords. DO NOT EDIT.

package lex

var KeywordsCategories = map[string]string{
{{range . -}}
	"{{.Keyword}}": "{{.Category}}",
{{end -}}
}

// KeywordNames contains all keywords sorted, so that pg_get_keywords returns
// deterministic results.
var KeywordNames = []string{
{{range . -}}
	"{{.Keyword}}",
{{end -}}
}

// GetKeywordID returns the lex id of the SQL keyword k or IDENT if k is
// not a keyword.
func GetKeywordID(k string) int32 {
	// The previous implementation generated a map that did a string ->
	// id lookup. Various ideas were benchmarked and the implementation below
	// was the fastest of those, between 3% and 10% faster (at parsing, so the
	// scanning speedup is even more) than the map implementation.
	switch k {
	{{range . -}}
	case "{{.Keyword}}": return {{.Ident}}
	{{end -}}
	default: return IDENT
	}
}
`