// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package hba

import (
	"regexp"
	"strings"

	"github.com/cockroachdb/errors"
)

// This file contains a scanner for the pg_hba.conf token syntax.
//
// The algorithm used here is as follows: first the input is split
// into lines. Then each line is scanned using a rule-based algorithm.
//

// rule represents one scanning rule: a regular expression to try at
// the current position, and the action to take when it matches.
type rule struct {
	// re is the regular expression to match at the current text position.
	re string
	// fn is the action function to call if the rule matches.
	// - if foundToken is true, the lexer stops scanning and returns the current token.
	// - a non-nil err stops the scan and returns an error.
	fn func(l *lex) (foundToken bool, err error)
}

// lex represents the state of the scanner.
// This is not meant to be used in parsing rules.
type lex struct {
	// String accumulates the token value and whether it was quoted.
	String

	// comma is set to true if the last found token was succeeded by a
	// comma.
	comma bool

	// lexed is set to the portion of the text matched by the current
	// rule, and is provided as input to the rule's action function.
	lexed string
}

// rules describes the scanning rules.
//
// As per pg's source, file src/backend/libpq/hba.c:
//   Tokens are strings of non-blank
//   characters bounded by blank characters, commas, beginning of line, and
//   end of line. Blank means space or tab. Tokens can be delimited by
//   double quotes (this allows the inclusion of blanks, but not newlines).
//
// The scanner implemented here is slightly more strict than the one
// used by PostgreSQL. For example, PostgreSQL supports tokens written
// as: abc"def"geh to represent the single string "abcdefgeh". The
// same input here will yield 3 different tokens: "abc", "def" (quoted),
// and "geh".
//
// PostgreSQL also accepts special (control) characters inside quoted
// and unquoted strings, including tabs (\t) and carriage returns (\r)
// inside quoted strings. These are not accepted here for the sake of
// simplicity in the pretty-printer. If a use case comes up where they
// should be accepted, care should be taken to implement a new
// pretty-printer that does not rewrite whitespace in HBA strings.
//
// This difference is intended; it makes the implementation simpler
// and the result less surprising.
//
// Meanwhile, the scanner does implement some other oddities of
// PostgreSQL. For example:
//   a, b   (space after comma)  counts as a single comma-delimited field.
//   a ,b   (space before comma) counts as two fields.
//
var rules = []struct {
	r  rule
	rg *regexp.Regexp
}{
	// Skip over blanks, carriage returns and stray commas between tokens.
	{r: rule{`[ \t\r,]*` /***********/, func(l *lex) (bool, error) { return false, nil }}},
	// Skip comments through the end of the line.
	{r: rule{`#.*$` /****************/, func(l *lex) (bool, error) { return false, nil }}},
	// Unquoted token, possibly followed by a comma.
	{r: rule{`[^[:cntrl:] ",]+,?` /**/, func(l *lex) (bool, error) { l.checkComma(); l.Value = l.lexed; return true, nil }}},
	// Quoted token (no control characters inside), possibly followed by a comma.
	{r: rule{`"[^[:cntrl:]"]*",?` /**/, func(l *lex) (bool, error) { l.checkComma(); l.stripQuotes(); l.Value = l.lexed; return true, nil }}},
	// Opening quote with no closing quote on the same line.
	{r: rule{`"[^"]*$` /*************/, func(l *lex) (bool, error) { return false, errors.New("unterminated quoted string") }}},
	// Quoted string containing control characters (rejected; see file comment).
	{r: rule{`"[^"]*"` /*************/, func(l *lex) (bool, error) { return false, errors.New("invalid characters in quoted string") }}},
	// Catch-all: anything not matched above is unsupported.
	{r: rule{`.` /*******************/, func(l *lex) (bool, error) { return false, errors.Newf("unsupported character: %q", l.lexed) }}},
}

// checkComma records in l.comma whether the lexed text ends with a
// comma and, if so, strips that comma from l.lexed.
func (l *lex) checkComma() {
	l.comma = l.lexed[len(l.lexed)-1] == ','
	if l.comma {
		l.lexed = l.lexed[:len(l.lexed)-1]
	}
}

// stripQuotes removes the surrounding double quotes from l.lexed and
// marks the token as quoted.
func (l *lex) stripQuotes() {
	l.Quoted = true
	l.lexed = l.lexed[1 : len(l.lexed)-1]
}

// init pre-compiles the rule regexps, anchoring each of them at the
// beginning of the text so they only match at the current position.
func init() {
	for i := range rules {
		rules[i].rg = regexp.MustCompile("^" + rules[i].r.re)
	}
}

// nextToken reads the next token from buf. A token is a simple or
// quoted string. If there is no token (e.g. just whitespace), the
// returned token is empty. trailingComma indicates whether the token
// is immediately followed by a comma.
//
// Inspired from pg's src/backend/libpq/hba.c, next_token().
117 func nextToken(buf string) (remaining string, tok String, trailingComma bool, err error) { 118 remaining = buf 119 var l lex 120 outer: 121 for remaining != "" { 122 l = lex{} 123 inner: 124 for _, rule := range rules { 125 l.lexed = rule.rg.FindString(remaining) 126 remaining = remaining[len(l.lexed):] 127 if l.lexed != "" { 128 var foundToken bool 129 foundToken, err = rule.r.fn(&l) 130 if foundToken || err != nil { 131 break outer 132 } 133 break inner 134 } 135 } 136 } 137 return remaining, l.String, l.comma, err 138 } 139 140 // nextFieldExpand reads the next comma-separated list of string from buf. 141 // commas count as separator only when they immediately follow a string. 142 // 143 // Inspired from pg's src/backend/libpq/hba.c, next_field_expand(). 144 func nextFieldExpand(buf string) (remaining string, field []String, err error) { 145 remaining = buf 146 for { 147 var trailingComma bool 148 var tok String 149 remaining, tok, trailingComma, err = nextToken(remaining) 150 if tok.Empty() || err != nil { 151 return 152 } 153 field = append(field, tok) 154 if !trailingComma { 155 break 156 } 157 } 158 return 159 } 160 161 // tokenize splits the input into tokens. 162 // 163 // Inspired from pg's src/backend/libpq/hba.c, tokenize_file(). 
164 func tokenize(input string) (res scannedInput, err error) { 165 inputLines := strings.Split(input, "\n") 166 167 for lineIdx, lineS := range inputLines { 168 var currentLine hbaLine 169 currentLine.input = strings.TrimSpace(lineS) 170 for remaining := lineS; remaining != ""; { 171 var currentField []String 172 remaining, currentField, err = nextFieldExpand(remaining) 173 if err != nil { 174 return res, errors.Wrapf(err, "line %d", lineIdx+1) 175 } 176 if len(currentField) > 0 { 177 currentLine.tokens = append(currentLine.tokens, currentField) 178 } 179 } 180 if len(currentLine.tokens) > 0 { 181 res.lines = append(res.lines, currentLine) 182 res.linenos = append(res.linenos, lineIdx+1) 183 } 184 } 185 return res, err 186 }