package text

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// tokenKind identifies the category of a token produced by the lexer.
type tokenKind uint8

const (
	tokenStr   tokenKind = iota // a family name (bare or quoted)
	tokenComma                  // the list separator ','
	tokenEOF                    // end of input
)

// token is a single lexeme of a font family fallback expression.
type token struct {
	kind  tokenKind
	value string // meaningful only for tokenStr
}

// String returns a human-readable form of the token, used in parse error
// messages.
func (t token) String() string {
	switch t.kind {
	case tokenStr:
		return t.value
	case tokenComma:
		return ","
	case tokenEOF:
		return "EOF"
	default:
		return "unknown"
	}
}

// lexState is one state of the lexer's state machine. It consumes input and
// returns the next state, or nil to stop lexing.
type lexState func(*lexer) lexState

// lexText is the top-level state: it skips whitespace, emits comma tokens,
// dispatches to the quoted-string states on a quote character, falls through
// to the bare-string state on any other rune, and emits tokenEOF at end of
// input.
func lexText(l *lexer) lexState {
	for {
		switch r := l.next(); {
		case r == -1:
			l.ignore()
			l.emit(tokenEOF)
			return nil
		case unicode.IsSpace(r):
			continue
		case r == ',':
			l.ignore()
			l.emit(tokenComma)
		case r == '"':
			// Drop the opening quote so the quoted state sees only the body.
			l.ignore()
			return lexDquote
		case r == '\'':
			l.ignore()
			return lexSquote
		default:
			// The rune just consumed is the first character of a bare name;
			// lexBareStr will include it because we do not ignore() here.
			return lexBareStr
		}
	}
}

// lexBareStr scans an unquoted family name up to the next comma or end of
// input. The deferred emit runs when this state returns, emitting everything
// consumed with surrounding whitespace trimmed.
func lexBareStr(l *lexer) lexState {
	defer l.emitProcessed(tokenStr, func(s string) (string, error) {
		return strings.TrimSpace(s), nil
	})
	for {
		// Stop before a comma so lexText can emit it as its own token.
		if strings.HasPrefix(l.input[l.pos:], `,`) {
			return lexText
		}
		switch r := l.next(); {
		case r == -1:
			return lexText
		}
	}
}

// lexDquote scans the body of a double-quoted family name.
func lexDquote(l *lexer) lexState {
	return lexQuote(l, `"`)
}

// lexSquote scans the body of a single-quoted family name.
func lexSquote(l *lexer) lexState {
	return lexQuote(l, `'`)
}

// unescape processes the raw body of a quoted string: it resolves the escape
// sequences \\ and \<quote>, strips leading and trailing whitespace, and
// preserves interior whitespace verbatim. Any other escape sequence is an
// error.
func unescape(s string, quote rune) (string, error) {
	var b strings.Builder
	hitNonSpace := false
	// wb buffers whitespace runes between words; it is flushed only when a
	// later non-space rune proves the whitespace was interior, so trailing
	// whitespace is silently dropped.
	var wb strings.Builder
	for i := 0; i < len(s); {
		r, sz := utf8.DecodeRuneInString(s[i:])
		i += sz
		if unicode.IsSpace(r) {
			if !hitNonSpace {
				// Leading whitespace: skip entirely.
				continue
			}
			wb.WriteRune(r)
			continue
		}
		hitNonSpace = true
		// If we get here, we're not looking at whitespace.
		// Insert any buffered up whitespace characters from
		// the gap between words.
		b.WriteString(wb.String())
		wb.Reset()
		if r == '\\' {
			r, sz := utf8.DecodeRuneInString(s[i:])
			i += sz
			switch r {
			case '\\', quote:
				b.WriteRune(r)
			default:
				// Also reached on a trailing lone backslash: DecodeRuneInString
				// on the empty remainder yields RuneError, which is rejected here.
				return "", fmt.Errorf("illegal escape sequence \\%c", r)
			}
		} else {
			b.WriteRune(r)
		}
	}
	return b.String(), nil
}

// lexQuote scans a string quoted with mark, tracking backslash escapes so
// that an escaped mark does not terminate the string. On the closing mark it
// emits the body processed through unescape; on unescape failure or EOF it
// records the error on the lexer.
func lexQuote(l *lexer, mark string) lexState {
	escaping := false
	for {
		// An unescaped mark at the scan position closes the string.
		if isQuote := strings.HasPrefix(l.input[l.pos:], mark); isQuote && !escaping {
			err := l.emitProcessed(tokenStr, func(s string) (string, error) {
				return unescape(s, []rune(mark)[0])
			})
			if err != nil {
				l.err = err
				return nil
			}
			// Consume and discard the closing mark itself.
			l.next()
			l.ignore()
			return lexText
		}
		// escaped records whether the *previous* rune was an escaping
		// backslash; escaping is set for the rune that follows a backslash.
		escaped := escaping
		switch r := l.next(); {
		case r == -1:
			l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
			return lexText
		case r == '\\':
			// A backslash starts an escape only if it is not itself escaped.
			if !escaped {
				escaping = true
			}
		}
		if escaped {
			escaping = false
		}
	}
}

// lexer splits a font family fallback expression into tokens.
type lexer struct {
	input  string  // unconsumed input; shrinks as tokens are emitted
	pos    int     // byte offset of the scan position within input
	tokens []token // tokens emitted so far
	err    error   // first error encountered, if any
}

// ignore discards everything consumed so far by re-slicing the input.
func (l *lexer) ignore() {
	l.input = l.input[l.pos:]
	l.pos = 0
}

// next decodes the next rune in the input and returns it.
// It returns -1 when the input is exhausted.
func (l *lexer) next() int32 {
	if l.pos >= len(l.input) {
		return -1
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += w
	return r
}

// emit adds a token of the given kind.
func (l *lexer) emit(t tokenKind) {
	// The identity transform cannot fail, so the returned error is discarded.
	l.emitProcessed(t, func(s string) (string, error) { return s, nil })
}

// emitProcessed adds a token of the given kind, but transforms its value
// with the provided closure first.
181 func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error { 182 val, err := f(l.input[:l.pos]) 183 l.tokens = append(l.tokens, token{ 184 kind: t, 185 value: val, 186 }) 187 l.ignore() 188 return err 189 } 190 191 // run executes the lexer on the given input. 192 func (l *lexer) run(input string) ([]token, error) { 193 l.input = input 194 l.tokens = l.tokens[:0] 195 l.pos = 0 196 for state := lexText; state != nil; { 197 state = state(l) 198 } 199 return l.tokens, l.err 200 } 201 202 // parser implements a simple recursive descent parser for font family fallback 203 // expressions. 204 type parser struct { 205 faces []string 206 lexer lexer 207 tokens []token 208 } 209 210 // parse the provided rule and return the extracted font families. The returned families 211 // are valid only until the next call to parse. If parsing fails, an error describing the 212 // failure is returned instead. 213 func (p *parser) parse(rule string) ([]string, error) { 214 var err error 215 p.tokens, err = p.lexer.run(rule) 216 if err != nil { 217 return nil, err 218 } 219 p.faces = p.faces[:0] 220 return p.faces, p.parseList() 221 } 222 223 // parse implements the production: 224 // 225 // LIST ::= <FACE> <COMMA> <LIST> | <FACE> 226 func (p *parser) parseList() error { 227 if len(p.tokens) < 0 { 228 return fmt.Errorf("expected family name, got EOF") 229 } 230 if head := p.tokens[0]; head.kind != tokenStr { 231 return fmt.Errorf("expected family name, got %s", head) 232 } else { 233 p.faces = append(p.faces, head.value) 234 p.tokens = p.tokens[1:] 235 } 236 237 switch head := p.tokens[0]; head.kind { 238 case tokenEOF: 239 return nil 240 case tokenComma: 241 p.tokens = p.tokens[1:] 242 return p.parseList() 243 default: 244 return fmt.Errorf("unexpected token %s", head) 245 } 246 }