github.com/utopiagio/gio@v0.0.8/text/family_parser.go

package text

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

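// tokenKind identifies the kind of a lexed token: a quoted or bare string,
// a comma separator, or the end of input.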
type tokenKind uint8

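// The kinds of token emitted by the lexer.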
const (
	tokenStr tokenKind = iota
	tokenComma
	tokenEOF
)

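// token is a single lexeme of a font family fallback expression.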
type token struct {
	kind  tokenKind
	value string
}

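// String returns a human-readable form of the token for use in error messages.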
func (t token) String() string {
	switch t.kind {
	case tokenStr:
		return t.value
	case tokenComma:
		return ","
	case tokenEOF:
		return "EOF"
	default:
		return "unknown"
	}
}

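// lexState is one state in the lexer's state machine. Each state consumes
// input, may emit tokens, and returns the next state, or nil when lexing is
// finished.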
type lexState func(*lexer) lexState

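// lexText is the top-level lex state. It skips whitespace, emits comma and
// EOF tokens, and dispatches to the appropriate state for quoted or bare
// family names.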
func lexText(l *lexer) lexState {
	for {
		switch r := l.next(); {
		case r == -1:
			l.ignore()
			l.emit(tokenEOF)
			return nil
		case unicode.IsSpace(r):
			continue
		case r == ',':
			l.ignore()
			l.emit(tokenComma)
		case r == '"':
			l.ignore()
			return lexDquote
		case r == '\'':
			l.ignore()
			return lexSquote
		default:
			return lexBareStr
		}
	}
}

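// lexBareStr lexes an unquoted family name, which ends at a comma or at the
// end of the input. Surrounding whitespace is trimmed from the emitted value.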
func lexBareStr(l *lexer) lexState {
	defer l.emitProcessed(tokenStr, func(s string) (string, error) {
		return strings.TrimSpace(s), nil
	})
	for {
		if strings.HasPrefix(l.input[l.pos:], `,`) {
			return lexText
		}
		switch r := l.next(); {
		case r == -1:
			return lexText
		}
	}
}

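// lexDquote lexes a double-quoted family name.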
func lexDquote(l *lexer) lexState {
	return lexQuote(l, `"`)
}

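// lexSquote lexes a single-quoted family name.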
func lexSquote(l *lexer) lexState {
	return lexQuote(l, `'`)
}

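// unescape strips leading and trailing whitespace from a quoted family name
// and resolves backslash escapes of the quote character and of the backslash
// itself. Any other escape sequence is an error.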
func unescape(s string, quote rune) (string, error) {
	var b strings.Builder
	hitNonSpace := false
	var wb strings.Builder
	for i := 0; i < len(s); {
		r, sz := utf8.DecodeRuneInString(s[i:])
		i += sz
		if unicode.IsSpace(r) {
			if !hitNonSpace {
				continue
			}
			wb.WriteRune(r)
			continue
		}
		hitNonSpace = true
		// If we get here, we're not looking at whitespace.
		// Insert any buffered up whitespace characters from
		// the gap between words.
		b.WriteString(wb.String())
		wb.Reset()
		if r == '\\' {
			r, sz := utf8.DecodeRuneInString(s[i:])
			i += sz
			switch r {
			case '\\', quote:
				b.WriteRune(r)
			default:
				return "", fmt.Errorf("illegal escape sequence \\%c", r)
			}
		} else {
			b.WriteRune(r)
		}
	}
	return b.String(), nil
}

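// lexQuote lexes a family name enclosed in the given quote mark, handling
// backslash escapes. The quoted content is emitted after being passed through
// unescape, and the closing quote is consumed and discarded.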
func lexQuote(l *lexer, mark string) lexState {
	escaping := false
	for {
		if isQuote := strings.HasPrefix(l.input[l.pos:], mark); isQuote && !escaping {
			err := l.emitProcessed(tokenStr, func(s string) (string, error) {
				return unescape(s, []rune(mark)[0])
			})
			if err != nil {
				l.err = err
				return nil
			}
			l.next()
			l.ignore()
			return lexText
		}
		escaped := escaping
		switch r := l.next(); {
		case r == -1:
			l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
			return lexText
		case r == '\\':
			if !escaped {
				escaping = true
			}
		}
		if escaped {
			escaping = false
		}
	}
}

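// lexer splits a font family fallback expression into tokens using a small
// state machine of lexState functions.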
type lexer struct {
	input  string
	pos    int
	tokens []token
	err    error
}

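// ignore discards the input consumed so far.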
func (l *lexer) ignore() {
	l.input = l.input[l.pos:]
	l.pos = 0
}

// next decodes the next rune in the input and returns it, or -1 if the input
// is exhausted.
func (l *lexer) next() int32 {
	if l.pos >= len(l.input) {
		return -1
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += w
	return r
}

// emit adds a token of the given kind.
func (l *lexer) emit(t tokenKind) {
	l.emitProcessed(t, func(s string) (string, error) { return s, nil })
}

// emitProcessed adds a token of the given kind, but transforms its value
// with the provided closure first.
func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error {
	val, err := f(l.input[:l.pos])
	l.tokens = append(l.tokens, token{
		kind:  t,
		value: val,
	})
	l.ignore()
	return err
}

// run executes the lexer on the given input.
func (l *lexer) run(input string) ([]token, error) {
	l.input = input
	l.tokens = l.tokens[:0]
	l.pos = 0
	// Reset any error left over from a previous run so the lexer can be reused.
	l.err = nil
	for state := lexText; state != nil; {
		state = state(l)
	}
	return l.tokens, l.err
}

// parser implements a simple recursive descent parser for font family fallback
// expressions such as:
//
//	Helvetica, "Times New Roman", 'DejaVu Sans'
//
// Bare names have surrounding whitespace trimmed; quoted names may escape the
// quote character and the backslash with a backslash.
type parser struct {
	faces  []string
	lexer  lexer
	tokens []token
}

// parse the provided rule and return the extracted font families. The returned families
// are valid only until the next call to parse. If parsing fails, an error describing the
// failure is returned instead.
func (p *parser) parse(rule string) ([]string, error) {
	var err error
	p.tokens, err = p.lexer.run(rule)
	if err != nil {
		return nil, err
	}
	p.faces = p.faces[:0]
	// Run the parser before reading p.faces so the returned slice reflects the
	// families appended by parseList.
	err = p.parseList()
	return p.faces, err
}

// parseList implements the production:
//
//	LIST ::= <FACE> <COMMA> <LIST> | <FACE>
func (p *parser) parseList() error {
	if len(p.tokens) == 0 {
		return fmt.Errorf("expected family name, got EOF")
	}
	if head := p.tokens[0]; head.kind != tokenStr {
		return fmt.Errorf("expected family name, got %s", head)
	} else {
		p.faces = append(p.faces, head.value)
		p.tokens = p.tokens[1:]
	}

	switch head := p.tokens[0]; head.kind {
	case tokenEOF:
		return nil
	case tokenComma:
		p.tokens = p.tokens[1:]
		return p.parseList()
	default:
		return fmt.Errorf("unexpected token %s", head)
	}
}