github.com/SuCicada/su-hugo@v1.0.0/parser/pageparser/pagelexer_shortcode.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pageparser
    15  
    16  type lexerShortcodeState struct {
    17  	currLeftDelimItem  ItemType
    18  	currRightDelimItem ItemType
    19  	isInline           bool
    20  	currShortcodeName  string          // is only set when a shortcode is in opened state
    21  	closingState       int             // > 0 = on its way to be closed
    22  	elementStepNum     int             // step number in element
    23  	paramElements      int             // number of elements (name + value = 2) found first
    24  	openShortcodes     map[string]bool // set of shortcodes in open state
    25  
    26  }
    27  
    28  // Shortcode syntax
    29  var (
    30  	leftDelimSc            = []byte("{{")
    31  	leftDelimScNoMarkup    = []byte("{{<")
    32  	rightDelimScNoMarkup   = []byte(">}}")
    33  	leftDelimScWithMarkup  = []byte("{{%")
    34  	rightDelimScWithMarkup = []byte("%}}")
    35  	leftComment            = []byte("/*") // comments in this context us used to to mark shortcodes as "not really a shortcode"
    36  	rightComment           = []byte("*/")
    37  )
    38  
    39  func (l *pageLexer) isShortCodeStart() bool {
    40  	return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
    41  }
    42  
    43  func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
    44  	l.pos += len(l.currentLeftShortcodeDelim())
    45  	if l.hasPrefix(leftComment) {
    46  		return lexShortcodeComment
    47  	}
    48  	l.emit(l.currentLeftShortcodeDelimItem())
    49  	l.elementStepNum = 0
    50  	l.paramElements = 0
    51  	return lexInsideShortcode
    52  }
    53  
    54  func lexShortcodeComment(l *pageLexer) stateFunc {
    55  	posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
    56  	if posRightComment <= 1 {
    57  		return l.errorf("comment must be closed")
    58  	}
    59  	// we emit all as text, except the comment markers
    60  	l.emit(tText)
    61  	l.pos += len(leftComment)
    62  	l.ignore()
    63  	l.pos += posRightComment - len(leftComment)
    64  	l.emit(tText)
    65  	l.pos += len(rightComment)
    66  	l.ignore()
    67  	l.pos += len(l.currentRightShortcodeDelim())
    68  	l.emit(tText)
    69  	return lexMainSection
    70  }
    71  
    72  func lexShortcodeRightDelim(l *pageLexer) stateFunc {
    73  	l.closingState = 0
    74  	l.pos += len(l.currentRightShortcodeDelim())
    75  	l.emit(l.currentRightShortcodeDelimItem())
    76  	return lexMainSection
    77  }
    78  
    79  // either:
    80  // 1. param
    81  // 2. "param" or "param\"
    82  // 3. param="123" or param="123\"
    83  // 4. param="Some \"escaped\" text"
    84  // 5. `param`
    85  // 6. param=`123`
    86  func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {
    87  	first := true
    88  	nextEq := false
    89  
    90  	var r rune
    91  
    92  	for {
    93  		r = l.next()
    94  		if first {
    95  			if r == '"' || (r == '`' && !escapedQuoteStart) {
    96  				// a positional param with quotes
    97  				if l.paramElements == 2 {
    98  					return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
    99  				}
   100  				l.paramElements = 1
   101  				l.backup()
   102  				if r == '"' {
   103  					return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
   104  				}
   105  				return lexShortCodeParamRawStringVal(l, tScParam)
   106  
   107  			} else if r == '`' && escapedQuoteStart {
   108  				return l.errorf("unrecognized escape character")
   109  			}
   110  			first = false
   111  		} else if r == '=' {
   112  			// a named param
   113  			l.backup()
   114  			nextEq = true
   115  			break
   116  		}
   117  
   118  		if !isAlphaNumericOrHyphen(r) && r != '.' { // Floats have period
   119  			l.backup()
   120  			break
   121  		}
   122  	}
   123  
   124  	if l.paramElements == 0 {
   125  		l.paramElements++
   126  
   127  		if nextEq {
   128  			l.paramElements++
   129  		}
   130  	} else {
   131  		if nextEq && l.paramElements == 1 {
   132  			return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
   133  		} else if !nextEq && l.paramElements == 2 {
   134  			return l.errorf("got positional parameter '%s'. Cannot mix named and positional parameters", l.current())
   135  		}
   136  	}
   137  
   138  	l.emit(tScParam)
   139  	return lexInsideShortcode
   140  }
   141  
   142  func lexShortcodeParamVal(l *pageLexer) stateFunc {
   143  	l.consumeToSpace()
   144  	l.emit(tScParamVal)
   145  	return lexInsideShortcode
   146  }
   147  
   148  func lexShortCodeParamRawStringVal(l *pageLexer, typ ItemType) stateFunc {
   149  	openBacktickFound := false
   150  
   151  Loop:
   152  	for {
   153  		switch r := l.next(); {
   154  		case r == '`':
   155  			if openBacktickFound {
   156  				l.backup()
   157  				break Loop
   158  			} else {
   159  				openBacktickFound = true
   160  				l.ignore()
   161  			}
   162  		case r == eof:
   163  			return l.errorf("unterminated raw string in shortcode parameter-argument: '%s'", l.current())
   164  		}
   165  	}
   166  
   167  	l.emitString(typ)
   168  	l.next()
   169  	l.ignore()
   170  
   171  	return lexInsideShortcode
   172  }
   173  
   174  func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
   175  	openQuoteFound := false
   176  	escapedInnerQuoteFound := false
   177  	escapedQuoteState := 0
   178  
   179  Loop:
   180  	for {
   181  		switch r := l.next(); {
   182  		case r == '\\':
   183  			if l.peek() == '"' {
   184  				if openQuoteFound && !escapedQuotedValuesAllowed {
   185  					l.backup()
   186  					break Loop
   187  				} else if openQuoteFound {
   188  					// the coming quote is inside
   189  					escapedInnerQuoteFound = true
   190  					escapedQuoteState = 1
   191  				}
   192  			} else if l.peek() == '`' {
   193  				return l.errorf("unrecognized escape character")
   194  			}
   195  		case r == eof, r == '\n':
   196  			return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
   197  		case r == '"':
   198  			if escapedQuoteState == 0 {
   199  				if openQuoteFound {
   200  					l.backup()
   201  					break Loop
   202  
   203  				} else {
   204  					openQuoteFound = true
   205  					l.ignore()
   206  				}
   207  			} else {
   208  				escapedQuoteState = 0
   209  			}
   210  		}
   211  	}
   212  
   213  	if escapedInnerQuoteFound {
   214  		l.ignoreEscapesAndEmit(typ, true)
   215  	} else {
   216  		l.emitString(typ)
   217  	}
   218  
   219  	r := l.next()
   220  
   221  	if r == '\\' {
   222  		if l.peek() == '"' {
   223  			// ignore the escaped closing quote
   224  			l.ignore()
   225  			l.next()
   226  			l.ignore()
   227  		}
   228  	} else if r == '"' {
   229  		// ignore closing quote
   230  		l.ignore()
   231  	} else {
   232  		// handled by next state
   233  		l.backup()
   234  	}
   235  
   236  	return lexInsideShortcode
   237  }
   238  
   239  // Inline shortcodes has the form {{< myshortcode.inline >}}
   240  var inlineIdentifier = []byte("inline ")
   241  
   242  // scans an alphanumeric inside shortcode
   243  func lexIdentifierInShortcode(l *pageLexer) stateFunc {
   244  	lookForEnd := false
   245  Loop:
   246  	for {
   247  		switch r := l.next(); {
   248  		case isAlphaNumericOrHyphen(r):
   249  		// Allow forward slash inside names to make it possible to create namespaces.
   250  		case r == '/':
   251  		case r == '.':
   252  			l.isInline = l.hasPrefix(inlineIdentifier)
   253  			if !l.isInline {
   254  				return l.errorf("period in shortcode name only allowed for inline identifiers")
   255  			}
   256  		default:
   257  			l.backup()
   258  			word := string(l.input[l.start:l.pos])
   259  			if l.closingState > 0 && !l.openShortcodes[word] {
   260  				return l.errorf("closing tag for shortcode '%s' does not match start tag", word)
   261  			} else if l.closingState > 0 {
   262  				l.openShortcodes[word] = false
   263  				lookForEnd = true
   264  			}
   265  
   266  			l.closingState = 0
   267  			l.currShortcodeName = word
   268  			l.openShortcodes[word] = true
   269  			l.elementStepNum++
   270  			if l.isInline {
   271  				l.emit(tScNameInline)
   272  			} else {
   273  				l.emit(tScName)
   274  			}
   275  			break Loop
   276  		}
   277  	}
   278  
   279  	if lookForEnd {
   280  		return lexEndOfShortcode
   281  	}
   282  	return lexInsideShortcode
   283  }
   284  
   285  func lexEndOfShortcode(l *pageLexer) stateFunc {
   286  	l.isInline = false
   287  	if l.hasPrefix(l.currentRightShortcodeDelim()) {
   288  		return lexShortcodeRightDelim
   289  	}
   290  	switch r := l.next(); {
   291  	case isSpace(r):
   292  		l.ignore()
   293  	default:
   294  		return l.errorf("unclosed shortcode")
   295  	}
   296  	return lexEndOfShortcode
   297  }
   298  
   299  // scans the elements inside shortcode tags
   300  func lexInsideShortcode(l *pageLexer) stateFunc {
   301  	if l.hasPrefix(l.currentRightShortcodeDelim()) {
   302  		return lexShortcodeRightDelim
   303  	}
   304  	switch r := l.next(); {
   305  	case r == eof:
   306  		// eol is allowed inside shortcodes; this may go to end of document before it fails
   307  		return l.errorf("unclosed shortcode action")
   308  	case isSpace(r), isEndOfLine(r):
   309  		l.ignore()
   310  	case r == '=':
   311  		l.consumeSpace()
   312  		l.ignore()
   313  		peek := l.peek()
   314  		if peek == '"' || peek == '\\' {
   315  			return lexShortcodeQuotedParamVal(l, peek != '\\', tScParamVal)
   316  		} else if peek == '`' {
   317  			return lexShortCodeParamRawStringVal(l, tScParamVal)
   318  		}
   319  		return lexShortcodeParamVal
   320  	case r == '/':
   321  		if l.currShortcodeName == "" {
   322  			return l.errorf("got closing shortcode, but none is open")
   323  		}
   324  		l.closingState++
   325  		l.isInline = false
   326  		l.emit(tScClose)
   327  	case r == '\\':
   328  		l.ignore()
   329  		if l.peek() == '"' || l.peek() == '`' {
   330  			return lexShortcodeParam(l, true)
   331  		}
   332  	case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"' || r == '`'): // positional params can have quotes
   333  		l.backup()
   334  		return lexShortcodeParam(l, false)
   335  	case isAlphaNumeric(r):
   336  		l.backup()
   337  		return lexIdentifierInShortcode
   338  	default:
   339  		return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r)
   340  	}
   341  	return lexInsideShortcode
   342  }
   343  
   344  func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType {
   345  	return l.currLeftDelimItem
   346  }
   347  
   348  func (l *pageLexer) currentRightShortcodeDelimItem() ItemType {
   349  	return l.currRightDelimItem
   350  }
   351  
   352  func (l *pageLexer) currentLeftShortcodeDelim() []byte {
   353  	if l.currLeftDelimItem == tLeftDelimScWithMarkup {
   354  		return leftDelimScWithMarkup
   355  	}
   356  	return leftDelimScNoMarkup
   357  }
   358  
   359  func (l *pageLexer) currentRightShortcodeDelim() []byte {
   360  	if l.currRightDelimItem == tRightDelimScWithMarkup {
   361  		return rightDelimScWithMarkup
   362  	}
   363  	return rightDelimScNoMarkup
   364  }