github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/parser/pageparser/pageparser.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pageparser
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"fmt"
    20  	"io"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/gohugoio/hugo/parser/metadecoders"
    25  )
    26  
    27  // Result holds the parse result.
    28  type Result interface {
    29  	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
    30  	Iterator() *Iterator
    31  	// Input returns the input to Parse.
    32  	Input() []byte
    33  }
    34  
    35  var _ Result = (*pageLexer)(nil)
    36  
    37  // Parse parses the page in the given reader according to the given Config.
    38  func Parse(r io.Reader, cfg Config) (Result, error) {
    39  	return parseSection(r, cfg, lexIntroSection)
    40  }
    41  
    42  type ContentFrontMatter struct {
    43  	Content           []byte
    44  	FrontMatter       map[string]any
    45  	FrontMatterFormat metadecoders.Format
    46  }
    47  
    48  // ParseFrontMatterAndContent is a convenience method to extract front matter
    49  // and content from a content page.
    50  func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
    51  	var cf ContentFrontMatter
    52  
    53  	psr, err := Parse(r, Config{})
    54  	if err != nil {
    55  		return cf, err
    56  	}
    57  
    58  	var frontMatterSource []byte
    59  
    60  	iter := psr.Iterator()
    61  
    62  	walkFn := func(item Item) bool {
    63  		if frontMatterSource != nil {
    64  			// The rest is content.
    65  			cf.Content = psr.Input()[item.low:]
    66  			// Done
    67  			return false
    68  		} else if item.IsFrontMatter() {
    69  			cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
    70  			frontMatterSource = item.Val(psr.Input())
    71  		}
    72  		return true
    73  	}
    74  
    75  	iter.PeekWalk(walkFn)
    76  
    77  	cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
    78  	return cf, err
    79  }
    80  
    81  func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
    82  	switch typ {
    83  	case TypeFrontMatterJSON:
    84  		return metadecoders.JSON
    85  	case TypeFrontMatterORG:
    86  		return metadecoders.ORG
    87  	case TypeFrontMatterTOML:
    88  		return metadecoders.TOML
    89  	case TypeFrontMatterYAML:
    90  		return metadecoders.YAML
    91  	default:
    92  		return ""
    93  	}
    94  }
    95  
    96  // ParseMain parses starting with the main section. Used in tests.
    97  func ParseMain(r io.Reader, cfg Config) (Result, error) {
    98  	return parseSection(r, cfg, lexMainSection)
    99  }
   100  
   101  func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
   102  	b, err := io.ReadAll(r)
   103  	if err != nil {
   104  		return nil, fmt.Errorf("failed to read page content: %w", err)
   105  	}
   106  	return parseBytes(b, cfg, start)
   107  }
   108  
   109  func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
   110  	lexer := newPageLexer(b, start, cfg)
   111  	lexer.run()
   112  	return lexer, nil
   113  }
   114  
   115  // NewIterator creates a new Iterator.
   116  func NewIterator(items Items) *Iterator {
   117  	return &Iterator{items: items, lastPos: -1}
   118  }
   119  
   120  // An Iterator has methods to iterate a parsed page with support going back
   121  // if needed.
   122  type Iterator struct {
   123  	items   Items
   124  	lastPos int // position of the last item returned by nextItem
   125  }
   126  
   127  // consumes and returns the next item
   128  func (t *Iterator) Next() Item {
   129  	t.lastPos++
   130  	return t.Current()
   131  }
   132  
   133  var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")}
   134  
   135  // Current will repeatably return the current item.
   136  func (t *Iterator) Current() Item {
   137  	if t.lastPos >= len(t.items) {
   138  		return errIndexOutOfBounds
   139  	}
   140  	return t.items[t.lastPos]
   141  }
   142  
   143  // backs up one token.
   144  func (t *Iterator) Backup() {
   145  	if t.lastPos < 0 {
   146  		panic("need to go forward before going back")
   147  	}
   148  	t.lastPos--
   149  }
   150  
   151  // Pos returns the current position in the input.
   152  func (t *Iterator) Pos() int {
   153  	return t.lastPos
   154  }
   155  
   156  // check for non-error and non-EOF types coming next
   157  func (t *Iterator) IsValueNext() bool {
   158  	i := t.Peek()
   159  	return i.Type != tError && i.Type != tEOF
   160  }
   161  
   162  // look at, but do not consume, the next item
   163  // repeated, sequential calls will return the same item
   164  func (t *Iterator) Peek() Item {
   165  	return t.items[t.lastPos+1]
   166  }
   167  
   168  // PeekWalk will feed the next items in the iterator to walkFn
   169  // until it returns false.
   170  func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
   171  	for i := t.lastPos + 1; i < len(t.items); i++ {
   172  		item := t.items[i]
   173  		if !walkFn(item) {
   174  			break
   175  		}
   176  	}
   177  }
   178  
   179  // Consume is a convenience method to consume the next n tokens,
   180  // but back off Errors and EOF.
   181  func (t *Iterator) Consume(cnt int) {
   182  	for i := 0; i < cnt; i++ {
   183  		token := t.Next()
   184  		if token.Type == tError || token.Type == tEOF {
   185  			t.Backup()
   186  			break
   187  		}
   188  	}
   189  }
   190  
   191  // LineNumber returns the current line number. Used for logging.
   192  func (t *Iterator) LineNumber(source []byte) int {
   193  	return bytes.Count(source[:t.Current().low], lf) + 1
   194  }
   195  
   196  // IsProbablySourceOfItems returns true if the given source looks like original
   197  // source of the items.
   198  // There may be some false positives, but that is highly unlikely and good enough
   199  // for the planned purpose.
   200  // It will also return false if the last item is not EOF (error situations) and
   201  // true if both source and items are empty.
   202  func IsProbablySourceOfItems(source []byte, items Items) bool {
   203  	if len(source) == 0 && len(items) == 0 {
   204  		return false
   205  	}
   206  	if len(items) == 0 {
   207  		return false
   208  	}
   209  
   210  	last := items[len(items)-1]
   211  	if last.Type != tEOF {
   212  		return false
   213  	}
   214  
   215  	if last.Pos() != len(source) {
   216  		return false
   217  	}
   218  
   219  	for _, item := range items {
   220  		if item.Type == tError {
   221  			return false
   222  		}
   223  		if item.Type == tEOF {
   224  			return true
   225  		}
   226  
   227  		if item.Pos() >= len(source) {
   228  			return false
   229  		}
   230  
   231  		if item.firstByte != source[item.Pos()] {
   232  			return false
   233  		}
   234  	}
   235  
   236  	return true
   237  }
   238  
   239  var hasShortcodeRe = regexp.MustCompile(`{{[%,<][^\/]`)
   240  
   241  // HasShortcode returns true if the given string contains a shortcode.
   242  func HasShortcode(s string) bool {
   243  	// Fast path for the common case.
   244  	if !strings.Contains(s, "{{") {
   245  		return false
   246  	}
   247  	return hasShortcodeRe.MatchString(s)
   248  }