github.com/SuCicada/su-hugo@v1.0.0/parser/pageparser/pageparser.go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pageparser

import (
	"bytes"
	"errors"
	"fmt"
	"io"

	"github.com/gohugoio/hugo/parser/metadecoders"
)

// Result holds the parse result.
type Result interface {
	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
	Iterator() *Iterator
	// Input returns the input to Parse.
	Input() []byte
}

var _ Result = (*pageLexer)(nil)

// Parse parses the page in the given reader according to the given Config.
func Parse(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexIntroSection)
}

// ContentFrontMatter holds the parsed front matter, its format, and the
// remaining content of a page.
type ContentFrontMatter struct {
	Content           []byte
	FrontMatter       map[string]any
	FrontMatterFormat metadecoders.Format
}

// ParseFrontMatterAndContent is a convenience function that extracts front matter
// and content from a content page.
func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
	var cf ContentFrontMatter

	psr, err := Parse(r, Config{})
	if err != nil {
		return cf, err
	}

	var frontMatterSource []byte

	iter := psr.Iterator()

	walkFn := func(item Item) bool {
		if frontMatterSource != nil {
			// The rest is content.
			cf.Content = psr.Input()[item.low:]
			// Done
			return false
		} else if item.IsFrontMatter() {
			cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
			frontMatterSource = item.Val(psr.Input())
		}
		return true
	}

	iter.PeekWalk(walkFn)

	cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
	return cf, err
}
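
// exampleParseFrontMatterAndContent is an illustrative sketch of how the
// convenience function above can be driven; the function name and the sample
// page below are hypothetical and not part of the package API.
func exampleParseFrontMatterAndContent() {
	// A minimal page with TOML front matter delimited by +++.
	src := []byte("+++\ntitle = \"Hello\"\n+++\n\nSome **Markdown** content.\n")

	cf, err := ParseFrontMatterAndContent(bytes.NewReader(src))
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}

	// For the page above, cf.FrontMatterFormat should be metadecoders.TOML,
	// cf.FrontMatter should contain the "title" key, and cf.Content should
	// hold everything after the closing +++ delimiter.
	fmt.Println(cf.FrontMatterFormat, cf.FrontMatter["title"], len(cf.Content))
}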

// FormatFromFrontMatterType returns the metadecoders format that corresponds
// to the given front matter item type, or the zero Format if the type does
// not denote front matter.
func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
	switch typ {
	case TypeFrontMatterJSON:
		return metadecoders.JSON
	case TypeFrontMatterORG:
		return metadecoders.ORG
	case TypeFrontMatterTOML:
		return metadecoders.TOML
	case TypeFrontMatterYAML:
		return metadecoders.YAML
	default:
		return ""
	}
}
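
// exampleFormatLookup is an illustrative sketch (the function name is
// hypothetical): the item type the lexer assigns to a front matter block
// selects the metadecoders format used to unmarshal it, and non-front-matter
// types map to the zero Format.
func exampleFormatLookup() {
	fmt.Println(FormatFromFrontMatterType(TypeFrontMatterYAML)) // metadecoders.YAML
	fmt.Println(FormatFromFrontMatterType(TypeFrontMatterTOML)) // metadecoders.TOML
	fmt.Println(FormatFromFrontMatterType(tEOF) == "")          // true: not a front matter type
}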

// ParseMain parses starting with the main section. Used in tests.
func ParseMain(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexMainSection)
}

// parseSection reads everything from r and parses it starting in the given state.
func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
	b, err := io.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("failed to read page content: %w", err)
	}
	return parseBytes(b, cfg, start)
}

// parseBytes runs the lexer over b starting in the given state.
func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
	lexer := newPageLexer(b, start, cfg)
	lexer.run()
	return lexer, nil
}

// NewIterator creates a new Iterator.
func NewIterator(items Items) *Iterator {
	return &Iterator{items: items, lastPos: -1}
}

// An Iterator has methods to iterate a parsed page with support for going
// back if needed.
type Iterator struct {
	items   Items
	lastPos int // position of the last item returned by Next
}

// Next consumes and returns the next item.
func (t *Iterator) Next() Item {
	t.lastPos++
	return t.Current()
}

var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")}

// Current will repeatedly return the current item without advancing.
func (t *Iterator) Current() Item {
	if t.lastPos >= len(t.items) {
		return errIndexOutOfBounds
	}
	return t.items[t.lastPos]
}
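
// exampleWalkItems is an illustrative sketch of the basic Next loop: it
// consumes the items produced by Parse until it reaches the EOF or error
// token. The function name and the sample page are hypothetical.
func exampleWalkItems() {
	src := []byte("---\ntitle: Hello\n---\n\nBody text.\n")

	res, err := Parse(bytes.NewReader(src), Config{})
	if err != nil {
		fmt.Println("parse failed:", err)
		return
	}

	it := res.Iterator()
	for {
		item := it.Next()
		if item.Type == tEOF || item.Type == tError {
			break
		}
		// Val resolves the item's byte range against the original input.
		fmt.Printf("item at %d: %q\n", item.Pos(), item.Val(res.Input()))
	}
}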

// Backup backs up one token.
func (t *Iterator) Backup() {
	if t.lastPos < 0 {
		panic("need to go forward before going back")
	}
	t.lastPos--
}

// Pos returns the index of the current item in the item list.
func (t *Iterator) Pos() int {
	return t.lastPos
}

// IsValueNext reports whether the next item is neither an error nor EOF.
func (t *Iterator) IsValueNext() bool {
	i := t.Peek()
	return i.Type != tError && i.Type != tEOF
}

// Peek looks at, but does not consume, the next item.
// Repeated, sequential calls will return the same item.
func (t *Iterator) Peek() Item {
	return t.items[t.lastPos+1]
}

// PeekWalk will feed the next items in the iterator to walkFn
// until it returns false.
func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
	for i := t.lastPos + 1; i < len(t.items); i++ {
		item := t.items[i]
		if !walkFn(item) {
			break
		}
	}
}

// Consume is a convenience method that consumes the next n tokens,
// backing off on errors and EOF.
func (t *Iterator) Consume(cnt int) {
	for i := 0; i < cnt; i++ {
		token := t.Next()
		if token.Type == tError || token.Type == tEOF {
			t.Backup()
			break
		}
	}
}
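
// exampleLookahead is an illustrative sketch (the function name is
// hypothetical) of the lookahead helpers on an Iterator obtained from
// Result.Iterator(): Peek inspects without advancing, Next/Backup move
// forward and back, and Consume advances a bounded number of items.
func exampleLookahead(it *Iterator) {
	if it.IsValueNext() {
		// Peek does not advance, so the following Next returns the same item.
		fmt.Printf("next item starts at byte %d\n", it.Peek().Pos())

		item := it.Next()
		it.Backup() // put the item back; the next Next returns it again
		_ = item
	}

	// Consume advances past up to three items, stopping early at EOF or error.
	it.Consume(3)
}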

// LineNumber returns the current line number. Used for logging.
func (t *Iterator) LineNumber(source []byte) int {
	return bytes.Count(source[:t.Current().low], lf) + 1
}

// IsProbablySourceOfItems returns true if the given source looks like the
// original source of the items.
// There may be some false positives, but that is highly unlikely and good enough
// for the planned purpose.
// It will also return false if the last item is not EOF (error situations) and
// false if items is empty, including when both source and items are empty.
func IsProbablySourceOfItems(source []byte, items Items) bool {
	if len(source) == 0 && len(items) == 0 {
		return false
	}
	if len(items) == 0 {
		return false
	}

	last := items[len(items)-1]
	if last.Type != tEOF {
		return false
	}

	if last.Pos() != len(source) {
		return false
	}

	for _, item := range items {
		if item.Type == tError {
			return false
		}
		if item.Type == tEOF {
			return true
		}

		if item.Pos() >= len(source) {
			return false
		}

		if item.firstByte != source[item.Pos()] {
			return false
		}
	}

	return true
}
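
// exampleSourceCheck is an illustrative sketch (the function name is
// hypothetical) of pairing IsProbablySourceOfItems with a parse: the items
// produced from a source should be recognized as belonging to that source,
// while a different byte slice is rejected. In-package code can reach the
// iterator's item slice directly; external callers would have to collect
// items through the Iterator API.
func exampleSourceCheck() {
	src := []byte("---\ntitle: Hello\n---\n\nBody.\n")

	res, err := Parse(bytes.NewReader(src), Config{})
	if err != nil {
		return
	}

	items := res.Iterator().items

	fmt.Println(IsProbablySourceOfItems(src, items))             // expected to be true
	fmt.Println(IsProbablySourceOfItems([]byte("other"), items)) // false: EOF position does not match
}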