github.com/kovansky/hugo@v0.92.3-0.20220224232819-63076e4ff19f/parser/pageparser/pageparser.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pageparser
    15  
    16  import (
    17  	"bytes"
    18  	"io"
    19  	"io/ioutil"
    20  
    21  	"github.com/gohugoio/hugo/parser/metadecoders"
    22  	"github.com/pkg/errors"
    23  )
    24  
    25  // Result holds the parse result.
    26  type Result interface {
    27  	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
    28  	Iterator() *Iterator
    29  	// Input returns the input to Parse.
    30  	Input() []byte
    31  }
    32  
    33  var _ Result = (*pageLexer)(nil)
    34  
    35  // Parse parses the page in the given reader according to the given Config.
    36  // TODO(bep) now that we have improved the "lazy order" init, it *may* be
    37  // some potential saving in doing a buffered approach where the first pass does
    38  // the frontmatter only.
    39  func Parse(r io.Reader, cfg Config) (Result, error) {
    40  	return parseSection(r, cfg, lexIntroSection)
    41  }
    42  
    43  type ContentFrontMatter struct {
    44  	Content           []byte
    45  	FrontMatter       map[string]interface{}
    46  	FrontMatterFormat metadecoders.Format
    47  }
    48  
    49  // ParseFrontMatterAndContent is a convenience method to extract front matter
    50  // and content from a content page.
    51  func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
    52  	var cf ContentFrontMatter
    53  
    54  	psr, err := Parse(r, Config{})
    55  	if err != nil {
    56  		return cf, err
    57  	}
    58  
    59  	var frontMatterSource []byte
    60  
    61  	iter := psr.Iterator()
    62  
    63  	walkFn := func(item Item) bool {
    64  		if frontMatterSource != nil {
    65  			// The rest is content.
    66  			cf.Content = psr.Input()[item.Pos:]
    67  			// Done
    68  			return false
    69  		} else if item.IsFrontMatter() {
    70  			cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
    71  			frontMatterSource = item.Val
    72  		}
    73  		return true
    74  	}
    75  
    76  	iter.PeekWalk(walkFn)
    77  
    78  	cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
    79  	return cf, err
    80  }
    81  
    82  func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
    83  	switch typ {
    84  	case TypeFrontMatterJSON:
    85  		return metadecoders.JSON
    86  	case TypeFrontMatterORG:
    87  		return metadecoders.ORG
    88  	case TypeFrontMatterTOML:
    89  		return metadecoders.TOML
    90  	case TypeFrontMatterYAML:
    91  		return metadecoders.YAML
    92  	default:
    93  		return ""
    94  	}
    95  }
    96  
    97  // ParseMain parses starting with the main section. Used in tests.
    98  func ParseMain(r io.Reader, cfg Config) (Result, error) {
    99  	return parseSection(r, cfg, lexMainSection)
   100  }
   101  
   102  func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
   103  	b, err := ioutil.ReadAll(r)
   104  	if err != nil {
   105  		return nil, errors.Wrap(err, "failed to read page content")
   106  	}
   107  	return parseBytes(b, cfg, start)
   108  }
   109  
   110  func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
   111  	lexer := newPageLexer(b, start, cfg)
   112  	lexer.run()
   113  	return lexer, nil
   114  }
   115  
   116  // An Iterator has methods to iterate a parsed page with support going back
   117  // if needed.
   118  type Iterator struct {
   119  	l       *pageLexer
   120  	lastPos int // position of the last item returned by nextItem
   121  }
   122  
   123  // consumes and returns the next item
   124  func (t *Iterator) Next() Item {
   125  	t.lastPos++
   126  	return t.Current()
   127  }
   128  
   129  // Input returns the input source.
   130  func (t *Iterator) Input() []byte {
   131  	return t.l.Input()
   132  }
   133  
   134  var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true}
   135  
   136  // Current will repeatably return the current item.
   137  func (t *Iterator) Current() Item {
   138  	if t.lastPos >= len(t.l.items) {
   139  		return errIndexOutOfBounds
   140  	}
   141  	return t.l.items[t.lastPos]
   142  }
   143  
   144  // backs up one token.
   145  func (t *Iterator) Backup() {
   146  	if t.lastPos < 0 {
   147  		panic("need to go forward before going back")
   148  	}
   149  	t.lastPos--
   150  }
   151  
   152  // check for non-error and non-EOF types coming next
   153  func (t *Iterator) IsValueNext() bool {
   154  	i := t.Peek()
   155  	return i.Type != tError && i.Type != tEOF
   156  }
   157  
   158  // look at, but do not consume, the next item
   159  // repeated, sequential calls will return the same item
   160  func (t *Iterator) Peek() Item {
   161  	return t.l.items[t.lastPos+1]
   162  }
   163  
   164  // PeekWalk will feed the next items in the iterator to walkFn
   165  // until it returns false.
   166  func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
   167  	for i := t.lastPos + 1; i < len(t.l.items); i++ {
   168  		item := t.l.items[i]
   169  		if !walkFn(item) {
   170  			break
   171  		}
   172  	}
   173  }
   174  
   175  // Consume is a convenience method to consume the next n tokens,
   176  // but back off Errors and EOF.
   177  func (t *Iterator) Consume(cnt int) {
   178  	for i := 0; i < cnt; i++ {
   179  		token := t.Next()
   180  		if token.Type == tError || token.Type == tEOF {
   181  			t.Backup()
   182  			break
   183  		}
   184  	}
   185  }
   186  
   187  // LineNumber returns the current line number. Used for logging.
   188  func (t *Iterator) LineNumber() int {
   189  	return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1
   190  }