github.com/SuCicada/su-hugo@v1.0.0/parser/pageparser/pageparser.go (about) 1 // Copyright 2019 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package pageparser 15 16 import ( 17 "bytes" 18 "errors" 19 "fmt" 20 "io" 21 "io/ioutil" 22 23 "github.com/gohugoio/hugo/parser/metadecoders" 24 ) 25 26 // Result holds the parse result. 27 type Result interface { 28 // Iterator returns a new Iterator positioned at the beginning of the parse tree. 29 Iterator() *Iterator 30 // Input returns the input to Parse. 31 Input() []byte 32 } 33 34 var _ Result = (*pageLexer)(nil) 35 36 // Parse parses the page in the given reader according to the given Config. 37 func Parse(r io.Reader, cfg Config) (Result, error) { 38 return parseSection(r, cfg, lexIntroSection) 39 } 40 41 type ContentFrontMatter struct { 42 Content []byte 43 FrontMatter map[string]any 44 FrontMatterFormat metadecoders.Format 45 } 46 47 // ParseFrontMatterAndContent is a convenience method to extract front matter 48 // and content from a content page. 49 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { 50 var cf ContentFrontMatter 51 52 psr, err := Parse(r, Config{}) 53 if err != nil { 54 return cf, err 55 } 56 57 var frontMatterSource []byte 58 59 iter := psr.Iterator() 60 61 walkFn := func(item Item) bool { 62 if frontMatterSource != nil { 63 // The rest is content. 64 cf.Content = psr.Input()[item.low:] 65 // Done 66 return false 67 } else if item.IsFrontMatter() { 68 cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) 69 frontMatterSource = item.Val(psr.Input()) 70 } 71 return true 72 } 73 74 iter.PeekWalk(walkFn) 75 76 cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) 77 return cf, err 78 } 79 80 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format { 81 switch typ { 82 case TypeFrontMatterJSON: 83 return metadecoders.JSON 84 case TypeFrontMatterORG: 85 return metadecoders.ORG 86 case TypeFrontMatterTOML: 87 return metadecoders.TOML 88 case TypeFrontMatterYAML: 89 return metadecoders.YAML 90 default: 91 return "" 92 } 93 } 94 95 // ParseMain parses starting with the main section. Used in tests. 96 func ParseMain(r io.Reader, cfg Config) (Result, error) { 97 return parseSection(r, cfg, lexMainSection) 98 } 99 100 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) { 101 b, err := ioutil.ReadAll(r) 102 if err != nil { 103 return nil, fmt.Errorf("failed to read page content: %w", err) 104 } 105 return parseBytes(b, cfg, start) 106 } 107 108 func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) { 109 lexer := newPageLexer(b, start, cfg) 110 lexer.run() 111 return lexer, nil 112 } 113 114 // NewIterator creates a new Iterator. 115 func NewIterator(items Items) *Iterator { 116 return &Iterator{items: items, lastPos: -1} 117 } 118 119 // An Iterator has methods to iterate a parsed page with support going back 120 // if needed. 121 type Iterator struct { 122 items Items 123 lastPos int // position of the last item returned by nextItem 124 } 125 126 // consumes and returns the next item 127 func (t *Iterator) Next() Item { 128 t.lastPos++ 129 return t.Current() 130 } 131 132 var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")} 133 134 // Current will repeatably return the current item. 135 func (t *Iterator) Current() Item { 136 if t.lastPos >= len(t.items) { 137 return errIndexOutOfBounds 138 } 139 return t.items[t.lastPos] 140 } 141 142 // backs up one token. 143 func (t *Iterator) Backup() { 144 if t.lastPos < 0 { 145 panic("need to go forward before going back") 146 } 147 t.lastPos-- 148 } 149 150 // Pos returns the current position in the input. 151 func (t *Iterator) Pos() int { 152 return t.lastPos 153 } 154 155 // check for non-error and non-EOF types coming next 156 func (t *Iterator) IsValueNext() bool { 157 i := t.Peek() 158 return i.Type != tError && i.Type != tEOF 159 } 160 161 // look at, but do not consume, the next item 162 // repeated, sequential calls will return the same item 163 func (t *Iterator) Peek() Item { 164 return t.items[t.lastPos+1] 165 } 166 167 // PeekWalk will feed the next items in the iterator to walkFn 168 // until it returns false. 169 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) { 170 for i := t.lastPos + 1; i < len(t.items); i++ { 171 item := t.items[i] 172 if !walkFn(item) { 173 break 174 } 175 } 176 } 177 178 // Consume is a convenience method to consume the next n tokens, 179 // but back off Errors and EOF. 180 func (t *Iterator) Consume(cnt int) { 181 for i := 0; i < cnt; i++ { 182 token := t.Next() 183 if token.Type == tError || token.Type == tEOF { 184 t.Backup() 185 break 186 } 187 } 188 } 189 190 // LineNumber returns the current line number. Used for logging. 191 func (t *Iterator) LineNumber(source []byte) int { 192 return bytes.Count(source[:t.Current().low], lf) + 1 193 } 194 195 // IsProbablySourceOfItems returns true if the given source looks like original 196 // source of the items. 197 // There may be some false positives, but that is highly unlikely and good enough 198 // for the planned purpose. 199 // It will also return false if the last item is not EOF (error situations) and 200 // true if both source and items are empty. 201 func IsProbablySourceOfItems(source []byte, items Items) bool { 202 if len(source) == 0 && len(items) == 0 { 203 return false 204 } 205 if len(items) == 0 { 206 return false 207 } 208 209 last := items[len(items)-1] 210 if last.Type != tEOF { 211 return false 212 } 213 214 if last.Pos() != len(source) { 215 return false 216 } 217 218 for _, item := range items { 219 if item.Type == tError { 220 return false 221 } 222 if item.Type == tEOF { 223 return true 224 } 225 226 if item.Pos() >= len(source) { 227 return false 228 } 229 230 if item.firstByte != source[item.Pos()] { 231 return false 232 } 233 } 234 235 return true 236 }