github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/parser/pageparser/pageparser.go (about) 1 // Copyright 2019 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package pageparser 15 16 import ( 17 "bytes" 18 "errors" 19 "fmt" 20 "io" 21 "regexp" 22 "strings" 23 24 "github.com/gohugoio/hugo/parser/metadecoders" 25 ) 26 27 // Result holds the parse result. 28 type Result interface { 29 // Iterator returns a new Iterator positioned at the beginning of the parse tree. 30 Iterator() *Iterator 31 // Input returns the input to Parse. 32 Input() []byte 33 } 34 35 var _ Result = (*pageLexer)(nil) 36 37 // Parse parses the page in the given reader according to the given Config. 38 func Parse(r io.Reader, cfg Config) (Result, error) { 39 return parseSection(r, cfg, lexIntroSection) 40 } 41 42 type ContentFrontMatter struct { 43 Content []byte 44 FrontMatter map[string]any 45 FrontMatterFormat metadecoders.Format 46 } 47 48 // ParseFrontMatterAndContent is a convenience method to extract front matter 49 // and content from a content page. 50 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { 51 var cf ContentFrontMatter 52 53 psr, err := Parse(r, Config{}) 54 if err != nil { 55 return cf, err 56 } 57 58 var frontMatterSource []byte 59 60 iter := psr.Iterator() 61 62 walkFn := func(item Item) bool { 63 if frontMatterSource != nil { 64 // The rest is content. 65 cf.Content = psr.Input()[item.low:] 66 // Done 67 return false 68 } else if item.IsFrontMatter() { 69 cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) 70 frontMatterSource = item.Val(psr.Input()) 71 } 72 return true 73 } 74 75 iter.PeekWalk(walkFn) 76 77 cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) 78 return cf, err 79 } 80 81 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format { 82 switch typ { 83 case TypeFrontMatterJSON: 84 return metadecoders.JSON 85 case TypeFrontMatterORG: 86 return metadecoders.ORG 87 case TypeFrontMatterTOML: 88 return metadecoders.TOML 89 case TypeFrontMatterYAML: 90 return metadecoders.YAML 91 default: 92 return "" 93 } 94 } 95 96 // ParseMain parses starting with the main section. Used in tests. 97 func ParseMain(r io.Reader, cfg Config) (Result, error) { 98 return parseSection(r, cfg, lexMainSection) 99 } 100 101 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) { 102 b, err := io.ReadAll(r) 103 if err != nil { 104 return nil, fmt.Errorf("failed to read page content: %w", err) 105 } 106 return parseBytes(b, cfg, start) 107 } 108 109 func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) { 110 lexer := newPageLexer(b, start, cfg) 111 lexer.run() 112 return lexer, nil 113 } 114 115 // NewIterator creates a new Iterator. 116 func NewIterator(items Items) *Iterator { 117 return &Iterator{items: items, lastPos: -1} 118 } 119 120 // An Iterator has methods to iterate a parsed page with support going back 121 // if needed. 122 type Iterator struct { 123 items Items 124 lastPos int // position of the last item returned by nextItem 125 } 126 127 // consumes and returns the next item 128 func (t *Iterator) Next() Item { 129 t.lastPos++ 130 return t.Current() 131 } 132 133 var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")} 134 135 // Current will repeatably return the current item. 136 func (t *Iterator) Current() Item { 137 if t.lastPos >= len(t.items) { 138 return errIndexOutOfBounds 139 } 140 return t.items[t.lastPos] 141 } 142 143 // backs up one token. 144 func (t *Iterator) Backup() { 145 if t.lastPos < 0 { 146 panic("need to go forward before going back") 147 } 148 t.lastPos-- 149 } 150 151 // Pos returns the current position in the input. 152 func (t *Iterator) Pos() int { 153 return t.lastPos 154 } 155 156 // check for non-error and non-EOF types coming next 157 func (t *Iterator) IsValueNext() bool { 158 i := t.Peek() 159 return i.Type != tError && i.Type != tEOF 160 } 161 162 // look at, but do not consume, the next item 163 // repeated, sequential calls will return the same item 164 func (t *Iterator) Peek() Item { 165 return t.items[t.lastPos+1] 166 } 167 168 // PeekWalk will feed the next items in the iterator to walkFn 169 // until it returns false. 170 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) { 171 for i := t.lastPos + 1; i < len(t.items); i++ { 172 item := t.items[i] 173 if !walkFn(item) { 174 break 175 } 176 } 177 } 178 179 // Consume is a convenience method to consume the next n tokens, 180 // but back off Errors and EOF. 181 func (t *Iterator) Consume(cnt int) { 182 for i := 0; i < cnt; i++ { 183 token := t.Next() 184 if token.Type == tError || token.Type == tEOF { 185 t.Backup() 186 break 187 } 188 } 189 } 190 191 // LineNumber returns the current line number. Used for logging. 192 func (t *Iterator) LineNumber(source []byte) int { 193 return bytes.Count(source[:t.Current().low], lf) + 1 194 } 195 196 // IsProbablySourceOfItems returns true if the given source looks like original 197 // source of the items. 198 // There may be some false positives, but that is highly unlikely and good enough 199 // for the planned purpose. 200 // It will also return false if the last item is not EOF (error situations) and 201 // true if both source and items are empty. 202 func IsProbablySourceOfItems(source []byte, items Items) bool { 203 if len(source) == 0 && len(items) == 0 { 204 return false 205 } 206 if len(items) == 0 { 207 return false 208 } 209 210 last := items[len(items)-1] 211 if last.Type != tEOF { 212 return false 213 } 214 215 if last.Pos() != len(source) { 216 return false 217 } 218 219 for _, item := range items { 220 if item.Type == tError { 221 return false 222 } 223 if item.Type == tEOF { 224 return true 225 } 226 227 if item.Pos() >= len(source) { 228 return false 229 } 230 231 if item.firstByte != source[item.Pos()] { 232 return false 233 } 234 } 235 236 return true 237 } 238 239 var hasShortcodeRe = regexp.MustCompile(`{{[%,<][^\/]`) 240 241 // HasShortcode returns true if the given string contains a shortcode. 242 func HasShortcode(s string) bool { 243 // Fast path for the common case. 244 if !strings.Contains(s, "{{") { 245 return false 246 } 247 return hasShortcodeRe.MatchString(s) 248 }