github.com/neohugo/neohugo@v0.123.8/common/paths/pathparser.go (about) 1 // Copyright 2024 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package paths 15 16 import ( 17 "path" 18 "path/filepath" 19 "runtime" 20 "strings" 21 "sync" 22 23 "github.com/neohugo/neohugo/common/types" 24 "github.com/neohugo/neohugo/hugofs/files" 25 "github.com/neohugo/neohugo/identity" 26 ) 27 28 var defaultPathParser PathParser 29 30 // PathParser parses a path into a Path. 31 type PathParser struct { 32 // Maps the language code to its index in the languages/sites slice. 33 LanguageIndex map[string]int 34 35 // Reports whether the given language is disabled. 36 IsLangDisabled func(string) bool 37 } 38 39 // Parse parses component c with path s into Path using the default path parser. 40 func Parse(c, s string) *Path { 41 return defaultPathParser.Parse(c, s) 42 } 43 44 // NormalizePathString returns a normalized path string using the very basic Hugo rules. 45 func NormalizePathStringBasic(s string) string { 46 // All lower case. 47 s = strings.ToLower(s) 48 49 // Replace spaces with hyphens. 50 s = strings.ReplaceAll(s, " ", "-") 51 52 return s 53 } 54 55 // ParseIdentity parses component c with path s into a StringIdentity. 
func (pp *PathParser) ParseIdentity(c, s string) identity.StringIdentity {
	// The Path is borrowed from pathPool; the deferred putPath hands it back
	// after the identity string has been computed.
	p := pp.parsePooled(c, s)
	defer putPath(p)
	return identity.StringIdentity(p.IdentifierBase())
}

// ParseBaseAndBaseNameNoIdentifier parses component c with path s into a base and a base name without any identifier.
// The first result is Path.Base, the second is Path.BaseNameNoIdentifier.
func (pp *PathParser) ParseBaseAndBaseNameNoIdentifier(c, s string) (string, string) {
	// Uses a pooled Path that is returned to pathPool once both strings
	// have been extracted.
	p := pp.parsePooled(c, s)
	defer putPath(p)
	return p.Base(), p.BaseNameNoIdentifier()
}

// parsePooled normalizes s with NormalizePathStringBasic and parses it into a
// Path taken from pathPool. Callers hand the Path back with putPath when they
// are done with it. Unlike parse, it does not set the unnormalized field.
// It panics if doParse reports an error.
func (pp *PathParser) parsePooled(c, s string) *Path {
	s = NormalizePathStringBasic(s)
	p := getPath()
	p.component = c
	p, err := pp.doParse(c, s, p)
	if err != nil {
		panic(err)
	}
	return p
}

// Parse parses component c with path s into Path using Hugo's content path rules.
// It panics if parsing fails.
func (pp *PathParser) Parse(c, s string) *Path {
	p, err := pp.parse(c, s)
	if err != nil {
		panic(err)
	}
	return p
}

// newPath returns a freshly allocated Path for the given component with all
// position fields set to -1, meaning "not found yet".
func (pp *PathParser) newPath(component string) *Path {
	return &Path{
		component:             component,
		posContainerLow:       -1,
		posContainerHigh:      -1,
		posSectionHigh:        -1,
		posIdentifierLanguage: -1,
	}
}

// parse parses s twice when needed: once in normalized form (the returned
// Path) and, if normalization changed the string, once more as given so the
// original casing stays available through Path.Unnormalized.
func (pp *PathParser) parse(component, s string) (*Path, error) {
	ss := NormalizePathStringBasic(s)

	p, err := pp.doParse(component, ss, pp.newPath(component))
	if err != nil {
		return nil, err
	}

	if s != ss {
		var err error
		// Preserve the original case for titles etc.
		p.unnormalized, err = pp.doParse(component, s, pp.newPath(component))
		if err != nil {
			return nil, err
		}
	} else {
		// Nothing changed during normalization, so the Path is its own
		// unnormalized form.
		p.unnormalized = p
	}

	return p, nil
}

// doParse fills in p from the path s and returns it.
//
// s is first brought into canonical form (leading slash, no trailing slash,
// "/" for the empty path). The string is then scanned once from right to
// left to record where the last path element starts, where its parent
// directory starts, where the first path element ends, and which
// dot-separated identifiers (extension, language) the last element carries.
// Finally, for the content and archetypes components, the bundle type is
// derived from the name of the last element.
//
// As written, the returned error is always nil.
func (pp *PathParser) doParse(component, s string, p *Path) (*Path, error) {
	// Language identifiers are only considered when a language index is
	// configured and the component is content or layouts.
	hasLang := pp.LanguageIndex != nil
	hasLang = hasLang && (component == files.ComponentFolderContent || component == files.ComponentFolderLayouts)

	if runtime.GOOS == "windows" {
		// Convert to forward slashes and clean; path.Clean yields "." for an
		// empty path, which is mapped back to "".
		s = path.Clean(filepath.ToSlash(s))
		if s == "." {
			s = ""
		}
	}

	if s == "" {
		s = "/"
	}

	// Leading slash, no trailing slash.
	if !strings.HasPrefix(s, "/") {
		s = "/" + s
	}

	if s != "/" && s[len(s)-1] == '/' {
		s = s[:len(s)-1]
	}

	p.s = s
	slashCount := 0

	// Scan from the end so the last path element is seen first.
	for i := len(s) - 1; i >= 0; i-- {
		c := s[i]

		switch c {
		case '.':
			// Dots only count as identifier separators while we are still
			// inside the last path element (no slash seen yet).
			if p.posContainerHigh == -1 {
				// The candidate identifier runs from just after this dot up
				// to the dot that precedes the previously recorded
				// identifier, or to the end of the string for the first one.
				var high int
				if len(p.identifiers) > 0 {
					high = p.identifiers[len(p.identifiers)-1].Low - 1
				} else {
					high = len(p.s)
				}
				id := types.LowHigh{Low: i + 1, High: high}
				if len(p.identifiers) == 0 {
					// The rightmost identifier is always recorded; Ext reads it.
					p.identifiers = append(p.identifiers, id)
				} else if len(p.identifiers) == 1 {
					// Check for a valid language.
					s := p.s[id.Low:id.High]

					if hasLang {
						var disabled bool
						_, langFound := pp.LanguageIndex[s]
						if !langFound {
							// Not a configured language; it still counts as a
							// language identifier if it is a disabled one.
							disabled = pp.IsLangDisabled != nil && pp.IsLangDisabled(s)
							if disabled {
								p.disabled = true
								langFound = true
							}
						}
						if langFound {
							p.posIdentifierLanguage = 1
							p.identifiers = append(p.identifiers, id)
						}
					}
				}
			}
		case '/':
			slashCount++
			// The first slash seen (from the right) marks the start of the
			// last element, the second one the start of its parent directory.
			if p.posContainerHigh == -1 {
				p.posContainerHigh = i + 1
			} else if p.posContainerLow == -1 {
				p.posContainerLow = i + 1
			}
			// Overwritten on every non-leading slash, so after the loop this
			// is the position of the slash that ends the first path element.
			if i > 0 {
				p.posSectionHigh = i
			}
		}
	}

	isContentComponent := p.component == files.ComponentFolderContent || p.component == files.ComponentFolderArchetypes
	isContent := isContentComponent && files.IsContentExt(p.Ext())

	if isContent {
		// b is the last element's name with all recorded identifiers removed.
		id := p.identifiers[len(p.identifiers)-1]
		b := p.s[p.posContainerHigh : id.Low-1]
		switch b {
		case "index":
			p.bundleType = PathTypeLeaf
		case "_index":
			p.bundleType = PathTypeBranch
		default:
			p.bundleType = PathTypeContentSingle
		}

		// A leaf bundle with exactly two slashes (e.g. /a/index.md) gets no
		// section: Section returns "" when posSectionHigh <= 0.
		if slashCount == 2 && p.IsLeafBundle() {
			p.posSectionHigh = 0
		}
	}

	return p, nil
}

// ModifyPathBundleTypeResource changes the bundle type of p in place: content
// files become PathTypeContentResource, everything else PathTypeFile.
func ModifyPathBundleTypeResource(p *Path) {
	if p.IsContent() {
		p.bundleType = PathTypeContentResource
	} else {
		p.bundleType = PathTypeFile
	}
}

// PathType classifies a parsed path. The constants below are ordered, and
// IsContent, isContentPage and IsBundle rely on that order by comparing
// with >=.
type PathType int

const (
	// A generic resource, e.g. a JSON file.
	PathTypeFile PathType = iota

	// All below are content files.
	// A resource of a content type with front matter.
	PathTypeContentResource

	// E.g. /blog/my-post.md
	PathTypeContentSingle

	// All below are bundled content files.

	// Leaf bundles, e.g. /blog/my-post/index.md
	PathTypeLeaf

	// Branch bundles, e.g. /blog/_index.md
	PathTypeBranch
)

// Path is a parsed path. It stores the canonical path string together with
// byte positions into that string, from which the accessor methods slice
// their results.
type Path struct {
	// Note: Any additions to this struct should also be added to the pathPool.
	// NOTE(review): presumably this refers to reset, which clears every field
	// before a Path goes back to the pool — confirm.

	// s is the canonical path: leading slash, no trailing slash.
	s string

	// posContainerHigh is the index just after the last slash (start of the
	// last path element). posContainerLow is the index just after the
	// second-to-last slash, or -1 if there is none. posSectionHigh is the
	// index of the slash ending the first path element; doParse leaves it at
	// -1 when there is no such slash and sets it to 0 for a leaf bundle
	// directly below the root.
	posContainerLow  int
	posContainerHigh int
	posSectionHigh   int

	component  string
	bundleType PathType

	// identifiers holds byte ranges into s. Index 0 is the text after the
	// last dot of the last element; index 1, if present, is a language
	// identifier.
	identifiers []types.LowHigh

	// posIdentifierLanguage is the index into identifiers of the language
	// identifier, or -1. disabled is set when that language was reported as
	// disabled by PathParser.IsLangDisabled.
	posIdentifierLanguage int
	disabled              bool

	// trimLeadingSlash makes norm strip the leading slash from returned
	// strings.
	trimLeadingSlash bool

	// unnormalized is set by parse: the Path parsed from the string as given,
	// or the Path itself when normalization changed nothing. It is nil for
	// pooled paths.
	unnormalized *Path
}

// pathPool recycles Path values used by parsePooled.
var pathPool = &sync.Pool{
	New: func() any {
		p := &Path{}
		p.reset()
		return p
	},
}

// getPath takes a Path from pathPool.
func getPath() *Path {
	return pathPool.Get().(*Path)
}

// putPath resets p and returns it to pathPool. p must not be used afterwards.
func putPath(p *Path) {
	p.reset()
	pathPool.Put(p)
}

// reset restores p to the same initial state newPath produces (positions at
// -1, everything else zeroed), with an empty component.
func (p *Path) reset() {
	p.s = ""
	p.posContainerLow = -1
	p.posContainerHigh = -1
	p.posSectionHigh = -1
	p.component = ""
	p.bundleType = 0
	// Truncate instead of dropping the slice so its capacity is reused.
	p.identifiers = p.identifiers[:0]
	p.posIdentifierLanguage = -1
	p.disabled = false
	p.trimLeadingSlash = false
	p.unnormalized = nil
}

// TrimLeadingSlash returns a copy of the Path with the leading slash removed.
303 func (p Path) TrimLeadingSlash() *Path { 304 p.trimLeadingSlash = true 305 return &p 306 } 307 308 func (p *Path) norm(s string) string { 309 if p.trimLeadingSlash { 310 s = strings.TrimPrefix(s, "/") 311 } 312 return s 313 } 314 315 // IdentifierBase satifies identity.Identity. 316 func (p *Path) IdentifierBase() string { 317 return p.Base() 318 } 319 320 // Component returns the component for this path (e.g. "content"). 321 func (p *Path) Component() string { 322 return p.component 323 } 324 325 // Container returns the base name of the container directory for this path. 326 func (p *Path) Container() string { 327 if p.posContainerLow == -1 { 328 return "" 329 } 330 return p.norm(p.s[p.posContainerLow : p.posContainerHigh-1]) 331 } 332 333 // ContainerDir returns the container directory for this path. 334 // For content bundles this will be the parent directory. 335 func (p *Path) ContainerDir() string { 336 if p.posContainerLow == -1 || !p.IsBundle() { 337 return p.Dir() 338 } 339 return p.norm(p.s[:p.posContainerLow-1]) 340 } 341 342 // Section returns the first path element (section). 343 func (p *Path) Section() string { 344 if p.posSectionHigh <= 0 { 345 return "" 346 } 347 return p.norm(p.s[1:p.posSectionHigh]) 348 } 349 350 // IsContent returns true if the path is a content file (e.g. mypost.md). 351 // Note that this will also return true for content files in a bundle. 352 func (p *Path) IsContent() bool { 353 return p.BundleType() >= PathTypeContentResource 354 } 355 356 // isContentPage returns true if the path is a content file (e.g. mypost.md), 357 // but nof if inside a leaf bundle. 358 func (p *Path) isContentPage() bool { 359 return p.BundleType() >= PathTypeContentSingle 360 } 361 362 // Name returns the last element of path. 363 func (p *Path) Name() string { 364 if p.posContainerHigh > 0 { 365 return p.s[p.posContainerHigh:] 366 } 367 return p.s 368 } 369 370 // Name returns the last element of path withhout any extension. 
371 func (p *Path) NameNoExt() string { 372 if i := p.identifierIndex(0); i != -1 { 373 return p.s[p.posContainerHigh : p.identifiers[i].Low-1] 374 } 375 return p.s[p.posContainerHigh:] 376 } 377 378 // Name returns the last element of path withhout any language identifier. 379 func (p *Path) NameNoLang() string { 380 i := p.identifierIndex(p.posIdentifierLanguage) 381 if i == -1 { 382 return p.Name() 383 } 384 385 return p.s[p.posContainerHigh:p.identifiers[i].Low-1] + p.s[p.identifiers[i].High:] 386 } 387 388 // BaseNameNoIdentifier returns the logcical base name for a resource without any idenifier (e.g. no extension). 389 // For bundles this will be the containing directory's name, e.g. "blog". 390 func (p *Path) BaseNameNoIdentifier() string { 391 if p.IsBundle() { 392 return p.Container() 393 } 394 return p.NameNoIdentifier() 395 } 396 397 // NameNoIdentifier returns the last element of path withhout any identifier (e.g. no extension). 398 func (p *Path) NameNoIdentifier() string { 399 if len(p.identifiers) > 0 { 400 return p.s[p.posContainerHigh : p.identifiers[len(p.identifiers)-1].Low-1] 401 } 402 return p.s[p.posContainerHigh:] 403 } 404 405 // Dir returns all but the last element of path, typically the path's directory. 406 func (p *Path) Dir() (d string) { 407 if p.posContainerHigh > 0 { 408 d = p.s[:p.posContainerHigh-1] 409 } 410 if d == "" { 411 d = "/" 412 } 413 d = p.norm(d) 414 return 415 } 416 417 // Path returns the full path. 418 func (p *Path) Path() (d string) { 419 return p.norm(p.s) 420 } 421 422 // Unnormalized returns the Path with the original case preserved. 423 func (p *Path) Unnormalized() *Path { 424 return p.unnormalized 425 } 426 427 // PathNoLang returns the Path but with any language identifier removed. 428 func (p *Path) PathNoLang() string { 429 return p.base(true, false) 430 } 431 432 // PathNoIdentifier returns the Path but with any identifier (ext, lang) removed. 
433 func (p *Path) PathNoIdentifier() string { 434 return p.base(false, false) 435 } 436 437 // PathRel returns the path relativeto the given owner. 438 func (p *Path) PathRel(owner *Path) string { 439 ob := owner.Base() 440 if !strings.HasSuffix(ob, "/") { 441 ob += "/" 442 } 443 return strings.TrimPrefix(p.Path(), ob) 444 } 445 446 // BaseRel returns the base path relative to the given owner. 447 func (p *Path) BaseRel(owner *Path) string { 448 ob := owner.Base() 449 if ob == "/" { 450 ob = "" 451 } 452 return p.Base()[len(ob)+1:] 453 } 454 455 // For content files, Base returns the path without any identifiers (extension, language code etc.). 456 // Any 'index' as the last path element is ignored. 457 // 458 // For other files (Resources), any extension is kept. 459 func (p *Path) Base() string { 460 return p.base(!p.isContentPage(), p.IsBundle()) 461 } 462 463 // BaseNoLeadingSlash returns the base path without the leading slash. 464 func (p *Path) BaseNoLeadingSlash() string { 465 return p.Base()[1:] 466 } 467 468 func (p *Path) base(preserveExt, isBundle bool) string { 469 if len(p.identifiers) == 0 { 470 return p.norm(p.s) 471 } 472 473 if preserveExt && len(p.identifiers) == 1 { 474 // Preserve extension. 475 return p.norm(p.s) 476 } 477 478 id := p.identifiers[len(p.identifiers)-1] 479 high := id.Low - 1 480 481 if isBundle { 482 high = p.posContainerHigh - 1 483 } 484 485 if high == 0 { 486 high++ 487 } 488 489 if !preserveExt { 490 return p.norm(p.s[:high]) 491 } 492 493 // For txt files etc. we want to preserve the extension. 
494 id = p.identifiers[0] 495 496 return p.norm(p.s[:high] + p.s[id.Low-1:id.High]) 497 } 498 499 func (p *Path) Ext() string { 500 return p.identifierAsString(0) 501 } 502 503 func (p *Path) Lang() string { 504 return p.identifierAsString(1) 505 } 506 507 func (p *Path) Identifier(i int) string { 508 return p.identifierAsString(i) 509 } 510 511 func (p *Path) Disabled() bool { 512 return p.disabled 513 } 514 515 func (p *Path) Identifiers() []string { 516 ids := make([]string, len(p.identifiers)) 517 for i, id := range p.identifiers { 518 ids[i] = p.s[id.Low:id.High] 519 } 520 return ids 521 } 522 523 func (p *Path) IsHTML() bool { 524 return files.IsHTML(p.Ext()) 525 } 526 527 func (p *Path) BundleType() PathType { 528 return p.bundleType 529 } 530 531 func (p *Path) IsBundle() bool { 532 return p.bundleType >= PathTypeLeaf 533 } 534 535 func (p *Path) IsBranchBundle() bool { 536 return p.bundleType == PathTypeBranch 537 } 538 539 func (p *Path) IsLeafBundle() bool { 540 return p.bundleType == PathTypeLeaf 541 } 542 543 func (p Path) ForBundleType(t PathType) *Path { 544 p.bundleType = t 545 return &p 546 } 547 548 func (p *Path) identifierAsString(i int) string { 549 i = p.identifierIndex(i) 550 if i == -1 { 551 return "" 552 } 553 554 id := p.identifiers[i] 555 return p.s[id.Low:id.High] 556 } 557 558 func (p *Path) identifierIndex(i int) int { 559 if i < 0 || i >= len(p.identifiers) { 560 return -1 561 } 562 return i 563 } 564 565 // HasExt returns true if the Unix styled path has an extension. 566 func HasExt(p string) bool { 567 for i := len(p) - 1; i >= 0; i-- { 568 if p[i] == '.' { 569 return true 570 } 571 if p[i] == '/' { 572 return false 573 } 574 } 575 return false 576 }