github.com/neohugo/neohugo@v0.123.8/common/paths/pathparser.go (about)

     1  // Copyright 2024 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package paths
    15  
    16  import (
    17  	"path"
    18  	"path/filepath"
    19  	"runtime"
    20  	"strings"
    21  	"sync"
    22  
    23  	"github.com/neohugo/neohugo/common/types"
    24  	"github.com/neohugo/neohugo/hugofs/files"
    25  	"github.com/neohugo/neohugo/identity"
    26  )
    27  
// defaultPathParser backs the package-level Parse function.
// It is declared without an initializer in this file, so LanguageIndex and
// IsLangDisabled are nil unless they are assigned elsewhere in the package.
var defaultPathParser PathParser
    29  
// PathParser parses a path into a Path.
// Both fields are optional; the parser checks each for nil before using it.
type PathParser struct {
	// Maps the language code to its index in the languages/sites slice.
	// When nil, no language identifier is detected in parsed paths.
	LanguageIndex map[string]int

	// Reports whether the given language is disabled.
	// Only consulted for identifiers that are not found in LanguageIndex.
	IsLangDisabled func(string) bool
}
    38  
    39  // Parse parses component c with path s into Path using the default path parser.
    40  func Parse(c, s string) *Path {
    41  	return defaultPathParser.Parse(c, s)
    42  }
    43  
    44  // NormalizePathString returns a normalized path string using the very basic Hugo rules.
    45  func NormalizePathStringBasic(s string) string {
    46  	// All lower case.
    47  	s = strings.ToLower(s)
    48  
    49  	// Replace spaces with hyphens.
    50  	s = strings.ReplaceAll(s, " ", "-")
    51  
    52  	return s
    53  }
    54  
    55  // ParseIdentity parses component c with path s into a StringIdentity.
    56  func (pp *PathParser) ParseIdentity(c, s string) identity.StringIdentity {
    57  	p := pp.parsePooled(c, s)
    58  	defer putPath(p)
    59  	return identity.StringIdentity(p.IdentifierBase())
    60  }
    61  
    62  // ParseBaseAndBaseNameNoIdentifier parses component c with path s into a base and a base name without any identifier.
    63  func (pp *PathParser) ParseBaseAndBaseNameNoIdentifier(c, s string) (string, string) {
    64  	p := pp.parsePooled(c, s)
    65  	defer putPath(p)
    66  	return p.Base(), p.BaseNameNoIdentifier()
    67  }
    68  
    69  func (pp *PathParser) parsePooled(c, s string) *Path {
    70  	s = NormalizePathStringBasic(s)
    71  	p := getPath()
    72  	p.component = c
    73  	p, err := pp.doParse(c, s, p)
    74  	if err != nil {
    75  		panic(err)
    76  	}
    77  	return p
    78  }
    79  
    80  // Parse parses component c with path s into Path using Hugo's content path rules.
    81  func (pp *PathParser) Parse(c, s string) *Path {
    82  	p, err := pp.parse(c, s)
    83  	if err != nil {
    84  		panic(err)
    85  	}
    86  	return p
    87  }
    88  
    89  func (pp *PathParser) newPath(component string) *Path {
    90  	return &Path{
    91  		component:             component,
    92  		posContainerLow:       -1,
    93  		posContainerHigh:      -1,
    94  		posSectionHigh:        -1,
    95  		posIdentifierLanguage: -1,
    96  	}
    97  }
    98  
    99  func (pp *PathParser) parse(component, s string) (*Path, error) {
   100  	ss := NormalizePathStringBasic(s)
   101  
   102  	p, err := pp.doParse(component, ss, pp.newPath(component))
   103  	if err != nil {
   104  		return nil, err
   105  	}
   106  
   107  	if s != ss {
   108  		var err error
   109  		// Preserve the original case for titles etc.
   110  		p.unnormalized, err = pp.doParse(component, s, pp.newPath(component))
   111  		if err != nil {
   112  			return nil, err
   113  		}
   114  	} else {
   115  		p.unnormalized = p
   116  	}
   117  
   118  	return p, nil
   119  }
   120  
// doParse parses s into p and returns p. The returned error is always nil in
// this implementation.
//
// s is first brought into a canonical form (leading slash, no trailing slash,
// an empty string becomes "/"); on Windows it is additionally converted to
// forward slashes and cleaned. The string is then scanned once from right to
// left to record the positions of the slashes and of the dot-separated
// identifiers in the last path element.
//
// p is expected to arrive with its pos* fields set to -1, as newPath and
// reset leave them.
func (pp *PathParser) doParse(component, s string, p *Path) (*Path, error) {
	// Language identifiers are only looked for when a language index is
	// configured and the component is content or layouts.
	hasLang := pp.LanguageIndex != nil
	hasLang = hasLang && (component == files.ComponentFolderContent || component == files.ComponentFolderLayouts)

	if runtime.GOOS == "windows" {
		s = path.Clean(filepath.ToSlash(s))
		if s == "." {
			s = ""
		}
	}

	if s == "" {
		s = "/"
	}

	// Leading slash, no trailing slash.
	if !strings.HasPrefix(s, "/") {
		s = "/" + s
	}

	if s != "/" && s[len(s)-1] == '/' {
		s = s[:len(s)-1]
	}

	p.s = s
	slashCount := 0

	// Scan from the end so that the last path element is seen first.
	for i := len(s) - 1; i >= 0; i-- {
		c := s[i]

		switch c {
		case '.':
			// Only dots in the last path element are considered:
			// posContainerHigh is set as soon as the first slash (from the
			// right) has been seen.
			if p.posContainerHigh == -1 {
				// The candidate identifier ends just before the dot of the
				// previously recorded identifier, or at the end of the string.
				var high int
				if len(p.identifiers) > 0 {
					high = p.identifiers[len(p.identifiers)-1].Low - 1
				} else {
					high = len(p.s)
				}
				id := types.LowHigh{Low: i + 1, High: high}
				if len(p.identifiers) == 0 {
					// The rightmost identifier (the extension) is always recorded.
					p.identifiers = append(p.identifiers, id)
				} else if len(p.identifiers) == 1 {
					// Check for a valid language.
					// Note that this s shadows the path string within this branch.
					s := p.s[id.Low:id.High]

					if hasLang {
						var disabled bool
						_, langFound := pp.LanguageIndex[s]
						if !langFound {
							// An unknown code still counts as a language
							// identifier if it is reported as disabled.
							disabled = pp.IsLangDisabled != nil && pp.IsLangDisabled(s)
							if disabled {
								p.disabled = true
								langFound = true
							}
						}
						if langFound {
							p.posIdentifierLanguage = 1
							p.identifiers = append(p.identifiers, id)
						}
					}
				}
			}
		case '/':
			slashCount++
			// The first slash from the right marks the start of the last
			// element, the second one the start of its containing directory.
			if p.posContainerHigh == -1 {
				p.posContainerHigh = i + 1
			} else if p.posContainerLow == -1 {
				p.posContainerLow = i + 1
			}
			// Overwritten on every slash except the leading one, so it ends up
			// at the slash that terminates the first path element.
			if i > 0 {
				p.posSectionHigh = i
			}
		}
	}

	isContentComponent := p.component == files.ComponentFolderContent || p.component == files.ComponentFolderArchetypes
	isContent := isContentComponent && files.IsContentExt(p.Ext())

	if isContent {
		// NOTE(review): indexing identifiers here assumes files.IsContentExt
		// returns false for an empty extension; confirm.
		id := p.identifiers[len(p.identifiers)-1]
		// b is the last path element with all recorded identifiers removed.
		b := p.s[p.posContainerHigh : id.Low-1]
		switch b {
		case "index":
			p.bundleType = PathTypeLeaf
		case "_index":
			p.bundleType = PathTypeBranch
		default:
			p.bundleType = PathTypeContentSingle
		}

		// A leaf bundle directly below the root (e.g. /a/index.md) gets
		// posSectionHigh 0, which makes Section return "".
		if slashCount == 2 && p.IsLeafBundle() {
			p.posSectionHigh = 0
		}
	}

	return p, nil
}
   219  
   220  func ModifyPathBundleTypeResource(p *Path) {
   221  	if p.IsContent() {
   222  		p.bundleType = PathTypeContentResource
   223  	} else {
   224  		p.bundleType = PathTypeFile
   225  	}
   226  }
   227  
// PathType classifies what a Path points to. The values are ordered, and
// several Path methods rely on that order by comparing with >=.
type PathType int

const (
	// A generic resource, e.g. a JSON file.
	PathTypeFile PathType = iota

	// All below are content files.
	// A resource of a content type with front matter.
	PathTypeContentResource

	// E.g. /blog/my-post.md
	PathTypeContentSingle

	// All below are bundled content files.

	// Leaf bundles, e.g. /blog/my-post/index.md
	PathTypeLeaf

	// Branch bundles, e.g. /blog/_index.md
	PathTypeBranch
)
   249  
// Path is a parsed path. It stores the path string once and describes its
// parts as byte offsets into that string.
type Path struct {
	// Note: Any additions to this struct should also be added to the pathPool.

	// The path as stored by the parser: leading slash, no trailing slash.
	s string

	// Offsets into s, -1 when not set.
	// posContainerHigh is the start of the last path element,
	// posContainerLow the start of the directory name containing it,
	// posSectionHigh the end of the first path element.
	posContainerLow  int
	posContainerHigh int
	posSectionHigh   int

	component  string
	bundleType PathType

	// Spans in s of the identifiers found in the last path element,
	// ordered from right to left (index 0 is the extension).
	identifiers []types.LowHigh

	// Index into identifiers of the language identifier, or -1 if none.
	posIdentifierLanguage int
	// Set when the language identifier was reported as disabled.
	disabled bool

	// When set, accessors that go through norm strip the leading slash.
	trimLeadingSlash bool

	// The Path parsed from the original, non-normalized string.
	// Points to this Path itself when normalization changed nothing.
	unnormalized *Path
}
   270  
// pathPool recycles Path values for the short-lived parses that go through
// getPath and putPath. Newly created Paths start out in the reset state.
var pathPool = &sync.Pool{
	New: func() any {
		p := &Path{}
		p.reset()
		return p
	},
}
   278  
   279  func getPath() *Path {
   280  	return pathPool.Get().(*Path)
   281  }
   282  
// putPath resets p and returns it to pathPool.
// Callers should not retain p afterwards, since getPath may hand it out again.
func putPath(p *Path) {
	p.reset()
	pathPool.Put(p)
}
   287  
   288  func (p *Path) reset() {
   289  	p.s = ""
   290  	p.posContainerLow = -1
   291  	p.posContainerHigh = -1
   292  	p.posSectionHigh = -1
   293  	p.component = ""
   294  	p.bundleType = 0
   295  	p.identifiers = p.identifiers[:0]
   296  	p.posIdentifierLanguage = -1
   297  	p.disabled = false
   298  	p.trimLeadingSlash = false
   299  	p.unnormalized = nil
   300  }
   301  
   302  // TrimLeadingSlash returns a copy of the Path with the leading slash removed.
   303  func (p Path) TrimLeadingSlash() *Path {
   304  	p.trimLeadingSlash = true
   305  	return &p
   306  }
   307  
   308  func (p *Path) norm(s string) string {
   309  	if p.trimLeadingSlash {
   310  		s = strings.TrimPrefix(s, "/")
   311  	}
   312  	return s
   313  }
   314  
// IdentifierBase satisfies identity.Identity.
// It returns the same value as Base.
func (p *Path) IdentifierBase() string {
	return p.Base()
}
   319  
// Component returns the component for this path (e.g. "content"),
// i.e. the component value the path was parsed with.
func (p *Path) Component() string {
	return p.component
}
   324  
   325  // Container returns the base name of the container directory for this path.
   326  func (p *Path) Container() string {
   327  	if p.posContainerLow == -1 {
   328  		return ""
   329  	}
   330  	return p.norm(p.s[p.posContainerLow : p.posContainerHigh-1])
   331  }
   332  
   333  // ContainerDir returns the container directory for this path.
   334  // For content bundles this will be the parent directory.
   335  func (p *Path) ContainerDir() string {
   336  	if p.posContainerLow == -1 || !p.IsBundle() {
   337  		return p.Dir()
   338  	}
   339  	return p.norm(p.s[:p.posContainerLow-1])
   340  }
   341  
   342  // Section returns the first path element (section).
   343  func (p *Path) Section() string {
   344  	if p.posSectionHigh <= 0 {
   345  		return ""
   346  	}
   347  	return p.norm(p.s[1:p.posSectionHigh])
   348  }
   349  
   350  // IsContent returns true if the path is a content file (e.g. mypost.md).
   351  // Note that this will also return true for content files in a bundle.
   352  func (p *Path) IsContent() bool {
   353  	return p.BundleType() >= PathTypeContentResource
   354  }
   355  
   356  // isContentPage returns true if the path is a content file (e.g. mypost.md),
   357  // but nof if inside a leaf bundle.
   358  func (p *Path) isContentPage() bool {
   359  	return p.BundleType() >= PathTypeContentSingle
   360  }
   361  
   362  // Name returns the last element of path.
   363  func (p *Path) Name() string {
   364  	if p.posContainerHigh > 0 {
   365  		return p.s[p.posContainerHigh:]
   366  	}
   367  	return p.s
   368  }
   369  
   370  // Name returns the last element of path withhout any extension.
   371  func (p *Path) NameNoExt() string {
   372  	if i := p.identifierIndex(0); i != -1 {
   373  		return p.s[p.posContainerHigh : p.identifiers[i].Low-1]
   374  	}
   375  	return p.s[p.posContainerHigh:]
   376  }
   377  
   378  // Name returns the last element of path withhout any language identifier.
   379  func (p *Path) NameNoLang() string {
   380  	i := p.identifierIndex(p.posIdentifierLanguage)
   381  	if i == -1 {
   382  		return p.Name()
   383  	}
   384  
   385  	return p.s[p.posContainerHigh:p.identifiers[i].Low-1] + p.s[p.identifiers[i].High:]
   386  }
   387  
   388  // BaseNameNoIdentifier returns the logcical base name for a resource without any idenifier (e.g. no extension).
   389  // For bundles this will be the containing directory's name, e.g. "blog".
   390  func (p *Path) BaseNameNoIdentifier() string {
   391  	if p.IsBundle() {
   392  		return p.Container()
   393  	}
   394  	return p.NameNoIdentifier()
   395  }
   396  
   397  // NameNoIdentifier returns the last element of path withhout any identifier (e.g. no extension).
   398  func (p *Path) NameNoIdentifier() string {
   399  	if len(p.identifiers) > 0 {
   400  		return p.s[p.posContainerHigh : p.identifiers[len(p.identifiers)-1].Low-1]
   401  	}
   402  	return p.s[p.posContainerHigh:]
   403  }
   404  
   405  // Dir returns all but the last element of path, typically the path's directory.
   406  func (p *Path) Dir() (d string) {
   407  	if p.posContainerHigh > 0 {
   408  		d = p.s[:p.posContainerHigh-1]
   409  	}
   410  	if d == "" {
   411  		d = "/"
   412  	}
   413  	d = p.norm(d)
   414  	return
   415  }
   416  
// Path returns the full path as stored by the parser, with the leading
// slash removed if the Path was obtained via TrimLeadingSlash.
func (p *Path) Path() (d string) {
	return p.norm(p.s)
}
   421  
// Unnormalized returns the Path with the original case preserved.
// This returns the unnormalized field as is; it is nil on a Path that has
// been reset.
func (p *Path) Unnormalized() *Path {
	return p.unnormalized
}
   426  
// PathNoLang returns the Path but with any language identifier removed.
// The extension is kept.
func (p *Path) PathNoLang() string {
	return p.base(true, false)
}
   431  
// PathNoIdentifier returns the Path but with any identifier (ext, lang) removed.
// Unlike Base, the last path element is kept even for bundles.
func (p *Path) PathNoIdentifier() string {
	return p.base(false, false)
}
   436  
   437  // PathRel returns the path relativeto the given owner.
   438  func (p *Path) PathRel(owner *Path) string {
   439  	ob := owner.Base()
   440  	if !strings.HasSuffix(ob, "/") {
   441  		ob += "/"
   442  	}
   443  	return strings.TrimPrefix(p.Path(), ob)
   444  }
   445  
   446  // BaseRel returns the base path relative to the given owner.
   447  func (p *Path) BaseRel(owner *Path) string {
   448  	ob := owner.Base()
   449  	if ob == "/" {
   450  		ob = ""
   451  	}
   452  	return p.Base()[len(ob)+1:]
   453  }
   454  
   455  // For content files, Base returns the path without any identifiers (extension, language code etc.).
   456  // Any 'index' as the last path element is ignored.
   457  //
   458  // For other files (Resources), any extension is kept.
   459  func (p *Path) Base() string {
   460  	return p.base(!p.isContentPage(), p.IsBundle())
   461  }
   462  
   463  // BaseNoLeadingSlash returns the base path without the leading slash.
   464  func (p *Path) BaseNoLeadingSlash() string {
   465  	return p.Base()[1:]
   466  }
   467  
   468  func (p *Path) base(preserveExt, isBundle bool) string {
   469  	if len(p.identifiers) == 0 {
   470  		return p.norm(p.s)
   471  	}
   472  
   473  	if preserveExt && len(p.identifiers) == 1 {
   474  		// Preserve extension.
   475  		return p.norm(p.s)
   476  	}
   477  
   478  	id := p.identifiers[len(p.identifiers)-1]
   479  	high := id.Low - 1
   480  
   481  	if isBundle {
   482  		high = p.posContainerHigh - 1
   483  	}
   484  
   485  	if high == 0 {
   486  		high++
   487  	}
   488  
   489  	if !preserveExt {
   490  		return p.norm(p.s[:high])
   491  	}
   492  
   493  	// For txt files etc. we want to preserve the extension.
   494  	id = p.identifiers[0]
   495  
   496  	return p.norm(p.s[:high] + p.s[id.Low-1:id.High])
   497  }
   498  
// Ext returns the first identifier of the path, i.e. the file extension
// without the leading dot, or "" if the path has no identifiers.
func (p *Path) Ext() string {
	return p.identifierAsString(0)
}
   502  
// Lang returns the identifier at index 1, which is where the parser records
// the language code, or "" if there is no such identifier.
func (p *Path) Lang() string {
	return p.identifierAsString(1)
}
   506  
// Identifier returns the identifier at index i, counted from the end of the
// path (index 0 is the extension), or "" if i is out of range.
func (p *Path) Identifier(i int) string {
	return p.identifierAsString(i)
}
   510  
// Disabled reports whether the path's language identifier was reported as
// disabled when the path was parsed.
func (p *Path) Disabled() bool {
	return p.disabled
}
   514  
   515  func (p *Path) Identifiers() []string {
   516  	ids := make([]string, len(p.identifiers))
   517  	for i, id := range p.identifiers {
   518  		ids[i] = p.s[id.Low:id.High]
   519  	}
   520  	return ids
   521  }
   522  
// IsHTML reports whether files.IsHTML accepts this path's extension.
func (p *Path) IsHTML() bool {
	return files.IsHTML(p.Ext())
}
   526  
// BundleType returns the PathType this path is classified as.
func (p *Path) BundleType() PathType {
	return p.bundleType
}
   530  
// IsBundle reports whether the path is a bundle file, i.e. its type is
// PathTypeLeaf or higher (leaf or branch bundle).
func (p *Path) IsBundle() bool {
	return p.bundleType >= PathTypeLeaf
}
   534  
// IsBranchBundle reports whether the path is a branch bundle (PathTypeBranch).
func (p *Path) IsBranchBundle() bool {
	return p.bundleType == PathTypeBranch
}
   538  
// IsLeafBundle reports whether the path is a leaf bundle (PathTypeLeaf).
func (p *Path) IsLeafBundle() bool {
	return p.bundleType == PathTypeLeaf
}
   542  
   543  func (p Path) ForBundleType(t PathType) *Path {
   544  	p.bundleType = t
   545  	return &p
   546  }
   547  
   548  func (p *Path) identifierAsString(i int) string {
   549  	i = p.identifierIndex(i)
   550  	if i == -1 {
   551  		return ""
   552  	}
   553  
   554  	id := p.identifiers[i]
   555  	return p.s[id.Low:id.High]
   556  }
   557  
   558  func (p *Path) identifierIndex(i int) int {
   559  	if i < 0 || i >= len(p.identifiers) {
   560  		return -1
   561  	}
   562  	return i
   563  }
   564  
   565  // HasExt returns true if the Unix styled path has an extension.
   566  func HasExt(p string) bool {
   567  	for i := len(p) - 1; i >= 0; i-- {
   568  		if p[i] == '.' {
   569  			return true
   570  		}
   571  		if p[i] == '/' {
   572  			return false
   573  		}
   574  	}
   575  	return false
   576  }