github.com/gohugoio/hugo@v0.88.1/hugofs/files/classifier.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package files
    15  
    16  import (
    17  	"bufio"
    18  	"fmt"
    19  	"io"
    20  	"os"
    21  	"path/filepath"
    22  	"sort"
    23  	"strings"
    24  	"unicode"
    25  
    26  	"github.com/spf13/afero"
    27  )
    28  
    29  const (
    30  	// The NPM package.json "template" file.
    31  	FilenamePackageHugoJSON = "package.hugo.json"
    32  	// The NPM package file.
    33  	FilenamePackageJSON = "package.json"
    34  )
    35  
    36  var (
    37  	// This should be the only list of valid extensions for content files.
    38  	contentFileExtensions = []string{
    39  		"html", "htm",
    40  		"mdown", "markdown", "md",
    41  		"asciidoc", "adoc", "ad",
    42  		"rest", "rst",
    43  		"mmark",
    44  		"org",
    45  		"pandoc", "pdc",
    46  	}
    47  
    48  	contentFileExtensionsSet map[string]bool
    49  
    50  	htmlFileExtensions = []string{
    51  		"html", "htm",
    52  	}
    53  
    54  	htmlFileExtensionsSet map[string]bool
    55  )
    56  
    57  func init() {
    58  	contentFileExtensionsSet = make(map[string]bool)
    59  	for _, ext := range contentFileExtensions {
    60  		contentFileExtensionsSet[ext] = true
    61  	}
    62  	htmlFileExtensionsSet = make(map[string]bool)
    63  	for _, ext := range htmlFileExtensions {
    64  		htmlFileExtensionsSet[ext] = true
    65  	}
    66  }
    67  
    68  func IsContentFile(filename string) bool {
    69  	return contentFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")]
    70  }
    71  
    72  func IsHTMLFile(filename string) bool {
    73  	return htmlFileExtensionsSet[strings.TrimPrefix(filepath.Ext(filename), ".")]
    74  }
    75  
    76  func IsContentExt(ext string) bool {
    77  	return contentFileExtensionsSet[ext]
    78  }
    79  
    80  type ContentClass string
    81  
    82  const (
    83  	ContentClassLeaf    ContentClass = "leaf"
    84  	ContentClassBranch  ContentClass = "branch"
    85  	ContentClassFile    ContentClass = "zfile" // Sort below
    86  	ContentClassContent ContentClass = "zcontent"
    87  )
    88  
    89  func (c ContentClass) IsBundle() bool {
    90  	return c == ContentClassLeaf || c == ContentClassBranch
    91  }
    92  
    93  func ClassifyContentFile(filename string, open func() (afero.File, error)) ContentClass {
    94  	if !IsContentFile(filename) {
    95  		return ContentClassFile
    96  	}
    97  
    98  	if IsHTMLFile(filename) {
    99  		// We need to look inside the file. If the first non-whitespace
   100  		// character is a "<", then we treat it as a regular file.
   101  		// Eearlier we created pages for these files, but that had all sorts
   102  		// of troubles, and isn't what it says in the documentation.
   103  		// See https://github.com/gohugoio/hugo/issues/7030
   104  		if open == nil {
   105  			panic(fmt.Sprintf("no file opener provided for %q", filename))
   106  		}
   107  
   108  		f, err := open()
   109  		if err != nil {
   110  			return ContentClassFile
   111  		}
   112  		ishtml := isHTMLContent(f)
   113  		f.Close()
   114  		if ishtml {
   115  			return ContentClassFile
   116  		}
   117  
   118  	}
   119  
   120  	if strings.HasPrefix(filename, "_index.") {
   121  		return ContentClassBranch
   122  	}
   123  
   124  	if strings.HasPrefix(filename, "index.") {
   125  		return ContentClassLeaf
   126  	}
   127  
   128  	return ContentClassContent
   129  }
   130  
   131  var htmlComment = []rune{'<', '!', '-', '-'}
   132  
   133  func isHTMLContent(r io.Reader) bool {
   134  	br := bufio.NewReader(r)
   135  	i := 0
   136  	for {
   137  		c, _, err := br.ReadRune()
   138  		if err != nil {
   139  			break
   140  		}
   141  
   142  		if i > 0 {
   143  			if i >= len(htmlComment) {
   144  				return false
   145  			}
   146  
   147  			if c != htmlComment[i] {
   148  				return true
   149  			}
   150  
   151  			i++
   152  			continue
   153  		}
   154  
   155  		if !unicode.IsSpace(c) {
   156  			if i == 0 && c != '<' {
   157  				return false
   158  			}
   159  			i++
   160  		}
   161  	}
   162  	return true
   163  }
   164  
   165  const (
   166  	ComponentFolderArchetypes = "archetypes"
   167  	ComponentFolderStatic     = "static"
   168  	ComponentFolderLayouts    = "layouts"
   169  	ComponentFolderContent    = "content"
   170  	ComponentFolderData       = "data"
   171  	ComponentFolderAssets     = "assets"
   172  	ComponentFolderI18n       = "i18n"
   173  
   174  	FolderResources = "resources"
   175  	FolderJSConfig  = "_jsconfig" // Mounted below /assets with postcss.config.js etc.
   176  )
   177  
   178  var (
   179  	JsConfigFolderMountPrefix = filepath.Join(ComponentFolderAssets, FolderJSConfig)
   180  
   181  	ComponentFolders = []string{
   182  		ComponentFolderArchetypes,
   183  		ComponentFolderStatic,
   184  		ComponentFolderLayouts,
   185  		ComponentFolderContent,
   186  		ComponentFolderData,
   187  		ComponentFolderAssets,
   188  		ComponentFolderI18n,
   189  	}
   190  
   191  	componentFoldersSet = make(map[string]bool)
   192  )
   193  
   194  func init() {
   195  	sort.Strings(ComponentFolders)
   196  	for _, f := range ComponentFolders {
   197  		componentFoldersSet[f] = true
   198  	}
   199  }
   200  
   201  // ResolveComponentFolder returns "content" from "content/blog/foo.md" etc.
   202  func ResolveComponentFolder(filename string) string {
   203  	filename = strings.TrimPrefix(filename, string(os.PathSeparator))
   204  	for _, cf := range ComponentFolders {
   205  		if strings.HasPrefix(filename, cf) {
   206  			return cf
   207  		}
   208  	}
   209  
   210  	return ""
   211  }
   212  
   213  func IsComponentFolder(name string) bool {
   214  	return componentFoldersSet[name]
   215  }