github.com/gohugoio/hugo@v0.88.1/commands/import_jekyll.go (about)

     1  // Copyright 2019 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package commands
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"fmt"
    20  	"io/ioutil"
    21  	"os"
    22  	"path/filepath"
    23  	"regexp"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  	"unicode"
    28  
    29  	"github.com/gohugoio/hugo/parser/pageparser"
    30  
    31  	"github.com/gohugoio/hugo/common/hugio"
    32  
    33  	"github.com/gohugoio/hugo/parser/metadecoders"
    34  
    35  	"github.com/gohugoio/hugo/common/maps"
    36  	"github.com/gohugoio/hugo/helpers"
    37  	"github.com/gohugoio/hugo/hugofs"
    38  	"github.com/gohugoio/hugo/hugolib"
    39  	"github.com/gohugoio/hugo/parser"
    40  	"github.com/spf13/afero"
    41  	"github.com/spf13/cobra"
    42  	jww "github.com/spf13/jwalterweatherman"
    43  )
    44  
// Compile-time assertion that *importCmd can be used as a cmder.
var _ cmder = (*importCmd)(nil)

// importCmd is the "hugo import" command. It carries no state of its own
// beyond the embedded *baseCmd.
type importCmd struct {
	*baseCmd
}
    50  
    51  func newImportCmd() *importCmd {
    52  	cc := &importCmd{}
    53  
    54  	cc.baseCmd = newBaseCmd(&cobra.Command{
    55  		Use:   "import",
    56  		Short: "Import your site from others.",
    57  		Long: `Import your site from other web site generators like Jekyll.
    58  
    59  Import requires a subcommand, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
    60  		RunE: nil,
    61  	})
    62  
    63  	importJekyllCmd := &cobra.Command{
    64  		Use:   "jekyll",
    65  		Short: "hugo import from Jekyll",
    66  		Long: `hugo import from Jekyll.
    67  
    68  Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
    69  		RunE: cc.importFromJekyll,
    70  	}
    71  
    72  	importJekyllCmd.Flags().Bool("force", false, "allow import into non-empty target directory")
    73  
    74  	cc.cmd.AddCommand(importJekyllCmd)
    75  
    76  	return cc
    77  }
    78  
    79  func (i *importCmd) importFromJekyll(cmd *cobra.Command, args []string) error {
    80  	if len(args) < 2 {
    81  		return newUserError(`import from jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.")
    82  	}
    83  
    84  	jekyllRoot, err := filepath.Abs(filepath.Clean(args[0]))
    85  	if err != nil {
    86  		return newUserError("path error:", args[0])
    87  	}
    88  
    89  	targetDir, err := filepath.Abs(filepath.Clean(args[1]))
    90  	if err != nil {
    91  		return newUserError("path error:", args[1])
    92  	}
    93  
    94  	jww.INFO.Println("Import Jekyll from:", jekyllRoot, "to:", targetDir)
    95  
    96  	if strings.HasPrefix(filepath.Dir(targetDir), jekyllRoot) {
    97  		return newUserError("abort: target path should not be inside the Jekyll root")
    98  	}
    99  
   100  	forceImport, _ := cmd.Flags().GetBool("force")
   101  
   102  	fs := afero.NewOsFs()
   103  	jekyllPostDirs, hasAnyPost := i.getJekyllDirInfo(fs, jekyllRoot)
   104  	if !hasAnyPost {
   105  		return errors.New("abort: jekyll root contains neither posts nor drafts")
   106  	}
   107  
   108  	err = i.createSiteFromJekyll(jekyllRoot, targetDir, jekyllPostDirs, forceImport)
   109  
   110  	if err != nil {
   111  		return newUserError(err)
   112  	}
   113  
   114  	jww.FEEDBACK.Println("Importing...")
   115  
   116  	fileCount := 0
   117  	callback := func(path string, fi hugofs.FileMetaInfo, err error) error {
   118  		if err != nil {
   119  			return err
   120  		}
   121  
   122  		if fi.IsDir() {
   123  			return nil
   124  		}
   125  
   126  		relPath, err := filepath.Rel(jekyllRoot, path)
   127  		if err != nil {
   128  			return newUserError("get rel path error:", path)
   129  		}
   130  
   131  		relPath = filepath.ToSlash(relPath)
   132  		draft := false
   133  
   134  		switch {
   135  		case strings.Contains(relPath, "_posts/"):
   136  			relPath = filepath.Join("content/post", strings.Replace(relPath, "_posts/", "", -1))
   137  		case strings.Contains(relPath, "_drafts/"):
   138  			relPath = filepath.Join("content/draft", strings.Replace(relPath, "_drafts/", "", -1))
   139  			draft = true
   140  		default:
   141  			return nil
   142  		}
   143  
   144  		fileCount++
   145  		return convertJekyllPost(path, relPath, targetDir, draft)
   146  	}
   147  
   148  	for jekyllPostDir, hasAnyPostInDir := range jekyllPostDirs {
   149  		if hasAnyPostInDir {
   150  			if err = helpers.SymbolicWalk(hugofs.Os, filepath.Join(jekyllRoot, jekyllPostDir), callback); err != nil {
   151  				return err
   152  			}
   153  		}
   154  	}
   155  
   156  	jww.FEEDBACK.Println("Congratulations!", fileCount, "post(s) imported!")
   157  	jww.FEEDBACK.Println("Now, start Hugo by yourself:\n" +
   158  		"$ git clone https://github.com/spf13/herring-cove.git " + args[1] + "/themes/herring-cove")
   159  	jww.FEEDBACK.Println("$ cd " + args[1] + "\n$ hugo server --theme=herring-cove")
   160  
   161  	return nil
   162  }
   163  
   164  func (i *importCmd) getJekyllDirInfo(fs afero.Fs, jekyllRoot string) (map[string]bool, bool) {
   165  	postDirs := make(map[string]bool)
   166  	hasAnyPost := false
   167  	if entries, err := ioutil.ReadDir(jekyllRoot); err == nil {
   168  		for _, entry := range entries {
   169  			if entry.IsDir() {
   170  				subDir := filepath.Join(jekyllRoot, entry.Name())
   171  				if isPostDir, hasAnyPostInDir := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
   172  					postDirs[entry.Name()] = hasAnyPostInDir
   173  					if hasAnyPostInDir {
   174  						hasAnyPost = true
   175  					}
   176  				}
   177  			}
   178  		}
   179  	}
   180  	return postDirs, hasAnyPost
   181  }
   182  
   183  func (i *importCmd) retrieveJekyllPostDir(fs afero.Fs, dir string) (bool, bool) {
   184  	if strings.HasSuffix(dir, "_posts") || strings.HasSuffix(dir, "_drafts") {
   185  		isEmpty, _ := helpers.IsEmpty(dir, fs)
   186  		return true, !isEmpty
   187  	}
   188  
   189  	if entries, err := ioutil.ReadDir(dir); err == nil {
   190  		for _, entry := range entries {
   191  			if entry.IsDir() {
   192  				subDir := filepath.Join(dir, entry.Name())
   193  				if isPostDir, hasAnyPost := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
   194  					return isPostDir, hasAnyPost
   195  				}
   196  			}
   197  		}
   198  	}
   199  
   200  	return false, true
   201  }
   202  
   203  func (i *importCmd) createSiteFromJekyll(jekyllRoot, targetDir string, jekyllPostDirs map[string]bool, force bool) error {
   204  	s, err := hugolib.NewSiteDefaultLang()
   205  	if err != nil {
   206  		return err
   207  	}
   208  
   209  	fs := s.Fs.Source
   210  	if exists, _ := helpers.Exists(targetDir, fs); exists {
   211  		if isDir, _ := helpers.IsDir(targetDir, fs); !isDir {
   212  			return errors.New("target path \"" + targetDir + "\" exists but is not a directory")
   213  		}
   214  
   215  		isEmpty, _ := helpers.IsEmpty(targetDir, fs)
   216  
   217  		if !isEmpty && !force {
   218  			return errors.New("target path \"" + targetDir + "\" exists and is not empty")
   219  		}
   220  	}
   221  
   222  	jekyllConfig := i.loadJekyllConfig(fs, jekyllRoot)
   223  
   224  	mkdir(targetDir, "layouts")
   225  	mkdir(targetDir, "content")
   226  	mkdir(targetDir, "archetypes")
   227  	mkdir(targetDir, "static")
   228  	mkdir(targetDir, "data")
   229  	mkdir(targetDir, "themes")
   230  
   231  	i.createConfigFromJekyll(fs, targetDir, "yaml", jekyllConfig)
   232  
   233  	i.copyJekyllFilesAndFolders(jekyllRoot, filepath.Join(targetDir, "static"), jekyllPostDirs)
   234  
   235  	return nil
   236  }
   237  
   238  func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]interface{} {
   239  	path := filepath.Join(jekyllRoot, "_config.yml")
   240  
   241  	exists, err := helpers.Exists(path, fs)
   242  
   243  	if err != nil || !exists {
   244  		jww.WARN.Println("_config.yaml not found: Is the specified Jekyll root correct?")
   245  		return nil
   246  	}
   247  
   248  	f, err := fs.Open(path)
   249  	if err != nil {
   250  		return nil
   251  	}
   252  
   253  	defer f.Close()
   254  
   255  	b, err := ioutil.ReadAll(f)
   256  	if err != nil {
   257  		return nil
   258  	}
   259  
   260  	c, err := metadecoders.Default.UnmarshalToMap(b, metadecoders.YAML)
   261  	if err != nil {
   262  		return nil
   263  	}
   264  
   265  	return c
   266  }
   267  
   268  func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind metadecoders.Format, jekyllConfig map[string]interface{}) (err error) {
   269  	title := "My New Hugo Site"
   270  	baseURL := "http://example.org/"
   271  
   272  	for key, value := range jekyllConfig {
   273  		lowerKey := strings.ToLower(key)
   274  
   275  		switch lowerKey {
   276  		case "title":
   277  			if str, ok := value.(string); ok {
   278  				title = str
   279  			}
   280  
   281  		case "url":
   282  			if str, ok := value.(string); ok {
   283  				baseURL = str
   284  			}
   285  		}
   286  	}
   287  
   288  	in := map[string]interface{}{
   289  		"baseURL":            baseURL,
   290  		"title":              title,
   291  		"languageCode":       "en-us",
   292  		"disablePathToLower": true,
   293  	}
   294  
   295  	var buf bytes.Buffer
   296  	err = parser.InterfaceToConfig(in, kind, &buf)
   297  	if err != nil {
   298  		return err
   299  	}
   300  
   301  	return helpers.WriteToDisk(filepath.Join(inpath, "config."+string(kind)), &buf, fs)
   302  }
   303  
   304  func (i *importCmd) copyJekyllFilesAndFolders(jekyllRoot, dest string, jekyllPostDirs map[string]bool) (err error) {
   305  	fs := hugofs.Os
   306  
   307  	fi, err := fs.Stat(jekyllRoot)
   308  	if err != nil {
   309  		return err
   310  	}
   311  	if !fi.IsDir() {
   312  		return errors.New(jekyllRoot + " is not a directory")
   313  	}
   314  	err = os.MkdirAll(dest, fi.Mode())
   315  	if err != nil {
   316  		return err
   317  	}
   318  	entries, err := ioutil.ReadDir(jekyllRoot)
   319  	if err != nil {
   320  		return err
   321  	}
   322  
   323  	for _, entry := range entries {
   324  		sfp := filepath.Join(jekyllRoot, entry.Name())
   325  		dfp := filepath.Join(dest, entry.Name())
   326  		if entry.IsDir() {
   327  			if entry.Name()[0] != '_' && entry.Name()[0] != '.' {
   328  				if _, ok := jekyllPostDirs[entry.Name()]; !ok {
   329  					err = hugio.CopyDir(fs, sfp, dfp, nil)
   330  					if err != nil {
   331  						jww.ERROR.Println(err)
   332  					}
   333  				}
   334  			}
   335  		} else {
   336  			lowerEntryName := strings.ToLower(entry.Name())
   337  			exceptSuffix := []string{
   338  				".md", ".markdown", ".html", ".htm",
   339  				".xml", ".textile", "rakefile", "gemfile", ".lock",
   340  			}
   341  			isExcept := false
   342  			for _, suffix := range exceptSuffix {
   343  				if strings.HasSuffix(lowerEntryName, suffix) {
   344  					isExcept = true
   345  					break
   346  				}
   347  			}
   348  
   349  			if !isExcept && entry.Name()[0] != '.' && entry.Name()[0] != '_' {
   350  				err = hugio.CopyFile(fs, sfp, dfp)
   351  				if err != nil {
   352  					jww.ERROR.Println(err)
   353  				}
   354  			}
   355  		}
   356  
   357  	}
   358  	return nil
   359  }
   360  
   361  func parseJekyllFilename(filename string) (time.Time, string, error) {
   362  	re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`)
   363  	r := re.FindAllStringSubmatch(filename, -1)
   364  	if len(r) == 0 {
   365  		return time.Now(), "", errors.New("filename not match")
   366  	}
   367  
   368  	postDate, err := time.Parse("2006-1-2", r[0][1])
   369  	if err != nil {
   370  		return time.Now(), "", err
   371  	}
   372  
   373  	postName := r[0][2]
   374  
   375  	return postDate, postName, nil
   376  }
   377  
   378  func convertJekyllPost(path, relPath, targetDir string, draft bool) error {
   379  	jww.TRACE.Println("Converting", path)
   380  
   381  	filename := filepath.Base(path)
   382  	postDate, postName, err := parseJekyllFilename(filename)
   383  	if err != nil {
   384  		jww.WARN.Printf("Failed to parse filename '%s': %s. Skipping.", filename, err)
   385  		return nil
   386  	}
   387  
   388  	jww.TRACE.Println(filename, postDate, postName)
   389  
   390  	targetFile := filepath.Join(targetDir, relPath)
   391  	targetParentDir := filepath.Dir(targetFile)
   392  	os.MkdirAll(targetParentDir, 0777)
   393  
   394  	contentBytes, err := ioutil.ReadFile(path)
   395  	if err != nil {
   396  		jww.ERROR.Println("Read file error:", path)
   397  		return err
   398  	}
   399  
   400  	pf, err := pageparser.ParseFrontMatterAndContent(bytes.NewReader(contentBytes))
   401  	if err != nil {
   402  		jww.ERROR.Println("Parse file error:", path)
   403  		return err
   404  	}
   405  
   406  	newmetadata, err := convertJekyllMetaData(pf.FrontMatter, postName, postDate, draft)
   407  	if err != nil {
   408  		jww.ERROR.Println("Convert metadata error:", path)
   409  		return err
   410  	}
   411  
   412  	content, err := convertJekyllContent(newmetadata, string(pf.Content))
   413  	if err != nil {
   414  		jww.ERROR.Println("Converting Jekyll error:", path)
   415  		return err
   416  	}
   417  
   418  	fs := hugofs.Os
   419  	if err := helpers.WriteToDisk(targetFile, strings.NewReader(content), fs); err != nil {
   420  		return fmt.Errorf("failed to save file %q: %s", filename, err)
   421  	}
   422  
   423  	return nil
   424  }
   425  
   426  func convertJekyllMetaData(m interface{}, postName string, postDate time.Time, draft bool) (interface{}, error) {
   427  	metadata, err := maps.ToStringMapE(m)
   428  	if err != nil {
   429  		return nil, err
   430  	}
   431  
   432  	if draft {
   433  		metadata["draft"] = true
   434  	}
   435  
   436  	for key, value := range metadata {
   437  		lowerKey := strings.ToLower(key)
   438  
   439  		switch lowerKey {
   440  		case "layout":
   441  			delete(metadata, key)
   442  		case "permalink":
   443  			if str, ok := value.(string); ok {
   444  				metadata["url"] = str
   445  			}
   446  			delete(metadata, key)
   447  		case "category":
   448  			if str, ok := value.(string); ok {
   449  				metadata["categories"] = []string{str}
   450  			}
   451  			delete(metadata, key)
   452  		case "excerpt_separator":
   453  			if key != lowerKey {
   454  				delete(metadata, key)
   455  				metadata[lowerKey] = value
   456  			}
   457  		case "date":
   458  			if str, ok := value.(string); ok {
   459  				re := regexp.MustCompile(`(\d+):(\d+):(\d+)`)
   460  				r := re.FindAllStringSubmatch(str, -1)
   461  				if len(r) > 0 {
   462  					hour, _ := strconv.Atoi(r[0][1])
   463  					minute, _ := strconv.Atoi(r[0][2])
   464  					second, _ := strconv.Atoi(r[0][3])
   465  					postDate = time.Date(postDate.Year(), postDate.Month(), postDate.Day(), hour, minute, second, 0, time.UTC)
   466  				}
   467  			}
   468  			delete(metadata, key)
   469  		}
   470  
   471  	}
   472  
   473  	metadata["date"] = postDate.Format(time.RFC3339)
   474  
   475  	return metadata, nil
   476  }
   477  
   478  func convertJekyllContent(m interface{}, content string) (string, error) {
   479  	metadata, _ := maps.ToStringMapE(m)
   480  
   481  	lines := strings.Split(content, "\n")
   482  	var resultLines []string
   483  	for _, line := range lines {
   484  		resultLines = append(resultLines, strings.Trim(line, "\r\n"))
   485  	}
   486  
   487  	content = strings.Join(resultLines, "\n")
   488  
   489  	excerptSep := "<!--more-->"
   490  	if value, ok := metadata["excerpt_separator"]; ok {
   491  		if str, strOk := value.(string); strOk {
   492  			content = strings.Replace(content, strings.TrimSpace(str), excerptSep, -1)
   493  		}
   494  	}
   495  
   496  	replaceList := []struct {
   497  		re      *regexp.Regexp
   498  		replace string
   499  	}{
   500  		{regexp.MustCompile("(?i)<!-- more -->"), "<!--more-->"},
   501  		{regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"},
   502  		{regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"},
   503  	}
   504  
   505  	for _, replace := range replaceList {
   506  		content = replace.re.ReplaceAllString(content, replace.replace)
   507  	}
   508  
   509  	replaceListFunc := []struct {
   510  		re      *regexp.Regexp
   511  		replace func(string) string
   512  	}{
   513  		// Octopress image tag: http://octopress.org/docs/plugins/image-tag/
   514  		{regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag},
   515  		{regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), replaceHighlightTag},
   516  	}
   517  
   518  	for _, replace := range replaceListFunc {
   519  		content = replace.re.ReplaceAllStringFunc(content, replace.replace)
   520  	}
   521  
   522  	var buf bytes.Buffer
   523  	if len(metadata) != 0 {
   524  		err := parser.InterfaceToFrontMatter(m, metadecoders.YAML, &buf)
   525  		if err != nil {
   526  			return "", err
   527  		}
   528  	}
   529  	buf.WriteString(content)
   530  
   531  	return buf.String(), nil
   532  }
   533  
   534  func replaceHighlightTag(match string) string {
   535  	r := regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`)
   536  	parts := r.FindStringSubmatch(match)
   537  	lastQuote := rune(0)
   538  	f := func(c rune) bool {
   539  		switch {
   540  		case c == lastQuote:
   541  			lastQuote = rune(0)
   542  			return false
   543  		case lastQuote != rune(0):
   544  			return false
   545  		case unicode.In(c, unicode.Quotation_Mark):
   546  			lastQuote = c
   547  			return false
   548  		default:
   549  			return unicode.IsSpace(c)
   550  		}
   551  	}
   552  	// splitting string by space but considering quoted section
   553  	items := strings.FieldsFunc(parts[1], f)
   554  
   555  	result := bytes.NewBufferString("{{< highlight ")
   556  	result.WriteString(items[0]) // language
   557  	options := items[1:]
   558  	for i, opt := range options {
   559  		opt = strings.Replace(opt, "\"", "", -1)
   560  		if opt == "linenos" {
   561  			opt = "linenos=table"
   562  		}
   563  		if i == 0 {
   564  			opt = " \"" + opt
   565  		}
   566  		if i < len(options)-1 {
   567  			opt += ","
   568  		} else if i == len(options)-1 {
   569  			opt += "\""
   570  		}
   571  		result.WriteString(opt)
   572  	}
   573  
   574  	result.WriteString(" >}}")
   575  	return result.String()
   576  }
   577  
   578  func replaceImageTag(match string) string {
   579  	r := regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`)
   580  	result := bytes.NewBufferString("{{< figure ")
   581  	parts := r.FindStringSubmatch(match)
   582  	// Index 0 is the entire string, ignore
   583  	replaceOptionalPart(result, "class", parts[1])
   584  	replaceOptionalPart(result, "src", parts[2])
   585  	replaceOptionalPart(result, "width", parts[3])
   586  	replaceOptionalPart(result, "height", parts[4])
   587  	// title + alt
   588  	part := parts[5]
   589  	if len(part) > 0 {
   590  		splits := strings.Split(part, "'")
   591  		lenSplits := len(splits)
   592  		if lenSplits == 1 {
   593  			replaceOptionalPart(result, "title", splits[0])
   594  		} else if lenSplits == 3 {
   595  			replaceOptionalPart(result, "title", splits[1])
   596  		} else if lenSplits == 5 {
   597  			replaceOptionalPart(result, "title", splits[1])
   598  			replaceOptionalPart(result, "alt", splits[3])
   599  		}
   600  	}
   601  	result.WriteString(">}}")
   602  	return result.String()
   603  }
   604  
   605  func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) {
   606  	if len(part) > 0 {
   607  		buffer.WriteString(partName + "=\"" + part + "\" ")
   608  	}
   609  }