github.com/shohhei1126/hugo@v0.42.2-0.20180623210752-3d5928889ad7/commands/import_jekyll.go (about)

     1  // Copyright 2016 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package commands
    15  
    16  import (
    17  	"bytes"
    18  	"errors"
    19  	"io"
    20  	"io/ioutil"
    21  	"os"
    22  	"path/filepath"
    23  	"regexp"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/gohugoio/hugo/helpers"
    29  	"github.com/gohugoio/hugo/hugofs"
    30  	"github.com/gohugoio/hugo/hugolib"
    31  	"github.com/gohugoio/hugo/parser"
    32  	"github.com/spf13/afero"
    33  	"github.com/spf13/cast"
    34  	"github.com/spf13/cobra"
    35  	jww "github.com/spf13/jwalterweatherman"
    36  )
    37  
// Compile-time assertion that *importCmd satisfies the cmder interface.
var _ cmder = (*importCmd)(nil)

// importCmd backs the "import" command. It embeds *baseCmd, which is
// populated by newBaseCmd in newImportCmd.
type importCmd struct {
	*baseCmd
}
    43  
    44  func newImportCmd() *importCmd {
    45  	cc := &importCmd{}
    46  
    47  	cc.baseCmd = newBaseCmd(&cobra.Command{
    48  		Use:   "import",
    49  		Short: "Import your site from others.",
    50  		Long: `Import your site from other web site generators like Jekyll.
    51  
    52  Import requires a subcommand, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
    53  		RunE: nil,
    54  	})
    55  
    56  	importJekyllCmd := &cobra.Command{
    57  		Use:   "jekyll",
    58  		Short: "hugo import from Jekyll",
    59  		Long: `hugo import from Jekyll.
    60  
    61  Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.",
    62  		RunE: cc.importFromJekyll,
    63  	}
    64  
    65  	importJekyllCmd.Flags().Bool("force", false, "allow import into non-empty target directory")
    66  
    67  	cc.cmd.AddCommand(importJekyllCmd)
    68  
    69  	return cc
    70  
    71  }
    72  
    73  func (i *importCmd) importFromJekyll(cmd *cobra.Command, args []string) error {
    74  
    75  	if len(args) < 2 {
    76  		return newUserError(`Import from Jekyll requires two paths, e.g. ` + "`hugo import jekyll jekyll_root_path target_path`.")
    77  	}
    78  
    79  	jekyllRoot, err := filepath.Abs(filepath.Clean(args[0]))
    80  	if err != nil {
    81  		return newUserError("Path error:", args[0])
    82  	}
    83  
    84  	targetDir, err := filepath.Abs(filepath.Clean(args[1]))
    85  	if err != nil {
    86  		return newUserError("Path error:", args[1])
    87  	}
    88  
    89  	jww.INFO.Println("Import Jekyll from:", jekyllRoot, "to:", targetDir)
    90  
    91  	if strings.HasPrefix(filepath.Dir(targetDir), jekyllRoot) {
    92  		return newUserError("Target path should not be inside the Jekyll root, aborting.")
    93  	}
    94  
    95  	forceImport, _ := cmd.Flags().GetBool("force")
    96  
    97  	fs := afero.NewOsFs()
    98  	jekyllPostDirs, hasAnyPost := i.getJekyllDirInfo(fs, jekyllRoot)
    99  	if !hasAnyPost {
   100  		return errors.New("Your Jekyll root contains neither posts nor drafts, aborting.")
   101  	}
   102  
   103  	site, err := i.createSiteFromJekyll(jekyllRoot, targetDir, jekyllPostDirs, forceImport)
   104  
   105  	if err != nil {
   106  		return newUserError(err)
   107  	}
   108  
   109  	jww.FEEDBACK.Println("Importing...")
   110  
   111  	fileCount := 0
   112  	callback := func(path string, fi os.FileInfo, err error) error {
   113  		if err != nil {
   114  			return err
   115  		}
   116  
   117  		if fi.IsDir() {
   118  			return nil
   119  		}
   120  
   121  		relPath, err := filepath.Rel(jekyllRoot, path)
   122  		if err != nil {
   123  			return newUserError("Get rel path error:", path)
   124  		}
   125  
   126  		relPath = filepath.ToSlash(relPath)
   127  		draft := false
   128  
   129  		switch {
   130  		case strings.Contains(relPath, "_posts/"):
   131  			relPath = filepath.Join("content/post", strings.Replace(relPath, "_posts/", "", -1))
   132  		case strings.Contains(relPath, "_drafts/"):
   133  			relPath = filepath.Join("content/draft", strings.Replace(relPath, "_drafts/", "", -1))
   134  			draft = true
   135  		default:
   136  			return nil
   137  		}
   138  
   139  		fileCount++
   140  		return convertJekyllPost(site, path, relPath, targetDir, draft)
   141  	}
   142  
   143  	for jekyllPostDir, hasAnyPostInDir := range jekyllPostDirs {
   144  		if hasAnyPostInDir {
   145  			if err = helpers.SymbolicWalk(hugofs.Os, filepath.Join(jekyllRoot, jekyllPostDir), callback); err != nil {
   146  				return err
   147  			}
   148  		}
   149  	}
   150  
   151  	jww.FEEDBACK.Println("Congratulations!", fileCount, "post(s) imported!")
   152  	jww.FEEDBACK.Println("Now, start Hugo by yourself:\n" +
   153  		"$ git clone https://github.com/spf13/herring-cove.git " + args[1] + "/themes/herring-cove")
   154  	jww.FEEDBACK.Println("$ cd " + args[1] + "\n$ hugo server --theme=herring-cove")
   155  
   156  	return nil
   157  }
   158  
   159  func (i *importCmd) getJekyllDirInfo(fs afero.Fs, jekyllRoot string) (map[string]bool, bool) {
   160  	postDirs := make(map[string]bool)
   161  	hasAnyPost := false
   162  	if entries, err := ioutil.ReadDir(jekyllRoot); err == nil {
   163  		for _, entry := range entries {
   164  			if entry.IsDir() {
   165  				subDir := filepath.Join(jekyllRoot, entry.Name())
   166  				if isPostDir, hasAnyPostInDir := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
   167  					postDirs[entry.Name()] = hasAnyPostInDir
   168  					if hasAnyPostInDir {
   169  						hasAnyPost = true
   170  					}
   171  				}
   172  			}
   173  		}
   174  	}
   175  	return postDirs, hasAnyPost
   176  }
   177  
   178  func (i *importCmd) retrieveJekyllPostDir(fs afero.Fs, dir string) (bool, bool) {
   179  	if strings.HasSuffix(dir, "_posts") || strings.HasSuffix(dir, "_drafts") {
   180  		isEmpty, _ := helpers.IsEmpty(dir, fs)
   181  		return true, !isEmpty
   182  	}
   183  
   184  	if entries, err := ioutil.ReadDir(dir); err == nil {
   185  		for _, entry := range entries {
   186  			if entry.IsDir() {
   187  				subDir := filepath.Join(dir, entry.Name())
   188  				if isPostDir, hasAnyPost := i.retrieveJekyllPostDir(fs, subDir); isPostDir {
   189  					return isPostDir, hasAnyPost
   190  				}
   191  			}
   192  		}
   193  	}
   194  
   195  	return false, true
   196  }
   197  
   198  func (i *importCmd) createSiteFromJekyll(jekyllRoot, targetDir string, jekyllPostDirs map[string]bool, force bool) (*hugolib.Site, error) {
   199  	s, err := hugolib.NewSiteDefaultLang()
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  
   204  	fs := s.Fs.Source
   205  	if exists, _ := helpers.Exists(targetDir, fs); exists {
   206  		if isDir, _ := helpers.IsDir(targetDir, fs); !isDir {
   207  			return nil, errors.New("Target path \"" + targetDir + "\" already exists but not a directory")
   208  		}
   209  
   210  		isEmpty, _ := helpers.IsEmpty(targetDir, fs)
   211  
   212  		if !isEmpty && !force {
   213  			return nil, errors.New("Target path \"" + targetDir + "\" already exists and is not empty")
   214  		}
   215  	}
   216  
   217  	jekyllConfig := i.loadJekyllConfig(fs, jekyllRoot)
   218  
   219  	mkdir(targetDir, "layouts")
   220  	mkdir(targetDir, "content")
   221  	mkdir(targetDir, "archetypes")
   222  	mkdir(targetDir, "static")
   223  	mkdir(targetDir, "data")
   224  	mkdir(targetDir, "themes")
   225  
   226  	i.createConfigFromJekyll(fs, targetDir, "yaml", jekyllConfig)
   227  
   228  	i.copyJekyllFilesAndFolders(jekyllRoot, filepath.Join(targetDir, "static"), jekyllPostDirs)
   229  
   230  	return s, nil
   231  }
   232  
   233  func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]interface{} {
   234  	path := filepath.Join(jekyllRoot, "_config.yml")
   235  
   236  	exists, err := helpers.Exists(path, fs)
   237  
   238  	if err != nil || !exists {
   239  		jww.WARN.Println("_config.yaml not found: Is the specified Jekyll root correct?")
   240  		return nil
   241  	}
   242  
   243  	f, err := fs.Open(path)
   244  	if err != nil {
   245  		return nil
   246  	}
   247  
   248  	defer f.Close()
   249  
   250  	b, err := ioutil.ReadAll(f)
   251  
   252  	if err != nil {
   253  		return nil
   254  	}
   255  
   256  	c, err := parser.HandleYAMLMetaData(b)
   257  
   258  	if err != nil {
   259  		return nil
   260  	}
   261  
   262  	return c
   263  }
   264  
   265  func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind string, jekyllConfig map[string]interface{}) (err error) {
   266  	title := "My New Hugo Site"
   267  	baseURL := "http://example.org/"
   268  
   269  	for key, value := range jekyllConfig {
   270  		lowerKey := strings.ToLower(key)
   271  
   272  		switch lowerKey {
   273  		case "title":
   274  			if str, ok := value.(string); ok {
   275  				title = str
   276  			}
   277  
   278  		case "url":
   279  			if str, ok := value.(string); ok {
   280  				baseURL = str
   281  			}
   282  		}
   283  	}
   284  
   285  	in := map[string]interface{}{
   286  		"baseURL":            baseURL,
   287  		"title":              title,
   288  		"languageCode":       "en-us",
   289  		"disablePathToLower": true,
   290  	}
   291  	kind = parser.FormatSanitize(kind)
   292  
   293  	var buf bytes.Buffer
   294  	err = parser.InterfaceToConfig(in, parser.FormatToLeadRune(kind), &buf)
   295  	if err != nil {
   296  		return err
   297  	}
   298  
   299  	return helpers.WriteToDisk(filepath.Join(inpath, "config."+kind), &buf, fs)
   300  }
   301  
   302  func copyFile(source string, dest string) error {
   303  	sf, err := os.Open(source)
   304  	if err != nil {
   305  		return err
   306  	}
   307  	defer sf.Close()
   308  	df, err := os.Create(dest)
   309  	if err != nil {
   310  		return err
   311  	}
   312  	defer df.Close()
   313  	_, err = io.Copy(df, sf)
   314  	if err == nil {
   315  		si, err := os.Stat(source)
   316  		if err != nil {
   317  			err = os.Chmod(dest, si.Mode())
   318  
   319  			if err != nil {
   320  				return err
   321  			}
   322  		}
   323  
   324  	}
   325  	return nil
   326  }
   327  
   328  func copyDir(source string, dest string) error {
   329  	fi, err := os.Stat(source)
   330  	if err != nil {
   331  		return err
   332  	}
   333  	if !fi.IsDir() {
   334  		return errors.New(source + " is not a directory")
   335  	}
   336  	err = os.MkdirAll(dest, fi.Mode())
   337  	if err != nil {
   338  		return err
   339  	}
   340  	entries, err := ioutil.ReadDir(source)
   341  	for _, entry := range entries {
   342  		sfp := filepath.Join(source, entry.Name())
   343  		dfp := filepath.Join(dest, entry.Name())
   344  		if entry.IsDir() {
   345  			err = copyDir(sfp, dfp)
   346  			if err != nil {
   347  				jww.ERROR.Println(err)
   348  			}
   349  		} else {
   350  			err = copyFile(sfp, dfp)
   351  			if err != nil {
   352  				jww.ERROR.Println(err)
   353  			}
   354  		}
   355  
   356  	}
   357  	return nil
   358  }
   359  
   360  func (i *importCmd) copyJekyllFilesAndFolders(jekyllRoot, dest string, jekyllPostDirs map[string]bool) (err error) {
   361  	fi, err := os.Stat(jekyllRoot)
   362  	if err != nil {
   363  		return err
   364  	}
   365  	if !fi.IsDir() {
   366  		return errors.New(jekyllRoot + " is not a directory")
   367  	}
   368  	err = os.MkdirAll(dest, fi.Mode())
   369  	if err != nil {
   370  		return err
   371  	}
   372  	entries, err := ioutil.ReadDir(jekyllRoot)
   373  	for _, entry := range entries {
   374  		sfp := filepath.Join(jekyllRoot, entry.Name())
   375  		dfp := filepath.Join(dest, entry.Name())
   376  		if entry.IsDir() {
   377  			if entry.Name()[0] != '_' && entry.Name()[0] != '.' {
   378  				if _, ok := jekyllPostDirs[entry.Name()]; !ok {
   379  					err = copyDir(sfp, dfp)
   380  					if err != nil {
   381  						jww.ERROR.Println(err)
   382  					}
   383  				}
   384  			}
   385  		} else {
   386  			lowerEntryName := strings.ToLower(entry.Name())
   387  			exceptSuffix := []string{".md", ".markdown", ".html", ".htm",
   388  				".xml", ".textile", "rakefile", "gemfile", ".lock"}
   389  			isExcept := false
   390  			for _, suffix := range exceptSuffix {
   391  				if strings.HasSuffix(lowerEntryName, suffix) {
   392  					isExcept = true
   393  					break
   394  				}
   395  			}
   396  
   397  			if !isExcept && entry.Name()[0] != '.' && entry.Name()[0] != '_' {
   398  				err = copyFile(sfp, dfp)
   399  				if err != nil {
   400  					jww.ERROR.Println(err)
   401  				}
   402  			}
   403  		}
   404  
   405  	}
   406  	return nil
   407  }
   408  
   409  func parseJekyllFilename(filename string) (time.Time, string, error) {
   410  	re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`)
   411  	r := re.FindAllStringSubmatch(filename, -1)
   412  	if len(r) == 0 {
   413  		return time.Now(), "", errors.New("filename not match")
   414  	}
   415  
   416  	postDate, err := time.Parse("2006-1-2", r[0][1])
   417  	if err != nil {
   418  		return time.Now(), "", err
   419  	}
   420  
   421  	postName := r[0][2]
   422  
   423  	return postDate, postName, nil
   424  }
   425  
   426  func convertJekyllPost(s *hugolib.Site, path, relPath, targetDir string, draft bool) error {
   427  	jww.TRACE.Println("Converting", path)
   428  
   429  	filename := filepath.Base(path)
   430  	postDate, postName, err := parseJekyllFilename(filename)
   431  	if err != nil {
   432  		jww.WARN.Printf("Failed to parse filename '%s': %s. Skipping.", filename, err)
   433  		return nil
   434  	}
   435  
   436  	jww.TRACE.Println(filename, postDate, postName)
   437  
   438  	targetFile := filepath.Join(targetDir, relPath)
   439  	targetParentDir := filepath.Dir(targetFile)
   440  	os.MkdirAll(targetParentDir, 0777)
   441  
   442  	contentBytes, err := ioutil.ReadFile(path)
   443  	if err != nil {
   444  		jww.ERROR.Println("Read file error:", path)
   445  		return err
   446  	}
   447  
   448  	psr, err := parser.ReadFrom(bytes.NewReader(contentBytes))
   449  	if err != nil {
   450  		jww.ERROR.Println("Parse file error:", path)
   451  		return err
   452  	}
   453  
   454  	metadata, err := psr.Metadata()
   455  	if err != nil {
   456  		jww.ERROR.Println("Processing file error:", path)
   457  		return err
   458  	}
   459  
   460  	newmetadata, err := convertJekyllMetaData(metadata, postName, postDate, draft)
   461  	if err != nil {
   462  		jww.ERROR.Println("Convert metadata error:", path)
   463  		return err
   464  	}
   465  
   466  	jww.TRACE.Println(newmetadata)
   467  	content := convertJekyllContent(newmetadata, string(psr.Content()))
   468  
   469  	page, err := s.NewPage(filename)
   470  	if err != nil {
   471  		jww.ERROR.Println("New page error", filename)
   472  		return err
   473  	}
   474  
   475  	page.SetSourceContent([]byte(content))
   476  	page.SetSourceMetaData(newmetadata, parser.FormatToLeadRune("yaml"))
   477  	page.SaveSourceAs(targetFile)
   478  
   479  	jww.TRACE.Println("Target file:", targetFile)
   480  
   481  	return nil
   482  }
   483  
   484  func convertJekyllMetaData(m interface{}, postName string, postDate time.Time, draft bool) (interface{}, error) {
   485  	metadata, err := cast.ToStringMapE(m)
   486  	if err != nil {
   487  		return nil, err
   488  	}
   489  
   490  	if draft {
   491  		metadata["draft"] = true
   492  	}
   493  
   494  	for key, value := range metadata {
   495  		lowerKey := strings.ToLower(key)
   496  
   497  		switch lowerKey {
   498  		case "layout":
   499  			delete(metadata, key)
   500  		case "permalink":
   501  			if str, ok := value.(string); ok {
   502  				metadata["url"] = str
   503  			}
   504  			delete(metadata, key)
   505  		case "category":
   506  			if str, ok := value.(string); ok {
   507  				metadata["categories"] = []string{str}
   508  			}
   509  			delete(metadata, key)
   510  		case "excerpt_separator":
   511  			if key != lowerKey {
   512  				delete(metadata, key)
   513  				metadata[lowerKey] = value
   514  			}
   515  		case "date":
   516  			if str, ok := value.(string); ok {
   517  				re := regexp.MustCompile(`(\d+):(\d+):(\d+)`)
   518  				r := re.FindAllStringSubmatch(str, -1)
   519  				if len(r) > 0 {
   520  					hour, _ := strconv.Atoi(r[0][1])
   521  					minute, _ := strconv.Atoi(r[0][2])
   522  					second, _ := strconv.Atoi(r[0][3])
   523  					postDate = time.Date(postDate.Year(), postDate.Month(), postDate.Day(), hour, minute, second, 0, time.UTC)
   524  				}
   525  			}
   526  			delete(metadata, key)
   527  		}
   528  
   529  	}
   530  
   531  	metadata["date"] = postDate.Format(time.RFC3339)
   532  
   533  	return metadata, nil
   534  }
   535  
   536  func convertJekyllContent(m interface{}, content string) string {
   537  	metadata, _ := cast.ToStringMapE(m)
   538  
   539  	lines := strings.Split(content, "\n")
   540  	var resultLines []string
   541  	for _, line := range lines {
   542  		resultLines = append(resultLines, strings.Trim(line, "\r\n"))
   543  	}
   544  
   545  	content = strings.Join(resultLines, "\n")
   546  
   547  	excerptSep := "<!--more-->"
   548  	if value, ok := metadata["excerpt_separator"]; ok {
   549  		if str, strOk := value.(string); strOk {
   550  			content = strings.Replace(content, strings.TrimSpace(str), excerptSep, -1)
   551  		}
   552  	}
   553  
   554  	replaceList := []struct {
   555  		re      *regexp.Regexp
   556  		replace string
   557  	}{
   558  		{regexp.MustCompile("(?i)<!-- more -->"), "<!--more-->"},
   559  		{regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"},
   560  		{regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), "{{< highlight $1 >}}"},
   561  		{regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"},
   562  	}
   563  
   564  	for _, replace := range replaceList {
   565  		content = replace.re.ReplaceAllString(content, replace.replace)
   566  	}
   567  
   568  	replaceListFunc := []struct {
   569  		re      *regexp.Regexp
   570  		replace func(string) string
   571  	}{
   572  		// Octopress image tag: http://octopress.org/docs/plugins/image-tag/
   573  		{regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag},
   574  	}
   575  
   576  	for _, replace := range replaceListFunc {
   577  		content = replace.re.ReplaceAllStringFunc(content, replace.replace)
   578  	}
   579  
   580  	return content
   581  }
   582  
   583  func replaceImageTag(match string) string {
   584  	r := regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`)
   585  	result := bytes.NewBufferString("{{< figure ")
   586  	parts := r.FindStringSubmatch(match)
   587  	// Index 0 is the entire string, ignore
   588  	replaceOptionalPart(result, "class", parts[1])
   589  	replaceOptionalPart(result, "src", parts[2])
   590  	replaceOptionalPart(result, "width", parts[3])
   591  	replaceOptionalPart(result, "height", parts[4])
   592  	// title + alt
   593  	part := parts[5]
   594  	if len(part) > 0 {
   595  		splits := strings.Split(part, "'")
   596  		lenSplits := len(splits)
   597  		if lenSplits == 1 {
   598  			replaceOptionalPart(result, "title", splits[0])
   599  		} else if lenSplits == 3 {
   600  			replaceOptionalPart(result, "title", splits[1])
   601  		} else if lenSplits == 5 {
   602  			replaceOptionalPart(result, "title", splits[1])
   603  			replaceOptionalPart(result, "alt", splits[3])
   604  		}
   605  	}
   606  	result.WriteString(">}}")
   607  	return result.String()
   608  
   609  }
   610  func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) {
   611  	if len(part) > 0 {
   612  		buffer.WriteString(partName + "=\"" + part + "\" ")
   613  	}
   614  }