github.com/linchen2chris/hugo@v0.0.0-20230307053224-cec209389705/markup/asciidocext/convert.go (about)

     1  // Copyright 2020 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  // Package asciidocext converts AsciiDoc to HTML using Asciidoctor
    15  // external binary. The `asciidoc` module is reserved for a future golang
    16  // implementation.
    17  package asciidocext
    18  
    19  import (
    20  	"bytes"
    21  	"path/filepath"
    22  	"strings"
    23  
    24  	"github.com/gohugoio/hugo/common/hexec"
    25  	"github.com/gohugoio/hugo/htesting"
    26  
    27  	"github.com/gohugoio/hugo/identity"
    28  	"github.com/gohugoio/hugo/markup/asciidocext/asciidocext_config"
    29  	"github.com/gohugoio/hugo/markup/converter"
    30  	"github.com/gohugoio/hugo/markup/internal"
    31  	"github.com/gohugoio/hugo/markup/tableofcontents"
    32  	"golang.org/x/net/html"
    33  )
    34  
// pageSubset is the part of a page that parseArgs needs: it type-asserts
// the document to this interface and uses the base name of RelPermalink
// when building the asciidoctor "outdir" attribute.
//
// TODO: the RelPermalink patch for SVG posts is not working.
type pageSubset interface {
	RelPermalink() string
}
    39  
    40  // Provider is the package entry point.
    41  var Provider converter.ProviderProvider = provider{}
    42  
    43  type provider struct{}
    44  
    45  func (p provider) New(cfg converter.ProviderConfig) (converter.Provider, error) {
    46  	return converter.NewProvider("asciidocext", func(ctx converter.DocumentContext) (converter.Converter, error) {
    47  		return &asciidocConverter{
    48  			ctx: ctx,
    49  			cfg: cfg,
    50  		}, nil
    51  	}), nil
    52  }
    53  
// asciidocResult is the value returned by Convert: the rendered content
// plus the table of contents that extractTOC pulled out of it.
type asciidocResult struct {
	converter.ResultRender
	// toc holds the parsed table of contents; it is nil when the rendered
	// HTML contained no TOC element.
	toc *tableofcontents.Fragments
}

// TableOfContents returns the table of contents extracted during Convert.
func (r asciidocResult) TableOfContents() *tableofcontents.Fragments {
	return r.toc
}
    62  
// asciidocConverter renders a single document by shelling out to
// asciidoctor. Instances are created by provider.New.
type asciidocConverter struct {
	// ctx is the document context the converter was created for.
	ctx converter.DocumentContext
	// cfg is the provider configuration (logger, site and markup config).
	cfg converter.ProviderConfig
}
    67  
    68  func (a *asciidocConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
    69  	b, err := a.getAsciidocContent(ctx.Src, a.ctx)
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	content, toc, err := a.extractTOC(b)
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  	return asciidocResult{
    78  		ResultRender: converter.Bytes(content),
    79  		toc:          toc,
    80  	}, nil
    81  }
    82  
// Supports reports whether the converter supports the given feature
// identity. This converter always answers false.
func (a *asciidocConverter) Supports(_ identity.Identity) bool {
	return false
}
    86  
    87  // getAsciidocContent calls asciidoctor as an external helper
    88  // to convert AsciiDoc content to HTML.
    89  func (a *asciidocConverter) getAsciidocContent(src []byte, ctx converter.DocumentContext) ([]byte, error) {
    90  	if !hasAsciiDoc() {
    91  		a.cfg.Logger.Errorln("asciidoctor not found in $PATH: Please install.\n",
    92  			"                 Leaving AsciiDoc content unrendered.")
    93  		return src, nil
    94  	}
    95  
    96  	args := a.parseArgs(ctx)
    97  	args = append(args, "-")
    98  
    99  	a.cfg.Logger.Infoln("Rendering", ctx.DocumentName, " using asciidoctor args", args, "...")
   100  
   101  	return internal.ExternallyRenderContent(a.cfg, ctx, src, asciiDocBinaryName, args)
   102  }
   103  
   104  func (a *asciidocConverter) parseArgs(ctx converter.DocumentContext) []string {
   105  	cfg := a.cfg.MarkupConfig.AsciidocExt
   106  	args := []string{}
   107  
   108  	args = a.appendArg(args, "-b", cfg.Backend, asciidocext_config.CliDefault.Backend, asciidocext_config.AllowedBackend)
   109  
   110  	for _, extension := range cfg.Extensions {
   111  		if strings.LastIndexAny(extension, `\/.`) > -1 {
   112  			a.cfg.Logger.Errorln("Unsupported asciidoctor extension was passed in. Extension `" + extension + "` ignored. Only installed asciidoctor extensions are allowed.")
   113  			continue
   114  		}
   115  		args = append(args, "-r", extension)
   116  	}
   117  
   118  	for attributeKey, attributeValue := range cfg.Attributes {
   119  		if asciidocext_config.DisallowedAttributes[attributeKey] {
   120  			a.cfg.Logger.Errorln("Unsupported asciidoctor attribute was passed in. Attribute `" + attributeKey + "` ignored.")
   121  			continue
   122  		}
   123  
   124  		args = append(args, "-a", attributeKey+"="+attributeValue)
   125  	}
   126  
   127  	if cfg.WorkingFolderCurrent {
   128  		contentDir := filepath.Dir(ctx.Filename)
   129  		sourceDir := a.cfg.Cfg.GetString("source")
   130  		destinationDir := a.cfg.Cfg.GetString("destination")
   131  
   132  		if destinationDir == "" {
   133  			a.cfg.Logger.Errorln("markup.asciidocext.workingFolderCurrent requires hugo command option --destination to be set")
   134  		}
   135  		if !filepath.IsAbs(destinationDir) && sourceDir != "" {
   136  			destinationDir = filepath.Join(sourceDir, destinationDir)
   137  		}
   138  
   139  		var outDir string
   140  		var err error
   141  
   142  		file := filepath.Base(ctx.Filename)
   143  		if a.cfg.Cfg.GetBool("uglyUrls") || file == "_index.adoc" || file == "index.adoc" {
   144  			outDir, err = filepath.Abs(filepath.Dir(filepath.Join(destinationDir, ctx.DocumentName)))
   145  		} else {
   146  			postDir := ""
   147  			page, ok := ctx.Document.(pageSubset)
   148  			if ok {
   149  				postDir = filepath.Base(page.RelPermalink())
   150  			} else {
   151  				a.cfg.Logger.Errorln("unable to cast interface to pageSubset")
   152  			}
   153  
   154  			outDir, err = filepath.Abs(filepath.Join(destinationDir, filepath.Dir(ctx.DocumentName), postDir))
   155  		}
   156  
   157  		if err != nil {
   158  			a.cfg.Logger.Errorln("asciidoctor outDir: ", err)
   159  		}
   160  
   161  		args = append(args, "--base-dir", contentDir, "-a", "outdir="+outDir)
   162  	}
   163  
   164  	if cfg.NoHeaderOrFooter {
   165  		args = append(args, "--no-header-footer")
   166  	} else {
   167  		a.cfg.Logger.Warnln("asciidoctor parameter NoHeaderOrFooter is expected for correct html rendering")
   168  	}
   169  
   170  	if cfg.SectionNumbers {
   171  		args = append(args, "--section-numbers")
   172  	}
   173  
   174  	if cfg.Verbose {
   175  		args = append(args, "--verbose")
   176  	}
   177  
   178  	if cfg.Trace {
   179  		args = append(args, "--trace")
   180  	}
   181  
   182  	args = a.appendArg(args, "--failure-level", cfg.FailureLevel, asciidocext_config.CliDefault.FailureLevel, asciidocext_config.AllowedFailureLevel)
   183  
   184  	args = a.appendArg(args, "--safe-mode", cfg.SafeMode, asciidocext_config.CliDefault.SafeMode, asciidocext_config.AllowedSafeMode)
   185  
   186  	return args
   187  }
   188  
   189  func (a *asciidocConverter) appendArg(args []string, option, value, defaultValue string, allowedValues map[string]bool) []string {
   190  	if value != defaultValue {
   191  		if allowedValues[value] {
   192  			args = append(args, option, value)
   193  		} else {
   194  			a.cfg.Logger.Errorln("Unsupported asciidoctor value `" + value + "` for option " + option + " was passed in and will be ignored.")
   195  		}
   196  	}
   197  	return args
   198  }
   199  
// asciiDocBinaryName is the name of the external executable used for
// rendering.
const asciiDocBinaryName = "asciidoctor"

// hasAsciiDoc reports whether hexec.InPath finds the asciidoctor binary.
func hasAsciiDoc() bool {
	return hexec.InPath(asciiDocBinaryName)
}
   205  
   206  // extractTOC extracts the toc from the given src html.
   207  // It returns the html without the TOC, and the TOC data
   208  func (a *asciidocConverter) extractTOC(src []byte) ([]byte, *tableofcontents.Fragments, error) {
   209  	var buf bytes.Buffer
   210  	buf.Write(src)
   211  	node, err := html.Parse(&buf)
   212  	if err != nil {
   213  		return nil, nil, err
   214  	}
   215  	var (
   216  		f       func(*html.Node) bool
   217  		toc     *tableofcontents.Fragments
   218  		toVisit []*html.Node
   219  	)
   220  	f = func(n *html.Node) bool {
   221  		if n.Type == html.ElementNode && n.Data == "div" && attr(n, "id") == "toc" {
   222  			toc = parseTOC(n)
   223  			if !a.cfg.MarkupConfig.AsciidocExt.PreserveTOC {
   224  				n.Parent.RemoveChild(n)
   225  			}
   226  			return true
   227  		}
   228  		if n.FirstChild != nil {
   229  			toVisit = append(toVisit, n.FirstChild)
   230  		}
   231  		if n.NextSibling != nil && f(n.NextSibling) {
   232  			return true
   233  		}
   234  		for len(toVisit) > 0 {
   235  			nv := toVisit[0]
   236  			toVisit = toVisit[1:]
   237  			if f(nv) {
   238  				return true
   239  			}
   240  		}
   241  		return false
   242  	}
   243  	f(node)
   244  	if err != nil {
   245  		return nil, nil, err
   246  	}
   247  	buf.Reset()
   248  	err = html.Render(&buf, node)
   249  	if err != nil {
   250  		return nil, nil, err
   251  	}
   252  	// ltrim <html><head></head><body> and rtrim </body></html> which are added by html.Render
   253  	res := buf.Bytes()[25:]
   254  	res = res[:len(res)-14]
   255  	return res, toc, nil
   256  }
   257  
   258  // parseTOC returns a TOC root from the given toc Node
   259  func parseTOC(doc *html.Node) *tableofcontents.Fragments {
   260  	var (
   261  		toc tableofcontents.Builder
   262  		f   func(*html.Node, int, int)
   263  	)
   264  	f = func(n *html.Node, row, level int) {
   265  		if n.Type == html.ElementNode {
   266  			switch n.Data {
   267  			case "ul":
   268  				if level == 0 {
   269  					row++
   270  				}
   271  				level++
   272  				f(n.FirstChild, row, level)
   273  			case "li":
   274  				for c := n.FirstChild; c != nil; c = c.NextSibling {
   275  					if c.Type != html.ElementNode || c.Data != "a" {
   276  						continue
   277  					}
   278  					href := attr(c, "href")[1:]
   279  					toc.AddAt(&tableofcontents.Heading{
   280  						Title: nodeContent(c),
   281  						ID:    href,
   282  					}, row, level)
   283  				}
   284  				f(n.FirstChild, row, level)
   285  			}
   286  		}
   287  		if n.NextSibling != nil {
   288  			f(n.NextSibling, row, level)
   289  		}
   290  	}
   291  	f(doc.FirstChild, -1, 0)
   292  	return toc.Build()
   293  }
   294  
   295  func attr(node *html.Node, key string) string {
   296  	for _, a := range node.Attr {
   297  		if a.Key == key {
   298  			return a.Val
   299  		}
   300  	}
   301  	return ""
   302  }
   303  
   304  func nodeContent(node *html.Node) string {
   305  	var buf bytes.Buffer
   306  	for c := node.FirstChild; c != nil; c = c.NextSibling {
   307  		html.Render(&buf, c)
   308  	}
   309  	return buf.String()
   310  }
   311  
   312  // Supports returns whether Asciidoctor is installed on this computer.
   313  func Supports() bool {
   314  	hasBin := hasAsciiDoc()
   315  	if htesting.SupportsAll() {
   316  		if !hasBin {
   317  			panic("asciidoctor not installed")
   318  		}
   319  		return true
   320  	}
   321  	return hasBin
   322  }