github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/cmd/present2md/main.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Present2md converts legacy-syntax present files to Markdown-syntax present files.
     6  //
     7  // Usage:
     8  //
     9  //	present2md [-w] [file ...]
    10  //
    11  // By default, present2md prints the Markdown-syntax form of each input file to standard output.
    12  // If no input file is listed, standard input is used.
    13  //
    14  // The -w flag causes present2md to update the files in place, overwriting each with its
    15  // Markdown-syntax equivalent.
    16  //
    17  // Examples
    18  //
    19  //	present2md your.article
    20  //	present2md -w *.article
    21  //
    22  package main
    23  
    24  import (
    25  	"bytes"
    26  	"flag"
    27  	"fmt"
    28  	"io"
    29  	"io/ioutil"
    30  	"log"
    31  	"net/url"
    32  	"os"
    33  	"strings"
    34  	"unicode"
    35  	"unicode/utf8"
    36  
    37  	"github.com/jhump/golang-x-tools/present"
    38  )
    39  
    40  func usage() {
    41  	fmt.Fprintf(os.Stderr, "usage: present2md [-w] [file ...]\n")
    42  	os.Exit(2)
    43  }
    44  
    45  var (
    46  	writeBack  = flag.Bool("w", false, "write conversions back to original files")
    47  	exitStatus = 0
    48  )
    49  
    50  func main() {
    51  	log.SetPrefix("present2md: ")
    52  	log.SetFlags(0)
    53  	flag.Usage = usage
    54  	flag.Parse()
    55  
    56  	args := flag.Args()
    57  	if len(args) == 0 {
    58  		if *writeBack {
    59  			log.Fatalf("cannot use -w with standard input")
    60  		}
    61  		convert(os.Stdin, "stdin", false)
    62  		return
    63  	}
    64  
    65  	for _, arg := range args {
    66  		f, err := os.Open(arg)
    67  		if err != nil {
    68  			log.Print(err)
    69  			exitStatus = 1
    70  			continue
    71  		}
    72  		err = convert(f, arg, *writeBack)
    73  		f.Close()
    74  		if err != nil {
    75  			log.Print(err)
    76  			exitStatus = 1
    77  		}
    78  	}
    79  	os.Exit(exitStatus)
    80  }
    81  
    82  // convert reads the data from r, parses it as legacy present,
    83  // and converts it to Markdown-enabled present.
    84  // If any errors occur, the data is reported as coming from file.
    85  // If writeBack is true, the converted version is written back to file.
    86  // If writeBack is false, the converted version is printed to standard output.
    87  func convert(r io.Reader, file string, writeBack bool) error {
    88  	data, err := ioutil.ReadAll(r)
    89  	if err != nil {
    90  		return err
    91  	}
    92  	if bytes.HasPrefix(data, []byte("# ")) {
    93  		return fmt.Errorf("%v: already markdown", file)
    94  	}
    95  
    96  	// Convert all comments before parsing the document.
    97  	// The '//' comment is treated as normal text and so
    98  	// is passed through the translation unaltered.
    99  	data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1)
   100  
   101  	doc, err := present.Parse(bytes.NewReader(data), file, 0)
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	// Title and Subtitle, Time, Tags.
   107  	var md bytes.Buffer
   108  	fmt.Fprintf(&md, "# %s\n", doc.Title)
   109  	if doc.Subtitle != "" {
   110  		fmt.Fprintf(&md, "%s\n", doc.Subtitle)
   111  	}
   112  	if !doc.Time.IsZero() {
   113  		fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006"))
   114  	}
   115  	if len(doc.Tags) > 0 {
   116  		fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", "))
   117  	}
   118  
   119  	// Summary, defaulting to first paragraph of section.
   120  	// (Summaries must be explicit for Markdown-enabled present,
   121  	// and the expectation is that they will be shorter than the
   122  	// whole first paragraph. But this is what the blog does today.)
   123  	if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 {
   124  		for _, elem := range doc.Sections[0].Elem {
   125  			text, ok := elem.(present.Text)
   126  			if !ok || text.Pre {
   127  				// skip everything but non-text elements
   128  				continue
   129  			}
   130  			fmt.Fprintf(&md, "Summary:")
   131  			for i, line := range text.Lines {
   132  				fmt.Fprintf(&md, " ")
   133  				printStyled(&md, line, i == 0)
   134  			}
   135  			fmt.Fprintf(&md, "\n")
   136  			break
   137  		}
   138  	}
   139  
   140  	// Authors
   141  	for _, a := range doc.Authors {
   142  		fmt.Fprintf(&md, "\n")
   143  		for _, elem := range a.Elem {
   144  			switch elem := elem.(type) {
   145  			default:
   146  				// Can only happen if this type switch is incomplete, which is a bug.
   147  				log.Fatalf("%s: unexpected author type %T", file, elem)
   148  			case present.Text:
   149  				for _, line := range elem.Lines {
   150  					fmt.Fprintf(&md, "%s\n", markdownEscape(line, true))
   151  				}
   152  			case present.Link:
   153  				fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true))
   154  			}
   155  		}
   156  	}
   157  
   158  	// Invariant: the output ends in non-blank line now,
   159  	// and after printing any piece of the file below,
   160  	// the output should still end in a non-blank line.
   161  	// If a blank line separator is needed, it should be printed
   162  	// before the block that needs separating, not after.
   163  
   164  	if len(doc.TitleNotes) > 0 {
   165  		fmt.Fprintf(&md, "\n")
   166  		for _, line := range doc.TitleNotes {
   167  			fmt.Fprintf(&md, ": %s\n", line)
   168  		}
   169  	}
   170  
   171  	if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") {
   172  		// Blog drops section headers when there is only one section.
   173  		// Don't print a title in this case, to make clear that it's being dropped.
   174  		fmt.Fprintf(&md, "\n##\n")
   175  		printSectionBody(file, 1, &md, doc.Sections[0].Elem)
   176  	} else {
   177  		for _, s := range doc.Sections {
   178  			fmt.Fprintf(&md, "\n")
   179  			fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false))
   180  			printSectionBody(file, 1, &md, s.Elem)
   181  		}
   182  	}
   183  
   184  	if !writeBack {
   185  		os.Stdout.Write(md.Bytes())
   186  		return nil
   187  	}
   188  	return ioutil.WriteFile(file, md.Bytes(), 0666)
   189  }
   190  
   191  func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) {
   192  	for _, elem := range elems {
   193  		switch elem := elem.(type) {
   194  		default:
   195  			// Can only happen if this type switch is incomplete, which is a bug.
   196  			log.Fatalf("%s: unexpected present element type %T", file, elem)
   197  
   198  		case present.Text:
   199  			fmt.Fprintf(w, "\n")
   200  			lines := elem.Lines
   201  			for len(lines) > 0 && lines[0] == "" {
   202  				lines = lines[1:]
   203  			}
   204  			if elem.Pre {
   205  				for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") {
   206  					if line == "" {
   207  						fmt.Fprintf(w, "\n")
   208  					} else {
   209  						fmt.Fprintf(w, "\t%s\n", line)
   210  					}
   211  				}
   212  			} else {
   213  				for _, line := range elem.Lines {
   214  					printStyled(w, line, true)
   215  					fmt.Fprintf(w, "\n")
   216  				}
   217  			}
   218  
   219  		case present.List:
   220  			fmt.Fprintf(w, "\n")
   221  			for _, item := range elem.Bullet {
   222  				fmt.Fprintf(w, "  - ")
   223  				for i, line := range strings.Split(item, "\n") {
   224  					if i > 0 {
   225  						fmt.Fprintf(w, "    ")
   226  					}
   227  					printStyled(w, line, false)
   228  					fmt.Fprintf(w, "\n")
   229  				}
   230  			}
   231  
   232  		case present.Section:
   233  			fmt.Fprintf(w, "\n")
   234  			sep := " "
   235  			if elem.Title == "" {
   236  				sep = ""
   237  			}
   238  			fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false))
   239  			printSectionBody(file, depth+1, w, elem.Elem)
   240  
   241  		case interface{ PresentCmd() string }:
   242  			// If there are multiple present commands in a row, don't print a blank line before the second etc.
   243  			b := w.Bytes()
   244  			sep := "\n"
   245  			if len(b) > 0 {
   246  				i := bytes.LastIndexByte(b[:len(b)-1], '\n')
   247  				if b[i+1] == '.' {
   248  					sep = ""
   249  				}
   250  			}
   251  			fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd())
   252  		}
   253  	}
   254  }
   255  
   256  func markdownEscape(s string, startLine bool) string {
   257  	var b strings.Builder
   258  	for i, r := range s {
   259  		switch {
   260  		case r == '#' && i == 0,
   261  			r == '*',
   262  			r == '_',
   263  			r == '<' && (i == 0 || s[i-1] != ' ') && i+1 < len(s) && s[i+1] != ' ',
   264  			r == '[' && strings.Contains(s[i:], "]("):
   265  			b.WriteRune('\\')
   266  		}
   267  		b.WriteRune(r)
   268  	}
   269  	return b.String()
   270  }
   271  
   272  // Copy of ../../present/style.go adjusted to produce Markdown instead of HTML.
   273  
/*
	Fonts are demarcated by an initial and final char bracketing a
	space-delimited word, plus possibly some terminal punctuation.
	The chars are
		_ for italic
		* for bold
		` (back quote) for fixed width.
	Inner appearances of the char become spaces. For instance,
		_this_is_italic_!
	becomes
		_this is italic_!
	in the Markdown output.
*/
   286  
   287  func printStyled(w *bytes.Buffer, text string, startLine bool) {
   288  	w.WriteString(font(text, startLine))
   289  }
   290  
   291  // font returns s with font indicators turned into HTML font tags.
   292  func font(s string, startLine bool) string {
   293  	if !strings.ContainsAny(s, "[`_*") {
   294  		return markdownEscape(s, startLine)
   295  	}
   296  	words := split(s)
   297  	var b bytes.Buffer
   298  Word:
   299  	for w, word := range words {
   300  		words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word
   301  		if len(word) < 2 {
   302  			continue Word
   303  		}
   304  		if link, _ := parseInlineLink(word); link != "" {
   305  			words[w] = link
   306  			continue Word
   307  		}
   308  		const marker = "_*`"
   309  		// Initial punctuation is OK but must be peeled off.
   310  		first := strings.IndexAny(word, marker)
   311  		if first == -1 {
   312  			continue Word
   313  		}
   314  		// Opening marker must be at the beginning of the token or else preceded by punctuation.
   315  		if first != 0 {
   316  			r, _ := utf8.DecodeLastRuneInString(word[:first])
   317  			if !unicode.IsPunct(r) {
   318  				continue Word
   319  			}
   320  		}
   321  		open, word := markdownEscape(word[:first], startLine && w == 0), word[first:]
   322  		char := word[0] // ASCII is OK.
   323  		close := ""
   324  		switch char {
   325  		default:
   326  			continue Word
   327  		case '_':
   328  			open += "_"
   329  			close = "_"
   330  		case '*':
   331  			open += "**"
   332  			close = "**"
   333  		case '`':
   334  			open += "`"
   335  			close = "`"
   336  		}
   337  		// Closing marker must be at the end of the token or else followed by punctuation.
   338  		last := strings.LastIndex(word, word[:1])
   339  		if last == 0 {
   340  			continue Word
   341  		}
   342  		if last+1 != len(word) {
   343  			r, _ := utf8.DecodeRuneInString(word[last+1:])
   344  			if !unicode.IsPunct(r) {
   345  				continue Word
   346  			}
   347  		}
   348  		head, tail := word[:last+1], word[last+1:]
   349  		b.Reset()
   350  		var wid int
   351  		for i := 1; i < len(head)-1; i += wid {
   352  			var r rune
   353  			r, wid = utf8.DecodeRuneInString(head[i:])
   354  			if r != rune(char) {
   355  				// Ordinary character.
   356  				b.WriteRune(r)
   357  				continue
   358  			}
   359  			if head[i+1] != char {
   360  				// Inner char becomes space.
   361  				b.WriteRune(' ')
   362  				continue
   363  			}
   364  			// Doubled char becomes real char.
   365  			// Not worth worrying about "_x__".
   366  			b.WriteByte(char)
   367  			wid++ // Consumed two chars, both ASCII.
   368  		}
   369  		text := b.String()
   370  		if close == "`" {
   371  			for strings.Contains(text, close) {
   372  				open += "`"
   373  				close += "`"
   374  			}
   375  		} else {
   376  			text = markdownEscape(text, false)
   377  		}
   378  		words[w] = open + text + close + tail
   379  	}
   380  	return strings.Join(words, "")
   381  }
   382  
   383  // split is like strings.Fields but also returns the runs of spaces
   384  // and treats inline links as distinct words.
   385  func split(s string) []string {
   386  	var (
   387  		words = make([]string, 0, 10)
   388  		start = 0
   389  	)
   390  
   391  	// appendWord appends the string s[start:end] to the words slice.
   392  	// If the word contains the beginning of a link, the non-link portion
   393  	// of the word and the entire link are appended as separate words,
   394  	// and the start index is advanced to the end of the link.
   395  	appendWord := func(end int) {
   396  		if j := strings.Index(s[start:end], "[["); j > -1 {
   397  			if _, l := parseInlineLink(s[start+j:]); l > 0 {
   398  				// Append portion before link, if any.
   399  				if j > 0 {
   400  					words = append(words, s[start:start+j])
   401  				}
   402  				// Append link itself.
   403  				words = append(words, s[start+j:start+j+l])
   404  				// Advance start index to end of link.
   405  				start = start + j + l
   406  				return
   407  			}
   408  		}
   409  		// No link; just add the word.
   410  		words = append(words, s[start:end])
   411  		start = end
   412  	}
   413  
   414  	wasSpace := false
   415  	for i, r := range s {
   416  		isSpace := unicode.IsSpace(r)
   417  		if i > start && isSpace != wasSpace {
   418  			appendWord(i)
   419  		}
   420  		wasSpace = isSpace
   421  	}
   422  	for start < len(s) {
   423  		appendWord(len(s))
   424  	}
   425  	return words
   426  }
   427  
   428  // parseInlineLink parses an inline link at the start of s, and returns
   429  // a rendered Markdown link and the total length of the raw inline link.
   430  // If no inline link is present, it returns all zeroes.
   431  func parseInlineLink(s string) (link string, length int) {
   432  	if !strings.HasPrefix(s, "[[") {
   433  		return
   434  	}
   435  	end := strings.Index(s, "]]")
   436  	if end == -1 {
   437  		return
   438  	}
   439  	urlEnd := strings.Index(s, "]")
   440  	rawURL := s[2:urlEnd]
   441  	const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3
   442  	if strings.ContainsAny(rawURL, badURLChars) {
   443  		return
   444  	}
   445  	if urlEnd == end {
   446  		simpleURL := ""
   447  		url, err := url.Parse(rawURL)
   448  		if err == nil {
   449  			// If the URL is http://foo.com, drop the http://
   450  			// In other words, render [[http://golang.org]] as:
   451  			//   <a href="http://golang.org">golang.org</a>
   452  			if strings.HasPrefix(rawURL, url.Scheme+"://") {
   453  				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://")
   454  			} else if strings.HasPrefix(rawURL, url.Scheme+":") {
   455  				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":")
   456  			}
   457  		}
   458  		return renderLink(rawURL, simpleURL), end + 2
   459  	}
   460  	if s[urlEnd:urlEnd+2] != "][" {
   461  		return
   462  	}
   463  	text := s[urlEnd+2 : end]
   464  	return renderLink(rawURL, text), end + 2
   465  }
   466  
   467  func renderLink(href, text string) string {
   468  	text = font(text, false)
   469  	if text == "" {
   470  		text = markdownEscape(href, false)
   471  	}
   472  	return "[" + text + "](" + href + ")"
   473  }