github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/cmd/present2md/main.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Present2md converts legacy-syntax present files to Markdown-syntax present files. 6 // 7 // Usage: 8 // 9 // present2md [-w] [file ...] 10 // 11 // By default, present2md prints the Markdown-syntax form of each input file to standard output. 12 // If no input file is listed, standard input is used. 13 // 14 // The -w flag causes present2md to update the files in place, overwriting each with its 15 // Markdown-syntax equivalent. 16 // 17 // Examples 18 // 19 // present2md your.article 20 // present2md -w *.article 21 package main 22 23 import ( 24 "bytes" 25 "flag" 26 "fmt" 27 "io" 28 "io/ioutil" 29 "log" 30 "net/url" 31 "os" 32 "strings" 33 "unicode" 34 "unicode/utf8" 35 36 "golang.org/x/tools/present" 37 ) 38 39 func usage() { 40 fmt.Fprintf(os.Stderr, "usage: present2md [-w] [file ...]\n") 41 os.Exit(2) 42 } 43 44 var ( 45 writeBack = flag.Bool("w", false, "write conversions back to original files") 46 exitStatus = 0 47 ) 48 49 func main() { 50 log.SetPrefix("present2md: ") 51 log.SetFlags(0) 52 flag.Usage = usage 53 flag.Parse() 54 55 args := flag.Args() 56 if len(args) == 0 { 57 if *writeBack { 58 log.Fatalf("cannot use -w with standard input") 59 } 60 convert(os.Stdin, "stdin", false) 61 return 62 } 63 64 for _, arg := range args { 65 f, err := os.Open(arg) 66 if err != nil { 67 log.Print(err) 68 exitStatus = 1 69 continue 70 } 71 err = convert(f, arg, *writeBack) 72 f.Close() 73 if err != nil { 74 log.Print(err) 75 exitStatus = 1 76 } 77 } 78 os.Exit(exitStatus) 79 } 80 81 // convert reads the data from r, parses it as legacy present, 82 // and converts it to Markdown-enabled present. 83 // If any errors occur, the data is reported as coming from file. 84 // If writeBack is true, the converted version is written back to file. 85 // If writeBack is false, the converted version is printed to standard output. 86 func convert(r io.Reader, file string, writeBack bool) error { 87 data, err := ioutil.ReadAll(r) 88 if err != nil { 89 return err 90 } 91 if bytes.HasPrefix(data, []byte("# ")) { 92 return fmt.Errorf("%v: already markdown", file) 93 } 94 95 // Convert all comments before parsing the document. 96 // The '//' comment is treated as normal text and so 97 // is passed through the translation unaltered. 98 data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1) 99 100 doc, err := present.Parse(bytes.NewReader(data), file, 0) 101 if err != nil { 102 return err 103 } 104 105 // Title and Subtitle, Time, Tags. 106 var md bytes.Buffer 107 fmt.Fprintf(&md, "# %s\n", doc.Title) 108 if doc.Subtitle != "" { 109 fmt.Fprintf(&md, "%s\n", doc.Subtitle) 110 } 111 if !doc.Time.IsZero() { 112 fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) 113 } 114 if len(doc.Tags) > 0 { 115 fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) 116 } 117 118 // Summary, defaulting to first paragraph of section. 119 // (Summaries must be explicit for Markdown-enabled present, 120 // and the expectation is that they will be shorter than the 121 // whole first paragraph. But this is what the blog does today.) 122 if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { 123 for _, elem := range doc.Sections[0].Elem { 124 text, ok := elem.(present.Text) 125 if !ok || text.Pre { 126 // skip everything but non-text elements 127 continue 128 } 129 fmt.Fprintf(&md, "Summary:") 130 for i, line := range text.Lines { 131 fmt.Fprintf(&md, " ") 132 printStyled(&md, line, i == 0) 133 } 134 fmt.Fprintf(&md, "\n") 135 break 136 } 137 } 138 139 // Authors 140 for _, a := range doc.Authors { 141 fmt.Fprintf(&md, "\n") 142 for _, elem := range a.Elem { 143 switch elem := elem.(type) { 144 default: 145 // Can only happen if this type switch is incomplete, which is a bug. 146 log.Fatalf("%s: unexpected author type %T", file, elem) 147 case present.Text: 148 for _, line := range elem.Lines { 149 fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) 150 } 151 case present.Link: 152 fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) 153 } 154 } 155 } 156 157 // Invariant: the output ends in non-blank line now, 158 // and after printing any piece of the file below, 159 // the output should still end in a non-blank line. 160 // If a blank line separator is needed, it should be printed 161 // before the block that needs separating, not after. 162 163 if len(doc.TitleNotes) > 0 { 164 fmt.Fprintf(&md, "\n") 165 for _, line := range doc.TitleNotes { 166 fmt.Fprintf(&md, ": %s\n", line) 167 } 168 } 169 170 if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { 171 // Blog drops section headers when there is only one section. 172 // Don't print a title in this case, to make clear that it's being dropped. 173 fmt.Fprintf(&md, "\n##\n") 174 printSectionBody(file, 1, &md, doc.Sections[0].Elem) 175 } else { 176 for _, s := range doc.Sections { 177 fmt.Fprintf(&md, "\n") 178 fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) 179 printSectionBody(file, 1, &md, s.Elem) 180 } 181 } 182 183 if !writeBack { 184 os.Stdout.Write(md.Bytes()) 185 return nil 186 } 187 return ioutil.WriteFile(file, md.Bytes(), 0666) 188 } 189 190 func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { 191 for _, elem := range elems { 192 switch elem := elem.(type) { 193 default: 194 // Can only happen if this type switch is incomplete, which is a bug. 195 log.Fatalf("%s: unexpected present element type %T", file, elem) 196 197 case present.Text: 198 fmt.Fprintf(w, "\n") 199 lines := elem.Lines 200 for len(lines) > 0 && lines[0] == "" { 201 lines = lines[1:] 202 } 203 if elem.Pre { 204 for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { 205 if line == "" { 206 fmt.Fprintf(w, "\n") 207 } else { 208 fmt.Fprintf(w, "\t%s\n", line) 209 } 210 } 211 } else { 212 for _, line := range elem.Lines { 213 printStyled(w, line, true) 214 fmt.Fprintf(w, "\n") 215 } 216 } 217 218 case present.List: 219 fmt.Fprintf(w, "\n") 220 for _, item := range elem.Bullet { 221 fmt.Fprintf(w, " - ") 222 for i, line := range strings.Split(item, "\n") { 223 if i > 0 { 224 fmt.Fprintf(w, " ") 225 } 226 printStyled(w, line, false) 227 fmt.Fprintf(w, "\n") 228 } 229 } 230 231 case present.Section: 232 fmt.Fprintf(w, "\n") 233 sep := " " 234 if elem.Title == "" { 235 sep = "" 236 } 237 fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) 238 printSectionBody(file, depth+1, w, elem.Elem) 239 240 case interface{ PresentCmd() string }: 241 // If there are multiple present commands in a row, don't print a blank line before the second etc. 242 b := w.Bytes() 243 sep := "\n" 244 if len(b) > 0 { 245 i := bytes.LastIndexByte(b[:len(b)-1], '\n') 246 if b[i+1] == '.' { 247 sep = "" 248 } 249 } 250 fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) 251 } 252 } 253 } 254 255 func markdownEscape(s string, startLine bool) string { 256 var b strings.Builder 257 for i, r := range s { 258 switch { 259 case r == '#' && i == 0, 260 r == '*', 261 r == '_', 262 r == '<' && (i == 0 || s[i-1] != ' ') && i+1 < len(s) && s[i+1] != ' ', 263 r == '[' && strings.Contains(s[i:], "]("): 264 b.WriteRune('\\') 265 } 266 b.WriteRune(r) 267 } 268 return b.String() 269 } 270 271 // Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. 272 273 /* 274 Fonts are demarcated by an initial and final char bracketing a 275 space-delimited word, plus possibly some terminal punctuation. 276 The chars are 277 _ for italic 278 * for bold 279 ` (back quote) for fixed width. 280 Inner appearances of the char become spaces. For instance, 281 _this_is_italic_! 282 becomes 283 <i>this is italic</i>! 284 */ 285 286 func printStyled(w *bytes.Buffer, text string, startLine bool) { 287 w.WriteString(font(text, startLine)) 288 } 289 290 // font returns s with font indicators turned into HTML font tags. 291 func font(s string, startLine bool) string { 292 if !strings.ContainsAny(s, "[`_*") { 293 return markdownEscape(s, startLine) 294 } 295 words := split(s) 296 var b bytes.Buffer 297 Word: 298 for w, word := range words { 299 words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word 300 if len(word) < 2 { 301 continue Word 302 } 303 if link, _ := parseInlineLink(word); link != "" { 304 words[w] = link 305 continue Word 306 } 307 const marker = "_*`" 308 // Initial punctuation is OK but must be peeled off. 309 first := strings.IndexAny(word, marker) 310 if first == -1 { 311 continue Word 312 } 313 // Opening marker must be at the beginning of the token or else preceded by punctuation. 314 if first != 0 { 315 r, _ := utf8.DecodeLastRuneInString(word[:first]) 316 if !unicode.IsPunct(r) { 317 continue Word 318 } 319 } 320 open, word := markdownEscape(word[:first], startLine && w == 0), word[first:] 321 char := word[0] // ASCII is OK. 322 close := "" 323 switch char { 324 default: 325 continue Word 326 case '_': 327 open += "_" 328 close = "_" 329 case '*': 330 open += "**" 331 close = "**" 332 case '`': 333 open += "`" 334 close = "`" 335 } 336 // Closing marker must be at the end of the token or else followed by punctuation. 337 last := strings.LastIndex(word, word[:1]) 338 if last == 0 { 339 continue Word 340 } 341 if last+1 != len(word) { 342 r, _ := utf8.DecodeRuneInString(word[last+1:]) 343 if !unicode.IsPunct(r) { 344 continue Word 345 } 346 } 347 head, tail := word[:last+1], word[last+1:] 348 b.Reset() 349 var wid int 350 for i := 1; i < len(head)-1; i += wid { 351 var r rune 352 r, wid = utf8.DecodeRuneInString(head[i:]) 353 if r != rune(char) { 354 // Ordinary character. 355 b.WriteRune(r) 356 continue 357 } 358 if head[i+1] != char { 359 // Inner char becomes space. 360 b.WriteRune(' ') 361 continue 362 } 363 // Doubled char becomes real char. 364 // Not worth worrying about "_x__". 365 b.WriteByte(char) 366 wid++ // Consumed two chars, both ASCII. 367 } 368 text := b.String() 369 if close == "`" { 370 for strings.Contains(text, close) { 371 open += "`" 372 close += "`" 373 } 374 } else { 375 text = markdownEscape(text, false) 376 } 377 words[w] = open + text + close + tail 378 } 379 return strings.Join(words, "") 380 } 381 382 // split is like strings.Fields but also returns the runs of spaces 383 // and treats inline links as distinct words. 384 func split(s string) []string { 385 var ( 386 words = make([]string, 0, 10) 387 start = 0 388 ) 389 390 // appendWord appends the string s[start:end] to the words slice. 391 // If the word contains the beginning of a link, the non-link portion 392 // of the word and the entire link are appended as separate words, 393 // and the start index is advanced to the end of the link. 394 appendWord := func(end int) { 395 if j := strings.Index(s[start:end], "[["); j > -1 { 396 if _, l := parseInlineLink(s[start+j:]); l > 0 { 397 // Append portion before link, if any. 398 if j > 0 { 399 words = append(words, s[start:start+j]) 400 } 401 // Append link itself. 402 words = append(words, s[start+j:start+j+l]) 403 // Advance start index to end of link. 404 start = start + j + l 405 return 406 } 407 } 408 // No link; just add the word. 409 words = append(words, s[start:end]) 410 start = end 411 } 412 413 wasSpace := false 414 for i, r := range s { 415 isSpace := unicode.IsSpace(r) 416 if i > start && isSpace != wasSpace { 417 appendWord(i) 418 } 419 wasSpace = isSpace 420 } 421 for start < len(s) { 422 appendWord(len(s)) 423 } 424 return words 425 } 426 427 // parseInlineLink parses an inline link at the start of s, and returns 428 // a rendered Markdown link and the total length of the raw inline link. 429 // If no inline link is present, it returns all zeroes. 430 func parseInlineLink(s string) (link string, length int) { 431 if !strings.HasPrefix(s, "[[") { 432 return 433 } 434 end := strings.Index(s, "]]") 435 if end == -1 { 436 return 437 } 438 urlEnd := strings.Index(s, "]") 439 rawURL := s[2:urlEnd] 440 const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3 441 if strings.ContainsAny(rawURL, badURLChars) { 442 return 443 } 444 if urlEnd == end { 445 simpleURL := "" 446 url, err := url.Parse(rawURL) 447 if err == nil { 448 // If the URL is http://foo.com, drop the http:// 449 // In other words, render [[http://golang.org]] as: 450 // <a href="http://golang.org">golang.org</a> 451 if strings.HasPrefix(rawURL, url.Scheme+"://") { 452 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://") 453 } else if strings.HasPrefix(rawURL, url.Scheme+":") { 454 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":") 455 } 456 } 457 return renderLink(rawURL, simpleURL), end + 2 458 } 459 if s[urlEnd:urlEnd+2] != "][" { 460 return 461 } 462 text := s[urlEnd+2 : end] 463 return renderLink(rawURL, text), end + 2 464 } 465 466 func renderLink(href, text string) string { 467 text = font(text, false) 468 if text == "" { 469 text = markdownEscape(href, false) 470 } 471 return "[" + text + "](" + href + ")" 472 }