golang.org/x/tools@v0.21.1-0.20240520172518-788d39e776b1/cmd/present2md/main.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Present2md converts legacy-syntax present files to Markdown-syntax present files. 6 // 7 // Usage: 8 // 9 // present2md [-w] [file ...] 10 // 11 // By default, present2md prints the Markdown-syntax form of each input file to standard output. 12 // If no input file is listed, standard input is used. 13 // 14 // The -w flag causes present2md to update the files in place, overwriting each with its 15 // Markdown-syntax equivalent. 16 // 17 // Examples 18 // 19 // present2md your.article 20 // present2md -w *.article 21 package main 22 23 import ( 24 "bytes" 25 "flag" 26 "fmt" 27 "io" 28 "log" 29 "net/url" 30 "os" 31 "strings" 32 "unicode" 33 "unicode/utf8" 34 35 "golang.org/x/tools/present" 36 ) 37 38 func usage() { 39 fmt.Fprintf(os.Stderr, "usage: present2md [-w] [file ...]\n") 40 os.Exit(2) 41 } 42 43 var ( 44 writeBack = flag.Bool("w", false, "write conversions back to original files") 45 exitStatus = 0 46 ) 47 48 func main() { 49 log.SetPrefix("present2md: ") 50 log.SetFlags(0) 51 flag.Usage = usage 52 flag.Parse() 53 54 args := flag.Args() 55 if len(args) == 0 { 56 if *writeBack { 57 log.Fatalf("cannot use -w with standard input") 58 } 59 convert(os.Stdin, "stdin", false) 60 return 61 } 62 63 for _, arg := range args { 64 f, err := os.Open(arg) 65 if err != nil { 66 log.Print(err) 67 exitStatus = 1 68 continue 69 } 70 err = convert(f, arg, *writeBack) 71 f.Close() 72 if err != nil { 73 log.Print(err) 74 exitStatus = 1 75 } 76 } 77 os.Exit(exitStatus) 78 } 79 80 // convert reads the data from r, parses it as legacy present, 81 // and converts it to Markdown-enabled present. 82 // If any errors occur, the data is reported as coming from file. 83 // If writeBack is true, the converted version is written back to file. 84 // If writeBack is false, the converted version is printed to standard output. 85 func convert(r io.Reader, file string, writeBack bool) error { 86 data, err := io.ReadAll(r) 87 if err != nil { 88 return err 89 } 90 if bytes.HasPrefix(data, []byte("# ")) { 91 return fmt.Errorf("%v: already markdown", file) 92 } 93 94 // Convert all comments before parsing the document. 95 // The '//' comment is treated as normal text and so 96 // is passed through the translation unaltered. 97 data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1) 98 99 doc, err := present.Parse(bytes.NewReader(data), file, 0) 100 if err != nil { 101 return err 102 } 103 104 // Title and Subtitle, Time, Tags. 105 var md bytes.Buffer 106 fmt.Fprintf(&md, "# %s\n", doc.Title) 107 if doc.Subtitle != "" { 108 fmt.Fprintf(&md, "%s\n", doc.Subtitle) 109 } 110 if !doc.Time.IsZero() { 111 fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) 112 } 113 if len(doc.Tags) > 0 { 114 fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) 115 } 116 117 // Summary, defaulting to first paragraph of section. 118 // (Summaries must be explicit for Markdown-enabled present, 119 // and the expectation is that they will be shorter than the 120 // whole first paragraph. But this is what the blog does today.) 121 if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { 122 for _, elem := range doc.Sections[0].Elem { 123 text, ok := elem.(present.Text) 124 if !ok || text.Pre { 125 // skip everything but non-text elements 126 continue 127 } 128 fmt.Fprintf(&md, "Summary:") 129 for i, line := range text.Lines { 130 fmt.Fprintf(&md, " ") 131 printStyled(&md, line, i == 0) 132 } 133 fmt.Fprintf(&md, "\n") 134 break 135 } 136 } 137 138 // Authors 139 for _, a := range doc.Authors { 140 fmt.Fprintf(&md, "\n") 141 for _, elem := range a.Elem { 142 switch elem := elem.(type) { 143 default: 144 // Can only happen if this type switch is incomplete, which is a bug. 145 log.Fatalf("%s: unexpected author type %T", file, elem) 146 case present.Text: 147 for _, line := range elem.Lines { 148 fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) 149 } 150 case present.Link: 151 fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) 152 } 153 } 154 } 155 156 // Invariant: the output ends in non-blank line now, 157 // and after printing any piece of the file below, 158 // the output should still end in a non-blank line. 159 // If a blank line separator is needed, it should be printed 160 // before the block that needs separating, not after. 161 162 if len(doc.TitleNotes) > 0 { 163 fmt.Fprintf(&md, "\n") 164 for _, line := range doc.TitleNotes { 165 fmt.Fprintf(&md, ": %s\n", line) 166 } 167 } 168 169 if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { 170 // Blog drops section headers when there is only one section. 171 // Don't print a title in this case, to make clear that it's being dropped. 172 fmt.Fprintf(&md, "\n##\n") 173 printSectionBody(file, 1, &md, doc.Sections[0].Elem) 174 } else { 175 for _, s := range doc.Sections { 176 fmt.Fprintf(&md, "\n") 177 fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) 178 printSectionBody(file, 1, &md, s.Elem) 179 } 180 } 181 182 if !writeBack { 183 os.Stdout.Write(md.Bytes()) 184 return nil 185 } 186 return os.WriteFile(file, md.Bytes(), 0666) 187 } 188 189 func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { 190 for _, elem := range elems { 191 switch elem := elem.(type) { 192 default: 193 // Can only happen if this type switch is incomplete, which is a bug. 194 log.Fatalf("%s: unexpected present element type %T", file, elem) 195 196 case present.Text: 197 fmt.Fprintf(w, "\n") 198 lines := elem.Lines 199 for len(lines) > 0 && lines[0] == "" { 200 lines = lines[1:] 201 } 202 if elem.Pre { 203 for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { 204 if line == "" { 205 fmt.Fprintf(w, "\n") 206 } else { 207 fmt.Fprintf(w, "\t%s\n", line) 208 } 209 } 210 } else { 211 for _, line := range elem.Lines { 212 printStyled(w, line, true) 213 fmt.Fprintf(w, "\n") 214 } 215 } 216 217 case present.List: 218 fmt.Fprintf(w, "\n") 219 for _, item := range elem.Bullet { 220 fmt.Fprintf(w, " - ") 221 for i, line := range strings.Split(item, "\n") { 222 if i > 0 { 223 fmt.Fprintf(w, " ") 224 } 225 printStyled(w, line, false) 226 fmt.Fprintf(w, "\n") 227 } 228 } 229 230 case present.Section: 231 fmt.Fprintf(w, "\n") 232 sep := " " 233 if elem.Title == "" { 234 sep = "" 235 } 236 fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) 237 printSectionBody(file, depth+1, w, elem.Elem) 238 239 case interface{ PresentCmd() string }: 240 // If there are multiple present commands in a row, don't print a blank line before the second etc. 241 b := w.Bytes() 242 sep := "\n" 243 if len(b) > 0 { 244 i := bytes.LastIndexByte(b[:len(b)-1], '\n') 245 if b[i+1] == '.' { 246 sep = "" 247 } 248 } 249 fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) 250 } 251 } 252 } 253 254 func markdownEscape(s string, startLine bool) string { 255 var b strings.Builder 256 for i, r := range s { 257 switch { 258 case r == '#' && i == 0, 259 r == '*', 260 r == '_', 261 r == '<' && (i == 0 || s[i-1] != ' ') && i+1 < len(s) && s[i+1] != ' ', 262 r == '[' && strings.Contains(s[i:], "]("): 263 b.WriteRune('\\') 264 } 265 b.WriteRune(r) 266 } 267 return b.String() 268 } 269 270 // Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. 271 272 /* 273 Fonts are demarcated by an initial and final char bracketing a 274 space-delimited word, plus possibly some terminal punctuation. 275 The chars are 276 _ for italic 277 * for bold 278 ` (back quote) for fixed width. 279 Inner appearances of the char become spaces. For instance, 280 _this_is_italic_! 281 becomes 282 <i>this is italic</i>! 283 */ 284 285 func printStyled(w *bytes.Buffer, text string, startLine bool) { 286 w.WriteString(font(text, startLine)) 287 } 288 289 // font returns s with font indicators turned into HTML font tags. 290 func font(s string, startLine bool) string { 291 if !strings.ContainsAny(s, "[`_*") { 292 return markdownEscape(s, startLine) 293 } 294 words := split(s) 295 var b bytes.Buffer 296 Word: 297 for w, word := range words { 298 words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word 299 if len(word) < 2 { 300 continue Word 301 } 302 if link, _ := parseInlineLink(word); link != "" { 303 words[w] = link 304 continue Word 305 } 306 const marker = "_*`" 307 // Initial punctuation is OK but must be peeled off. 308 first := strings.IndexAny(word, marker) 309 if first == -1 { 310 continue Word 311 } 312 // Opening marker must be at the beginning of the token or else preceded by punctuation. 313 if first != 0 { 314 r, _ := utf8.DecodeLastRuneInString(word[:first]) 315 if !unicode.IsPunct(r) { 316 continue Word 317 } 318 } 319 open, word := markdownEscape(word[:first], startLine && w == 0), word[first:] 320 char := word[0] // ASCII is OK. 321 close := "" 322 switch char { 323 default: 324 continue Word 325 case '_': 326 open += "_" 327 close = "_" 328 case '*': 329 open += "**" 330 close = "**" 331 case '`': 332 open += "`" 333 close = "`" 334 } 335 // Closing marker must be at the end of the token or else followed by punctuation. 336 last := strings.LastIndex(word, word[:1]) 337 if last == 0 { 338 continue Word 339 } 340 if last+1 != len(word) { 341 r, _ := utf8.DecodeRuneInString(word[last+1:]) 342 if !unicode.IsPunct(r) { 343 continue Word 344 } 345 } 346 head, tail := word[:last+1], word[last+1:] 347 b.Reset() 348 var wid int 349 for i := 1; i < len(head)-1; i += wid { 350 var r rune 351 r, wid = utf8.DecodeRuneInString(head[i:]) 352 if r != rune(char) { 353 // Ordinary character. 354 b.WriteRune(r) 355 continue 356 } 357 if head[i+1] != char { 358 // Inner char becomes space. 359 b.WriteRune(' ') 360 continue 361 } 362 // Doubled char becomes real char. 363 // Not worth worrying about "_x__". 364 b.WriteByte(char) 365 wid++ // Consumed two chars, both ASCII. 366 } 367 text := b.String() 368 if close == "`" { 369 for strings.Contains(text, close) { 370 open += "`" 371 close += "`" 372 } 373 } else { 374 text = markdownEscape(text, false) 375 } 376 words[w] = open + text + close + tail 377 } 378 return strings.Join(words, "") 379 } 380 381 // split is like strings.Fields but also returns the runs of spaces 382 // and treats inline links as distinct words. 383 func split(s string) []string { 384 var ( 385 words = make([]string, 0, 10) 386 start = 0 387 ) 388 389 // appendWord appends the string s[start:end] to the words slice. 390 // If the word contains the beginning of a link, the non-link portion 391 // of the word and the entire link are appended as separate words, 392 // and the start index is advanced to the end of the link. 393 appendWord := func(end int) { 394 if j := strings.Index(s[start:end], "[["); j > -1 { 395 if _, l := parseInlineLink(s[start+j:]); l > 0 { 396 // Append portion before link, if any. 397 if j > 0 { 398 words = append(words, s[start:start+j]) 399 } 400 // Append link itself. 401 words = append(words, s[start+j:start+j+l]) 402 // Advance start index to end of link. 403 start = start + j + l 404 return 405 } 406 } 407 // No link; just add the word. 408 words = append(words, s[start:end]) 409 start = end 410 } 411 412 wasSpace := false 413 for i, r := range s { 414 isSpace := unicode.IsSpace(r) 415 if i > start && isSpace != wasSpace { 416 appendWord(i) 417 } 418 wasSpace = isSpace 419 } 420 for start < len(s) { 421 appendWord(len(s)) 422 } 423 return words 424 } 425 426 // parseInlineLink parses an inline link at the start of s, and returns 427 // a rendered Markdown link and the total length of the raw inline link. 428 // If no inline link is present, it returns all zeroes. 429 func parseInlineLink(s string) (link string, length int) { 430 if !strings.HasPrefix(s, "[[") { 431 return 432 } 433 end := strings.Index(s, "]]") 434 if end == -1 { 435 return 436 } 437 urlEnd := strings.Index(s, "]") 438 rawURL := s[2:urlEnd] 439 const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3 440 if strings.ContainsAny(rawURL, badURLChars) { 441 return 442 } 443 if urlEnd == end { 444 simpleURL := "" 445 url, err := url.Parse(rawURL) 446 if err == nil { 447 // If the URL is http://foo.com, drop the http:// 448 // In other words, render [[http://golang.org]] as: 449 // <a href="http://golang.org">golang.org</a> 450 if strings.HasPrefix(rawURL, url.Scheme+"://") { 451 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://") 452 } else if strings.HasPrefix(rawURL, url.Scheme+":") { 453 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":") 454 } 455 } 456 return renderLink(rawURL, simpleURL), end + 2 457 } 458 if s[urlEnd:urlEnd+2] != "][" { 459 return 460 } 461 text := s[urlEnd+2 : end] 462 return renderLink(rawURL, text), end + 2 463 } 464 465 func renderLink(href, text string) string { 466 text = font(text, false) 467 if text == "" { 468 text = markdownEscape(href, false) 469 } 470 return "[" + text + "](" + href + ")" 471 }