github.com/powerman/golang-tools@v0.1.11-0.20220410185822-5ad214d8d803/present/parse.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package present 6 7 import ( 8 "bufio" 9 "bytes" 10 "errors" 11 "fmt" 12 "html/template" 13 "io" 14 "io/ioutil" 15 "log" 16 "net/url" 17 "regexp" 18 "strings" 19 "time" 20 "unicode" 21 "unicode/utf8" 22 23 "github.com/yuin/goldmark" 24 "github.com/yuin/goldmark/ast" 25 "github.com/yuin/goldmark/renderer/html" 26 "github.com/yuin/goldmark/text" 27 ) 28 29 var ( 30 parsers = make(map[string]ParseFunc) 31 funcs = template.FuncMap{} 32 ) 33 34 // Template returns an empty template with the action functions in its FuncMap. 35 func Template() *template.Template { 36 return template.New("").Funcs(funcs) 37 } 38 39 // Render renders the doc to the given writer using the provided template. 40 func (d *Doc) Render(w io.Writer, t *template.Template) error { 41 data := struct { 42 *Doc 43 Template *template.Template 44 PlayEnabled bool 45 NotesEnabled bool 46 }{d, t, PlayEnabled, NotesEnabled} 47 return t.ExecuteTemplate(w, "root", data) 48 } 49 50 // Render renders the section to the given writer using the provided template. 51 func (s *Section) Render(w io.Writer, t *template.Template) error { 52 data := struct { 53 *Section 54 Template *template.Template 55 PlayEnabled bool 56 }{s, t, PlayEnabled} 57 return t.ExecuteTemplate(w, "section", data) 58 } 59 60 type ParseFunc func(ctx *Context, fileName string, lineNumber int, inputLine string) (Elem, error) 61 62 // Register binds the named action, which does not begin with a period, to the 63 // specified parser to be invoked when the name, with a period, appears in the 64 // present input text. 65 func Register(name string, parser ParseFunc) { 66 if len(name) == 0 || name[0] == ';' { 67 panic("bad name in Register: " + name) 68 } 69 parsers["."+name] = parser 70 } 71 72 // Doc represents an entire document. 73 type Doc struct { 74 Title string 75 Subtitle string 76 Summary string 77 Time time.Time 78 Authors []Author 79 TitleNotes []string 80 Sections []Section 81 Tags []string 82 OldURL []string 83 } 84 85 // Author represents the person who wrote and/or is presenting the document. 86 type Author struct { 87 Elem []Elem 88 } 89 90 // TextElem returns the first text elements of the author details. 91 // This is used to display the author' name, job title, and company 92 // without the contact details. 93 func (p *Author) TextElem() (elems []Elem) { 94 for _, el := range p.Elem { 95 if _, ok := el.(Text); !ok { 96 break 97 } 98 elems = append(elems, el) 99 } 100 return 101 } 102 103 // Section represents a section of a document (such as a presentation slide) 104 // comprising a title and a list of elements. 105 type Section struct { 106 Number []int 107 Title string 108 ID string // HTML anchor ID 109 Elem []Elem 110 Notes []string 111 Classes []string 112 Styles []string 113 } 114 115 // HTMLAttributes for the section 116 func (s Section) HTMLAttributes() template.HTMLAttr { 117 if len(s.Classes) == 0 && len(s.Styles) == 0 { 118 return "" 119 } 120 121 var class string 122 if len(s.Classes) > 0 { 123 class = fmt.Sprintf(`class=%q`, strings.Join(s.Classes, " ")) 124 } 125 var style string 126 if len(s.Styles) > 0 { 127 style = fmt.Sprintf(`style=%q`, strings.Join(s.Styles, " ")) 128 } 129 return template.HTMLAttr(strings.Join([]string{class, style}, " ")) 130 } 131 132 // Sections contained within the section. 133 func (s Section) Sections() (sections []Section) { 134 for _, e := range s.Elem { 135 if section, ok := e.(Section); ok { 136 sections = append(sections, section) 137 } 138 } 139 return 140 } 141 142 // Level returns the level of the given section. 143 // The document title is level 1, main section 2, etc. 144 func (s Section) Level() int { 145 return len(s.Number) + 1 146 } 147 148 // FormattedNumber returns a string containing the concatenation of the 149 // numbers identifying a Section. 150 func (s Section) FormattedNumber() string { 151 b := &bytes.Buffer{} 152 for _, n := range s.Number { 153 fmt.Fprintf(b, "%v.", n) 154 } 155 return b.String() 156 } 157 158 func (s Section) TemplateName() string { return "section" } 159 160 // Elem defines the interface for a present element. That is, something that 161 // can provide the name of the template used to render the element. 162 type Elem interface { 163 TemplateName() string 164 } 165 166 // renderElem implements the elem template function, used to render 167 // sub-templates. 168 func renderElem(t *template.Template, e Elem) (template.HTML, error) { 169 var data interface{} = e 170 if s, ok := e.(Section); ok { 171 data = struct { 172 Section 173 Template *template.Template 174 }{s, t} 175 } 176 return execTemplate(t, e.TemplateName(), data) 177 } 178 179 // pageNum derives a page number from a section. 180 func pageNum(s Section, offset int) int { 181 if len(s.Number) == 0 { 182 return offset 183 } 184 return s.Number[0] + offset 185 } 186 187 func init() { 188 funcs["elem"] = renderElem 189 funcs["pagenum"] = pageNum 190 } 191 192 // execTemplate is a helper to execute a template and return the output as a 193 // template.HTML value. 194 func execTemplate(t *template.Template, name string, data interface{}) (template.HTML, error) { 195 b := new(bytes.Buffer) 196 err := t.ExecuteTemplate(b, name, data) 197 if err != nil { 198 return "", err 199 } 200 return template.HTML(b.String()), nil 201 } 202 203 // Text represents an optionally preformatted paragraph. 204 type Text struct { 205 Lines []string 206 Pre bool 207 Raw string // original text, for Pre==true 208 } 209 210 func (t Text) TemplateName() string { return "text" } 211 212 // List represents a bulleted list. 213 type List struct { 214 Bullet []string 215 } 216 217 func (l List) TemplateName() string { return "list" } 218 219 // Lines is a helper for parsing line-based input. 220 type Lines struct { 221 line int // 0 indexed, so has 1-indexed number of last line returned 222 text []string 223 comment string 224 } 225 226 func readLines(r io.Reader) (*Lines, error) { 227 var lines []string 228 s := bufio.NewScanner(r) 229 for s.Scan() { 230 lines = append(lines, s.Text()) 231 } 232 if err := s.Err(); err != nil { 233 return nil, err 234 } 235 return &Lines{0, lines, "#"}, nil 236 } 237 238 func (l *Lines) next() (text string, ok bool) { 239 for { 240 current := l.line 241 l.line++ 242 if current >= len(l.text) { 243 return "", false 244 } 245 text = l.text[current] 246 // Lines starting with l.comment are comments. 247 if l.comment == "" || !strings.HasPrefix(text, l.comment) { 248 ok = true 249 break 250 } 251 } 252 return 253 } 254 255 func (l *Lines) back() { 256 l.line-- 257 } 258 259 func (l *Lines) nextNonEmpty() (text string, ok bool) { 260 for { 261 text, ok = l.next() 262 if !ok { 263 return 264 } 265 if len(text) > 0 { 266 break 267 } 268 } 269 return 270 } 271 272 // A Context specifies the supporting context for parsing a presentation. 273 type Context struct { 274 // ReadFile reads the file named by filename and returns the contents. 275 ReadFile func(filename string) ([]byte, error) 276 } 277 278 // ParseMode represents flags for the Parse function. 279 type ParseMode int 280 281 const ( 282 // If set, parse only the title and subtitle. 283 TitlesOnly ParseMode = 1 284 ) 285 286 // Parse parses a document from r. 287 func (ctx *Context) Parse(r io.Reader, name string, mode ParseMode) (*Doc, error) { 288 doc := new(Doc) 289 lines, err := readLines(r) 290 if err != nil { 291 return nil, err 292 } 293 294 // Detect Markdown-enabled vs legacy present file. 295 // Markdown-enabled files have a title line beginning with "# " 296 // (like preprocessed C files of yore). 297 isMarkdown := false 298 for i := lines.line; i < len(lines.text); i++ { 299 line := lines.text[i] 300 if line == "" { 301 continue 302 } 303 isMarkdown = strings.HasPrefix(line, "# ") 304 break 305 } 306 307 sectionPrefix := "*" 308 if isMarkdown { 309 sectionPrefix = "##" 310 lines.comment = "//" 311 } 312 313 for i := lines.line; i < len(lines.text); i++ { 314 if strings.HasPrefix(lines.text[i], sectionPrefix) { 315 break 316 } 317 318 if isSpeakerNote(lines.text[i]) { 319 doc.TitleNotes = append(doc.TitleNotes, trimSpeakerNote(lines.text[i])) 320 } 321 } 322 323 err = parseHeader(doc, isMarkdown, lines) 324 if err != nil { 325 return nil, err 326 } 327 if mode&TitlesOnly != 0 { 328 return doc, nil 329 } 330 331 // Authors 332 if doc.Authors, err = parseAuthors(name, sectionPrefix, lines); err != nil { 333 return nil, err 334 } 335 336 // Sections 337 if doc.Sections, err = parseSections(ctx, name, sectionPrefix, lines, []int{}); err != nil { 338 return nil, err 339 } 340 341 return doc, nil 342 } 343 344 // Parse parses a document from r. Parse reads assets used by the presentation 345 // from the file system using ioutil.ReadFile. 346 func Parse(r io.Reader, name string, mode ParseMode) (*Doc, error) { 347 ctx := Context{ReadFile: ioutil.ReadFile} 348 return ctx.Parse(r, name, mode) 349 } 350 351 // isHeading matches any section heading. 352 var ( 353 isHeadingLegacy = regexp.MustCompile(`^\*+( |$)`) 354 isHeadingMarkdown = regexp.MustCompile(`^\#+( |$)`) 355 ) 356 357 // lesserHeading returns true if text is a heading of a lesser or equal level 358 // than that denoted by prefix. 359 func lesserHeading(isHeading *regexp.Regexp, text, prefix string) bool { 360 return isHeading.MatchString(text) && !strings.HasPrefix(text, prefix+prefix[:1]) 361 } 362 363 // parseSections parses Sections from lines for the section level indicated by 364 // number (a nil number indicates the top level). 365 func parseSections(ctx *Context, name, prefix string, lines *Lines, number []int) ([]Section, error) { 366 isMarkdown := prefix[0] == '#' 367 isHeading := isHeadingLegacy 368 if isMarkdown { 369 isHeading = isHeadingMarkdown 370 } 371 var sections []Section 372 for i := 1; ; i++ { 373 // Next non-empty line is title. 374 text, ok := lines.nextNonEmpty() 375 for ok && text == "" { 376 text, ok = lines.next() 377 } 378 if !ok { 379 break 380 } 381 if text != prefix && !strings.HasPrefix(text, prefix+" ") { 382 lines.back() 383 break 384 } 385 // Markdown sections can end in {#id} to set the HTML anchor for the section. 386 // This is nicer than the default #TOC_1_2-style anchor. 387 title := strings.TrimSpace(text[len(prefix):]) 388 id := "" 389 if isMarkdown && strings.HasSuffix(title, "}") { 390 j := strings.LastIndex(title, "{#") 391 if j >= 0 { 392 id = title[j+2 : len(title)-1] 393 title = strings.TrimSpace(title[:j]) 394 } 395 } 396 section := Section{ 397 Number: append(append([]int{}, number...), i), 398 Title: title, 399 ID: id, 400 } 401 text, ok = lines.nextNonEmpty() 402 for ok && !lesserHeading(isHeading, text, prefix) { 403 var e Elem 404 r, _ := utf8.DecodeRuneInString(text) 405 switch { 406 case !isMarkdown && unicode.IsSpace(r): 407 i := strings.IndexFunc(text, func(r rune) bool { 408 return !unicode.IsSpace(r) 409 }) 410 if i < 0 { 411 break 412 } 413 indent := text[:i] 414 var s []string 415 for ok && (strings.HasPrefix(text, indent) || text == "") { 416 if text != "" { 417 text = text[i:] 418 } 419 s = append(s, text) 420 text, ok = lines.next() 421 } 422 lines.back() 423 pre := strings.Join(s, "\n") 424 raw := pre 425 pre = strings.Replace(pre, "\t", " ", -1) // browsers treat tabs badly 426 pre = strings.TrimRightFunc(pre, unicode.IsSpace) 427 e = Text{Lines: []string{pre}, Pre: true, Raw: raw} 428 case !isMarkdown && strings.HasPrefix(text, "- "): 429 var b []string 430 for { 431 if strings.HasPrefix(text, "- ") { 432 b = append(b, text[2:]) 433 } else if len(b) > 0 && strings.HasPrefix(text, " ") { 434 b[len(b)-1] += "\n" + strings.TrimSpace(text) 435 } else { 436 break 437 } 438 if text, ok = lines.next(); !ok { 439 break 440 } 441 } 442 lines.back() 443 e = List{Bullet: b} 444 case isSpeakerNote(text): 445 section.Notes = append(section.Notes, trimSpeakerNote(text)) 446 case strings.HasPrefix(text, prefix+prefix[:1]+" ") || text == prefix+prefix[:1]: 447 lines.back() 448 subsecs, err := parseSections(ctx, name, prefix+prefix[:1], lines, section.Number) 449 if err != nil { 450 return nil, err 451 } 452 for _, ss := range subsecs { 453 section.Elem = append(section.Elem, ss) 454 } 455 case strings.HasPrefix(text, prefix+prefix[:1]): 456 return nil, fmt.Errorf("%s:%d: badly nested section inside %s: %s", name, lines.line, prefix, text) 457 case strings.HasPrefix(text, "."): 458 args := strings.Fields(text) 459 if args[0] == ".background" { 460 section.Classes = append(section.Classes, "background") 461 section.Styles = append(section.Styles, "background-image: url('"+args[1]+"')") 462 break 463 } 464 parser := parsers[args[0]] 465 if parser == nil { 466 return nil, fmt.Errorf("%s:%d: unknown command %q", name, lines.line, text) 467 } 468 t, err := parser(ctx, name, lines.line, text) 469 if err != nil { 470 return nil, err 471 } 472 e = t 473 474 case isMarkdown: 475 // Collect Markdown lines, including blank lines and indented text. 476 var block []string 477 endLine, endBlock := lines.line-1, -1 // end is last non-empty line 478 for ok { 479 trim := strings.TrimSpace(text) 480 if trim != "" { 481 // Command breaks text block. 482 // Section heading breaks text block in markdown. 483 if text[0] == '.' || text[0] == '#' || isSpeakerNote(text) { 484 break 485 } 486 if strings.HasPrefix(text, `\.`) { // Backslash escapes initial period. 487 text = text[1:] 488 } 489 endLine, endBlock = lines.line, len(block) 490 } 491 block = append(block, text) 492 text, ok = lines.next() 493 } 494 block = block[:endBlock+1] 495 lines.line = endLine + 1 496 if len(block) == 0 { 497 break 498 } 499 500 // Replace all leading tabs with 4 spaces, 501 // which render better in code blocks. 502 // CommonMark defines that for parsing the structure of the file 503 // a tab is equivalent to 4 spaces, so this change won't 504 // affect the later parsing at all. 505 // An alternative would be to apply this to code blocks after parsing, 506 // at the same time that we update <a> targets, but that turns out 507 // to be quite difficult to modify in the AST. 508 for i, line := range block { 509 if len(line) > 0 && line[0] == '\t' { 510 short := strings.TrimLeft(line, "\t") 511 line = strings.Repeat(" ", len(line)-len(short)) + short 512 block[i] = line 513 } 514 } 515 html, err := renderMarkdown([]byte(strings.Join(block, "\n"))) 516 if err != nil { 517 return nil, err 518 } 519 e = HTML{HTML: html} 520 521 default: 522 // Collect text lines. 523 var block []string 524 for ok && strings.TrimSpace(text) != "" { 525 // Command breaks text block. 526 // Section heading breaks text block in markdown. 527 if text[0] == '.' || isSpeakerNote(text) { 528 lines.back() 529 break 530 } 531 if strings.HasPrefix(text, `\.`) { // Backslash escapes initial period. 532 text = text[1:] 533 } 534 block = append(block, text) 535 text, ok = lines.next() 536 } 537 if len(block) == 0 { 538 break 539 } 540 e = Text{Lines: block} 541 } 542 if e != nil { 543 section.Elem = append(section.Elem, e) 544 } 545 text, ok = lines.nextNonEmpty() 546 } 547 if isHeading.MatchString(text) { 548 lines.back() 549 } 550 sections = append(sections, section) 551 } 552 553 if len(sections) == 0 { 554 return nil, fmt.Errorf("%s:%d: unexpected line: %s", name, lines.line+1, lines.text[lines.line]) 555 } 556 return sections, nil 557 } 558 559 func parseHeader(doc *Doc, isMarkdown bool, lines *Lines) error { 560 var ok bool 561 // First non-empty line starts header. 562 doc.Title, ok = lines.nextNonEmpty() 563 if !ok { 564 return errors.New("unexpected EOF; expected title") 565 } 566 if isMarkdown { 567 doc.Title = strings.TrimSpace(strings.TrimPrefix(doc.Title, "#")) 568 } 569 570 for { 571 text, ok := lines.next() 572 if !ok { 573 return errors.New("unexpected EOF") 574 } 575 if text == "" { 576 break 577 } 578 if isSpeakerNote(text) { 579 continue 580 } 581 if strings.HasPrefix(text, "Tags:") { 582 tags := strings.Split(text[len("Tags:"):], ",") 583 for i := range tags { 584 tags[i] = strings.TrimSpace(tags[i]) 585 } 586 doc.Tags = append(doc.Tags, tags...) 587 } else if strings.HasPrefix(text, "Summary:") { 588 doc.Summary = strings.TrimSpace(text[len("Summary:"):]) 589 } else if strings.HasPrefix(text, "OldURL:") { 590 doc.OldURL = append(doc.OldURL, strings.TrimSpace(text[len("OldURL:"):])) 591 } else if t, ok := parseTime(text); ok { 592 doc.Time = t 593 } else if doc.Subtitle == "" { 594 doc.Subtitle = text 595 } else { 596 return fmt.Errorf("unexpected header line: %q", text) 597 } 598 } 599 return nil 600 } 601 602 func parseAuthors(name, sectionPrefix string, lines *Lines) (authors []Author, err error) { 603 // This grammar demarcates authors with blanks. 604 605 // Skip blank lines. 606 if _, ok := lines.nextNonEmpty(); !ok { 607 return nil, errors.New("unexpected EOF") 608 } 609 lines.back() 610 611 var a *Author 612 for { 613 text, ok := lines.next() 614 if !ok { 615 return nil, errors.New("unexpected EOF") 616 } 617 618 // If we find a section heading, we're done. 619 if strings.HasPrefix(text, sectionPrefix) { 620 lines.back() 621 break 622 } 623 624 if isSpeakerNote(text) { 625 continue 626 } 627 628 // If we encounter a blank we're done with this author. 629 if a != nil && len(text) == 0 { 630 authors = append(authors, *a) 631 a = nil 632 continue 633 } 634 if a == nil { 635 a = new(Author) 636 } 637 638 // Parse the line. Those that 639 // - begin with @ are twitter names, 640 // - contain slashes are links, or 641 // - contain an @ symbol are an email address. 642 // The rest is just text. 643 var el Elem 644 switch { 645 case strings.HasPrefix(text, "@"): 646 el = parseAuthorURL(name, "http://twitter.com/"+text[1:]) 647 case strings.Contains(text, ":"): 648 el = parseAuthorURL(name, text) 649 case strings.Contains(text, "@"): 650 el = parseAuthorURL(name, "mailto:"+text) 651 } 652 if l, ok := el.(Link); ok { 653 l.Label = text 654 el = l 655 } 656 if el == nil { 657 el = Text{Lines: []string{text}} 658 } 659 a.Elem = append(a.Elem, el) 660 } 661 if a != nil { 662 authors = append(authors, *a) 663 } 664 return authors, nil 665 } 666 667 func parseAuthorURL(name, text string) Elem { 668 u, err := url.Parse(text) 669 if err != nil { 670 log.Printf("parsing %s author block: invalid URL %q: %v", name, text, err) 671 return nil 672 } 673 return Link{URL: u} 674 } 675 676 func parseTime(text string) (t time.Time, ok bool) { 677 t, err := time.Parse("15:04 2 Jan 2006", text) 678 if err == nil { 679 return t, true 680 } 681 t, err = time.Parse("2 Jan 2006", text) 682 if err == nil { 683 // at 11am UTC it is the same date everywhere 684 t = t.Add(time.Hour * 11) 685 return t, true 686 } 687 return time.Time{}, false 688 } 689 690 func isSpeakerNote(s string) bool { 691 return strings.HasPrefix(s, ": ") || s == ":" 692 } 693 694 func trimSpeakerNote(s string) string { 695 if s == ":" { 696 return "" 697 } 698 return strings.TrimPrefix(s, ": ") 699 } 700 701 func renderMarkdown(input []byte) (template.HTML, error) { 702 md := goldmark.New(goldmark.WithRendererOptions(html.WithUnsafe())) 703 reader := text.NewReader(input) 704 doc := md.Parser().Parse(reader) 705 fixupMarkdown(doc) 706 var b strings.Builder 707 if err := md.Renderer().Render(&b, input, doc); err != nil { 708 return "", err 709 } 710 return template.HTML(b.String()), nil 711 } 712 713 func fixupMarkdown(n ast.Node) { 714 ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 715 if entering { 716 switch n := n.(type) { 717 case *ast.Link: 718 n.SetAttributeString("target", []byte("_blank")) 719 // https://developers.google.com/web/tools/lighthouse/audits/noopener 720 n.SetAttributeString("rel", []byte("noopener")) 721 } 722 } 723 return ast.WalkContinue, nil 724 }) 725 }