golang.org/x/tools@v0.21.0/present/parse.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  package present
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"html/template"
    13  	"io"
    14  	"log"
    15  	"net/url"
    16  	"os"
    17  	"regexp"
    18  	"strings"
    19  	"time"
    20  	"unicode"
    21  	"unicode/utf8"
    23  	"github.com/yuin/goldmark"
    24  	"github.com/yuin/goldmark/ast"
    25  	"github.com/yuin/goldmark/renderer/html"
    26  	"github.com/yuin/goldmark/text"
    27  )
    29  var (
    30  	parsers = make(map[string]ParseFunc)
    31  	funcs   = template.FuncMap{}
    32  )
    34  // Template returns an empty template with the action functions in its FuncMap.
    35  func Template() *template.Template {
    36  	return template.New("").Funcs(funcs)
    37  }
    39  // Render renders the doc to the given writer using the provided template.
    40  func (d *Doc) Render(w io.Writer, t *template.Template) error {
    41  	data := struct {
    42  		*Doc
    43  		Template     *template.Template
    44  		PlayEnabled  bool
    45  		NotesEnabled bool
    46  	}{d, t, PlayEnabled, NotesEnabled}
    47  	return t.ExecuteTemplate(w, "root", data)
    48  }
    50  // Render renders the section to the given writer using the provided template.
    51  func (s *Section) Render(w io.Writer, t *template.Template) error {
    52  	data := struct {
    53  		*Section
    54  		Template    *template.Template
    55  		PlayEnabled bool
    56  	}{s, t, PlayEnabled}
    57  	return t.ExecuteTemplate(w, "section", data)
    58  }
// ParseFunc is the signature of an action parser registered with Register.
// While parsing a section, it is invoked for a line whose first field is the
// registered command and receives the parsing context, the name of the file
// being parsed, the current line number, and the complete text of that line.
// A non-nil returned Elem is appended to the enclosing section; a non-nil
// error aborts parsing.
type ParseFunc func(ctx *Context, fileName string, lineNumber int, inputLine string) (Elem, error)
    62  // Register binds the named action, which does not begin with a period, to the
    63  // specified parser to be invoked when the name, with a period, appears in the
    64  // present input text.
    65  func Register(name string, parser ParseFunc) {
    66  	if len(name) == 0 || name[0] == ';' {
    67  		panic("bad name in Register: " + name)
    68  	}
    69  	parsers["."+name] = parser
    70  }
// Doc represents an entire document.
// Parse fills in the header fields first and, unless TitlesOnly is set,
// Authors and Sections afterwards.
type Doc struct {
	Title      string    // first non-empty input line; for Markdown input the leading "#" is removed
	Subtitle   string    // first header line that is not a Tags/Summary/OldURL or time line
	Summary    string    // text following "Summary:" in the header
	Time       time.Time // header line accepted by parseTime
	Authors    []Author  // author blocks between the header and the first section
	TitleNotes []string  // speaker notes found before the first section heading
	Sections   []Section // top-level sections
	Tags       []string  // comma-separated values of the "Tags:" header lines
	OldURL     []string  // values of the "OldURL:" header lines
}
    85  // Author represents the person who wrote and/or is presenting the document.
    86  type Author struct {
    87  	Elem []Elem
    88  }
    90  // TextElem returns the first text elements of the author details.
    91  // This is used to display the author' name, job title, and company
    92  // without the contact details.
    93  func (p *Author) TextElem() (elems []Elem) {
    94  	for _, el := range p.Elem {
    95  		if _, ok := el.(Text); !ok {
    96  			break
    97  		}
    98  		elems = append(elems, el)
    99  	}
   100  	return
   101  }
// Section represents a section of a document (such as a presentation slide)
// comprising a title and a list of elements.
type Section struct {
	Number  []int    // position of the section at each nesting level, outermost first
	Title   string   // heading text without the heading marker
	ID      string   // HTML anchor ID
	Elem    []Elem   // content elements, including nested Sections
	Notes   []string // speaker notes found inside the section
	Classes []string // values for the HTML class attribute
	Styles  []string // values for the HTML style attribute
}
   115  // HTMLAttributes for the section
   116  func (s Section) HTMLAttributes() template.HTMLAttr {
   117  	if len(s.Classes) == 0 && len(s.Styles) == 0 {
   118  		return ""
   119  	}
   121  	var class string
   122  	if len(s.Classes) > 0 {
   123  		class = fmt.Sprintf(`class=%q`, strings.Join(s.Classes, " "))
   124  	}
   125  	var style string
   126  	if len(s.Styles) > 0 {
   127  		style = fmt.Sprintf(`style=%q`, strings.Join(s.Styles, " "))
   128  	}
   129  	return template.HTMLAttr(strings.Join([]string{class, style}, " "))
   130  }
   132  // Sections contained within the section.
   133  func (s Section) Sections() (sections []Section) {
   134  	for _, e := range s.Elem {
   135  		if section, ok := e.(Section); ok {
   136  			sections = append(sections, section)
   137  		}
   138  	}
   139  	return
   140  }
   142  // Level returns the level of the given section.
   143  // The document title is level 1, main section 2, etc.
   144  func (s Section) Level() int {
   145  	return len(s.Number) + 1
   146  }
   148  // FormattedNumber returns a string containing the concatenation of the
   149  // numbers identifying a Section.
   150  func (s Section) FormattedNumber() string {
   151  	b := &bytes.Buffer{}
   152  	for _, n := range s.Number {
   153  		fmt.Fprintf(b, "%v.", n)
   154  	}
   155  	return b.String()
   156  }
   158  func (s Section) TemplateName() string { return "section" }
// Elem defines the interface for a present element. That is, something that
// can provide the name of the template used to render the element.
type Elem interface {
	// TemplateName returns the name of the template that renders the element.
	TemplateName() string
}
   166  // renderElem implements the elem template function, used to render
   167  // sub-templates.
   168  func renderElem(t *template.Template, e Elem) (template.HTML, error) {
   169  	var data interface{} = e
   170  	if s, ok := e.(Section); ok {
   171  		data = struct {
   172  			Section
   173  			Template *template.Template
   174  		}{s, t}
   175  	}
   176  	return execTemplate(t, e.TemplateName(), data)
   177  }
   179  // pageNum derives a page number from a section.
   180  func pageNum(s Section, offset int) int {
   181  	if len(s.Number) == 0 {
   182  		return offset
   183  	}
   184  	return s.Number[0] + offset
   185  }
   187  func init() {
   188  	funcs["elem"] = renderElem
   189  	funcs["pagenum"] = pageNum
   190  }
   192  // execTemplate is a helper to execute a template and return the output as a
   193  // template.HTML value.
   194  func execTemplate(t *template.Template, name string, data interface{}) (template.HTML, error) {
   195  	b := new(bytes.Buffer)
   196  	err := t.ExecuteTemplate(b, name, data)
   197  	if err != nil {
   198  		return "", err
   199  	}
   200  	return template.HTML(b.String()), nil
   201  }
   203  // Text represents an optionally preformatted paragraph.
   204  type Text struct {
   205  	Lines []string
   206  	Pre   bool
   207  	Raw   string // original text, for Pre==true
   208  }
   210  func (t Text) TemplateName() string { return "text" }
   212  // List represents a bulleted list.
   213  type List struct {
   214  	Bullet []string
   215  }
   217  func (l List) TemplateName() string { return "list" }
   219  // Lines is a helper for parsing line-based input.
   220  type Lines struct {
   221  	line    int // 0 indexed, so has 1-indexed number of last line returned
   222  	text    []string
   223  	comment string
   224  }
   226  func readLines(r io.Reader) (*Lines, error) {
   227  	var lines []string
   228  	s := bufio.NewScanner(r)
   229  	for s.Scan() {
   230  		lines = append(lines, s.Text())
   231  	}
   232  	if err := s.Err(); err != nil {
   233  		return nil, err
   234  	}
   235  	return &Lines{0, lines, "#"}, nil
   236  }
   238  func (l *Lines) next() (text string, ok bool) {
   239  	for {
   240  		current := l.line
   241  		l.line++
   242  		if current >= len(l.text) {
   243  			return "", false
   244  		}
   245  		text = l.text[current]
   246  		// Lines starting with l.comment are comments.
   247  		if l.comment == "" || !strings.HasPrefix(text, l.comment) {
   248  			ok = true
   249  			break
   250  		}
   251  	}
   252  	return
   253  }
   255  func (l *Lines) back() {
   256  	l.line--
   257  }
   259  func (l *Lines) nextNonEmpty() (text string, ok bool) {
   260  	for {
   261  		text, ok = l.next()
   262  		if !ok {
   263  			return
   264  		}
   265  		if len(text) > 0 {
   266  			break
   267  		}
   268  	}
   269  	return
   270  }
// A Context specifies the supporting context for parsing a presentation.
// It is passed to every ParseFunc invoked while parsing sections.
type Context struct {
	// ReadFile reads the file named by filename and returns the contents.
	// The package-level Parse function sets it to os.ReadFile.
	ReadFile func(filename string) ([]byte, error)
}
// ParseMode represents flags for the Parse function.
type ParseMode int

const (
	// TitlesOnly, if set, makes Parse return as soon as the header has been
	// parsed: the title, subtitle, and the other header fields are filled
	// in, but authors and sections are not parsed.
	TitlesOnly ParseMode = 1
)
   286  // Parse parses a document from r.
   287  func (ctx *Context) Parse(r io.Reader, name string, mode ParseMode) (*Doc, error) {
   288  	doc := new(Doc)
   289  	lines, err := readLines(r)
   290  	if err != nil {
   291  		return nil, err
   292  	}
   294  	// Detect Markdown-enabled vs legacy present file.
   295  	// Markdown-enabled files have a title line beginning with "# "
   296  	// (like preprocessed C files of yore).
   297  	isMarkdown := false
   298  	for i := lines.line; i < len(lines.text); i++ {
   299  		line := lines.text[i]
   300  		if line == "" {
   301  			continue
   302  		}
   303  		isMarkdown = strings.HasPrefix(line, "# ")
   304  		break
   305  	}
   307  	sectionPrefix := "*"
   308  	if isMarkdown {
   309  		sectionPrefix = "##"
   310  		lines.comment = "//"
   311  	}
   313  	for i := lines.line; i < len(lines.text); i++ {
   314  		if strings.HasPrefix(lines.text[i], sectionPrefix) {
   315  			break
   316  		}
   318  		if isSpeakerNote(lines.text[i]) {
   319  			doc.TitleNotes = append(doc.TitleNotes, trimSpeakerNote(lines.text[i]))
   320  		}
   321  	}
   323  	err = parseHeader(doc, isMarkdown, lines)
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  	if mode&TitlesOnly != 0 {
   328  		return doc, nil
   329  	}
   331  	// Authors
   332  	if doc.Authors, err = parseAuthors(name, sectionPrefix, lines); err != nil {
   333  		return nil, err
   334  	}
   336  	// Sections
   337  	if doc.Sections, err = parseSections(ctx, name, sectionPrefix, lines, []int{}); err != nil {
   338  		return nil, err
   339  	}
   341  	return doc, nil
   342  }
   344  // Parse parses a document from r. Parse reads assets used by the presentation
   345  // from the file system using os.ReadFile.
   346  func Parse(r io.Reader, name string, mode ParseMode) (*Doc, error) {
   347  	ctx := Context{ReadFile: os.ReadFile}
   348  	return ctx.Parse(r, name, mode)
   349  }
// isHeadingLegacy and isHeadingMarkdown match any section heading in legacy
// and Markdown-enabled present files respectively: one or more "*" (legacy)
// or "#" (Markdown) characters at the start of the line, followed by a space
// or the end of the line.
var (
	isHeadingLegacy   = regexp.MustCompile(`^\*+( |$)`)
	isHeadingMarkdown = regexp.MustCompile(`^\#+( |$)`)
)
   357  // lesserHeading returns true if text is a heading of a lesser or equal level
   358  // than that denoted by prefix.
   359  func lesserHeading(isHeading *regexp.Regexp, text, prefix string) bool {
   360  	return isHeading.MatchString(text) && !strings.HasPrefix(text, prefix+prefix[:1])
   361  }
   363  // parseSections parses Sections from lines for the section level indicated by
   364  // number (a nil number indicates the top level).
   365  func parseSections(ctx *Context, name, prefix string, lines *Lines, number []int) ([]Section, error) {
   366  	isMarkdown := prefix[0] == '#'
   367  	isHeading := isHeadingLegacy
   368  	if isMarkdown {
   369  		isHeading = isHeadingMarkdown
   370  	}
   371  	var sections []Section
   372  	for i := 1; ; i++ {
   373  		// Next non-empty line is title.
   374  		text, ok := lines.nextNonEmpty()
   375  		for ok && text == "" {
   376  			text, ok = lines.next()
   377  		}
   378  		if !ok {
   379  			break
   380  		}
   381  		if text != prefix && !strings.HasPrefix(text, prefix+" ") {
   382  			lines.back()
   383  			break
   384  		}
   385  		// Markdown sections can end in {#id} to set the HTML anchor for the section.
   386  		// This is nicer than the default #TOC_1_2-style anchor.
   387  		title := strings.TrimSpace(text[len(prefix):])
   388  		id := ""
   389  		if isMarkdown && strings.HasSuffix(title, "}") {
   390  			j := strings.LastIndex(title, "{#")
   391  			if j >= 0 {
   392  				id = title[j+2 : len(title)-1]
   393  				title = strings.TrimSpace(title[:j])
   394  			}
   395  		}
   396  		section := Section{
   397  			Number: append(append([]int{}, number...), i),
   398  			Title:  title,
   399  			ID:     id,
   400  		}
   401  		text, ok = lines.nextNonEmpty()
   402  		for ok && !lesserHeading(isHeading, text, prefix) {
   403  			var e Elem
   404  			r, _ := utf8.DecodeRuneInString(text)
   405  			switch {
   406  			case !isMarkdown && unicode.IsSpace(r):
   407  				i := strings.IndexFunc(text, func(r rune) bool {
   408  					return !unicode.IsSpace(r)
   409  				})
   410  				if i < 0 {
   411  					break
   412  				}
   413  				indent := text[:i]
   414  				var s []string
   415  				for ok && (strings.HasPrefix(text, indent) || text == "") {
   416  					if text != "" {
   417  						text = text[i:]
   418  					}
   419  					s = append(s, text)
   420  					text, ok = lines.next()
   421  				}
   422  				lines.back()
   423  				pre := strings.Join(s, "\n")
   424  				raw := pre
   425  				pre = strings.Replace(pre, "\t", "    ", -1) // browsers treat tabs badly
   426  				pre = strings.TrimRightFunc(pre, unicode.IsSpace)
   427  				e = Text{Lines: []string{pre}, Pre: true, Raw: raw}
   428  			case !isMarkdown && strings.HasPrefix(text, "- "):
   429  				var b []string
   430  				for {
   431  					if strings.HasPrefix(text, "- ") {
   432  						b = append(b, text[2:])
   433  					} else if len(b) > 0 && strings.HasPrefix(text, " ") {
   434  						b[len(b)-1] += "\n" + strings.TrimSpace(text)
   435  					} else {
   436  						break
   437  					}
   438  					if text, ok = lines.next(); !ok {
   439  						break
   440  					}
   441  				}
   442  				lines.back()
   443  				e = List{Bullet: b}
   444  			case isSpeakerNote(text):
   445  				section.Notes = append(section.Notes, trimSpeakerNote(text))
   446  			case strings.HasPrefix(text, prefix+prefix[:1]+" ") || text == prefix+prefix[:1]:
   447  				lines.back()
   448  				subsecs, err := parseSections(ctx, name, prefix+prefix[:1], lines, section.Number)
   449  				if err != nil {
   450  					return nil, err
   451  				}
   452  				for _, ss := range subsecs {
   453  					section.Elem = append(section.Elem, ss)
   454  				}
   455  			case strings.HasPrefix(text, prefix+prefix[:1]):
   456  				return nil, fmt.Errorf("%s:%d: badly nested section inside %s: %s", name, lines.line, prefix, text)
   457  			case strings.HasPrefix(text, "."):
   458  				args := strings.Fields(text)
   459  				if args[0] == ".background" {
   460  					section.Classes = append(section.Classes, "background")
   461  					section.Styles = append(section.Styles, "background-image: url('"+args[1]+"')")
   462  					break
   463  				}
   464  				parser := parsers[args[0]]
   465  				if parser == nil {
   466  					return nil, fmt.Errorf("%s:%d: unknown command %q", name, lines.line, text)
   467  				}
   468  				t, err := parser(ctx, name, lines.line, text)
   469  				if err != nil {
   470  					return nil, err
   471  				}
   472  				e = t
   474  			case isMarkdown:
   475  				// Collect Markdown lines, including blank lines and indented text.
   476  				var block []string
   477  				endLine, endBlock := lines.line-1, -1 // end is last non-empty line
   478  				for ok {
   479  					trim := strings.TrimSpace(text)
   480  					if trim != "" {
   481  						// Command breaks text block.
   482  						// Section heading breaks text block in markdown.
   483  						if text[0] == '.' || text[0] == '#' || isSpeakerNote(text) {
   484  							break
   485  						}
   486  						if strings.HasPrefix(text, `\.`) { // Backslash escapes initial period.
   487  							text = text[1:]
   488  						}
   489  						endLine, endBlock = lines.line, len(block)
   490  					}
   491  					block = append(block, text)
   492  					text, ok = lines.next()
   493  				}
   494  				block = block[:endBlock+1]
   495  				lines.line = endLine + 1
   496  				if len(block) == 0 {
   497  					break
   498  				}
   500  				// Replace all leading tabs with 4 spaces,
   501  				// which render better in code blocks.
   502  				// CommonMark defines that for parsing the structure of the file
   503  				// a tab is equivalent to 4 spaces, so this change won't
   504  				// affect the later parsing at all.
   505  				// An alternative would be to apply this to code blocks after parsing,
   506  				// at the same time that we update <a> targets, but that turns out
   507  				// to be quite difficult to modify in the AST.
   508  				for i, line := range block {
   509  					if len(line) > 0 && line[0] == '\t' {
   510  						short := strings.TrimLeft(line, "\t")
   511  						line = strings.Repeat("    ", len(line)-len(short)) + short
   512  						block[i] = line
   513  					}
   514  				}
   515  				html, err := renderMarkdown([]byte(strings.Join(block, "\n")))
   516  				if err != nil {
   517  					return nil, err
   518  				}
   519  				e = HTML{HTML: html}
   521  			default:
   522  				// Collect text lines.
   523  				var block []string
   524  				for ok && strings.TrimSpace(text) != "" {
   525  					// Command breaks text block.
   526  					// Section heading breaks text block in markdown.
   527  					if text[0] == '.' || isSpeakerNote(text) {
   528  						lines.back()
   529  						break
   530  					}
   531  					if strings.HasPrefix(text, `\.`) { // Backslash escapes initial period.
   532  						text = text[1:]
   533  					}
   534  					block = append(block, text)
   535  					text, ok = lines.next()
   536  				}
   537  				if len(block) == 0 {
   538  					break
   539  				}
   540  				e = Text{Lines: block}
   541  			}
   542  			if e != nil {
   543  				section.Elem = append(section.Elem, e)
   544  			}
   545  			text, ok = lines.nextNonEmpty()
   546  		}
   547  		if isHeading.MatchString(text) {
   548  			lines.back()
   549  		}
   550  		sections = append(sections, section)
   551  	}
   553  	if len(sections) == 0 {
   554  		return nil, fmt.Errorf("%s:%d: unexpected line: %s", name, lines.line+1, lines.text[lines.line])
   555  	}
   556  	return sections, nil
   557  }
   559  func parseHeader(doc *Doc, isMarkdown bool, lines *Lines) error {
   560  	var ok bool
   561  	// First non-empty line starts header.
   562  	doc.Title, ok = lines.nextNonEmpty()
   563  	if !ok {
   564  		return errors.New("unexpected EOF; expected title")
   565  	}
   566  	if isMarkdown {
   567  		doc.Title = strings.TrimSpace(strings.TrimPrefix(doc.Title, "#"))
   568  	}
   570  	for {
   571  		text, ok := lines.next()
   572  		if !ok {
   573  			return errors.New("unexpected EOF")
   574  		}
   575  		if text == "" {
   576  			break
   577  		}
   578  		if isSpeakerNote(text) {
   579  			continue
   580  		}
   581  		if strings.HasPrefix(text, "Tags:") {
   582  			tags := strings.Split(text[len("Tags:"):], ",")
   583  			for i := range tags {
   584  				tags[i] = strings.TrimSpace(tags[i])
   585  			}
   586  			doc.Tags = append(doc.Tags, tags...)
   587  		} else if strings.HasPrefix(text, "Summary:") {
   588  			doc.Summary = strings.TrimSpace(text[len("Summary:"):])
   589  		} else if strings.HasPrefix(text, "OldURL:") {
   590  			doc.OldURL = append(doc.OldURL, strings.TrimSpace(text[len("OldURL:"):]))
   591  		} else if t, ok := parseTime(text); ok {
   592  			doc.Time = t
   593  		} else if doc.Subtitle == "" {
   594  			doc.Subtitle = text
   595  		} else {
   596  			return fmt.Errorf("unexpected header line: %q", text)
   597  		}
   598  	}
   599  	return nil
   600  }
   602  func parseAuthors(name, sectionPrefix string, lines *Lines) (authors []Author, err error) {
   603  	// This grammar demarcates authors with blanks.
   605  	// Skip blank lines.
   606  	if _, ok := lines.nextNonEmpty(); !ok {
   607  		return nil, errors.New("unexpected EOF")
   608  	}
   609  	lines.back()
   611  	var a *Author
   612  	for {
   613  		text, ok := lines.next()
   614  		if !ok {
   615  			return nil, errors.New("unexpected EOF")
   616  		}
   618  		// If we find a section heading, we're done.
   619  		if strings.HasPrefix(text, sectionPrefix) {
   620  			lines.back()
   621  			break
   622  		}
   624  		if isSpeakerNote(text) {
   625  			continue
   626  		}
   628  		// If we encounter a blank we're done with this author.
   629  		if a != nil && len(text) == 0 {
   630  			authors = append(authors, *a)
   631  			a = nil
   632  			continue
   633  		}
   634  		if a == nil {
   635  			a = new(Author)
   636  		}
   638  		// Parse the line. Those that
   639  		// - begin with @ are twitter names,
   640  		// - contain slashes are links, or
   641  		// - contain an @ symbol are an email address.
   642  		// The rest is just text.
   643  		var el Elem
   644  		switch {
   645  		case strings.HasPrefix(text, "@"):
   646  			el = parseAuthorURL(name, "http://twitter.com/"+text[1:])
   647  		case strings.Contains(text, ":"):
   648  			el = parseAuthorURL(name, text)
   649  		case strings.Contains(text, "@"):
   650  			el = parseAuthorURL(name, "mailto:"+text)
   651  		}
   652  		if l, ok := el.(Link); ok {
   653  			l.Label = text
   654  			el = l
   655  		}
   656  		if el == nil {
   657  			el = Text{Lines: []string{text}}
   658  		}
   659  		a.Elem = append(a.Elem, el)
   660  	}
   661  	if a != nil {
   662  		authors = append(authors, *a)
   663  	}
   664  	return authors, nil
   665  }
   667  func parseAuthorURL(name, text string) Elem {
   668  	u, err := url.Parse(text)
   669  	if err != nil {
   670  		log.Printf("parsing %s author block: invalid URL %q: %v", name, text, err)
   671  		return nil
   672  	}
   673  	return Link{URL: u}
   674  }
   676  func parseTime(text string) (t time.Time, ok bool) {
   677  	t, err := time.Parse("15:04 2 Jan 2006", text)
   678  	if err == nil {
   679  		return t, true
   680  	}
   681  	t, err = time.Parse("2 Jan 2006", text)
   682  	if err == nil {
   683  		// at 11am UTC it is the same date everywhere
   684  		t = t.Add(time.Hour * 11)
   685  		return t, true
   686  	}
   687  	return time.Time{}, false
   688  }
   690  func isSpeakerNote(s string) bool {
   691  	return strings.HasPrefix(s, ": ") || s == ":"
   692  }
   694  func trimSpeakerNote(s string) string {
   695  	if s == ":" {
   696  		return ""
   697  	}
   698  	return strings.TrimPrefix(s, ": ")
   699  }
   701  func renderMarkdown(input []byte) (template.HTML, error) {
   702  	md := goldmark.New(goldmark.WithRendererOptions(html.WithUnsafe()))
   703  	reader := text.NewReader(input)
   704  	doc := md.Parser().Parse(reader)
   705  	fixupMarkdown(doc)
   706  	var b strings.Builder
   707  	if err := md.Renderer().Render(&b, input, doc); err != nil {
   708  		return "", err
   709  	}
   710  	return template.HTML(b.String()), nil
   711  }
   713  func fixupMarkdown(n ast.Node) {
   714  	ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
   715  		if entering {
   716  			switch n := n.(type) {
   717  			case *ast.Link:
   718  				n.SetAttributeString("target", []byte("_blank"))
   719  				// https://developers.google.com/web/tools/lighthouse/audits/noopener
   720  				n.SetAttributeString("rel", []byte("noopener"))
   721  			}
   722  		}
   723  		return ast.WalkContinue, nil
   724  	})
   725  }