// Source: github.com/AndrienkoAleksandr/go@v0.0.19/src/go/doc/comment/parse.go

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package comment

import (
	"sort"
	"strings"
	"unicode"
	"unicode/utf8"
)

// A Doc is a parsed Go doc comment.
type Doc struct {
	// Content is the sequence of content blocks in the comment.
	Content []Block

	// Links holds the link definitions found in the comment.
	Links []*LinkDef
}

// A LinkDef is a single link definition.
type LinkDef struct {
	Text string // the link text
	URL  string // the link URL
	Used bool   // whether the comment uses the definition
}

// A Block is block-level content in a doc comment,
// one of [*Code], [*Heading], [*List], or [*Paragraph].
type Block interface {
	block()
}

// A Heading is a doc comment heading.
type Heading struct {
	Text []Text // the heading text
}

func (*Heading) block() {}

// A List is a numbered or bullet list.
// Lists are always non-empty: len(Items) > 0.
// In a numbered list, every Items[i].Number is a non-empty string.
// In a bullet list, every Items[i].Number is an empty string.
type List struct {
	// Items is the list items.
	Items []*ListItem

	// ForceBlankBefore indicates that the list must be
	// preceded by a blank line when reformatting the comment,
	// overriding the usual conditions. See the BlankBefore method.
	//
	// The comment parser sets ForceBlankBefore for any list
	// that is preceded by a blank line, to make sure
	// the blank line is preserved when printing.
	ForceBlankBefore bool

	// ForceBlankBetween indicates that list items must be
	// separated by blank lines when reformatting the comment,
	// overriding the usual conditions. See the BlankBetween method.
	//
	// The comment parser sets ForceBlankBetween for any list
	// that has a blank line between any two of its items, to make sure
	// the blank lines are preserved when printing.
	ForceBlankBetween bool
}

func (*List) block() {}

// BlankBefore reports whether a reformatting of the comment
// should include a blank line before the list.
// The default rule is the same as for [List.BlankBetween]:
// if the list item content contains any blank lines
// (meaning at least one item has multiple paragraphs)
// then the list itself must be preceded by a blank line.
// A preceding blank line can be forced by setting [List].ForceBlankBefore.
func (l *List) BlankBefore() bool {
	return l.ForceBlankBefore || l.BlankBetween()
}

// BlankBetween reports whether a reformatting of the comment
// should include a blank line between each pair of list items.
// The default rule is that if the list item content contains any blank lines
// (meaning at least one item has multiple paragraphs)
// then list items must themselves be separated by blank lines.
// Blank line separators can be forced by setting [List].ForceBlankBetween.
func (l *List) BlankBetween() bool {
	if l.ForceBlankBetween {
		return true
	}
	for _, item := range l.Items {
		if len(item.Content) != 1 {
			// Unreachable for parsed comments today,
			// since the only way to get multiple item.Content
			// is multiple paragraphs, which must have been
			// separated by a blank line.
			return true
		}
	}
	return false
}

// A ListItem is a single item in a numbered or bullet list.
type ListItem struct {
	// Number is a decimal string in a numbered list
	// or an empty string in a bullet list.
	Number string // "1", "2", ...; "" for bullet list

	// Content is the list content.
	// Currently, restrictions in the parser and printer
	// require every element of Content to be a *Paragraph.
	Content []Block // Content of this item.
}

// A Paragraph is a paragraph of text.
type Paragraph struct {
	Text []Text // the paragraph text
}

func (*Paragraph) block() {}

// A Code is a preformatted code block.
type Code struct {
	// Text is the preformatted text, ending with a newline character.
	// It may be multiple lines, each of which ends with a newline character.
	// It is never empty, nor does it start or end with a blank line.
	Text string
}

func (*Code) block() {}

// A Text is text-level content in a doc comment,
// one of [Plain], [Italic], [*Link], or [*DocLink].
type Text interface {
	text()
}

// A Plain is a string rendered as plain text (not italicized).
type Plain string

func (Plain) text() {}

// An Italic is a string rendered as italicized text.
type Italic string

func (Italic) text() {}

// A Link is a link to a specific URL.
type Link struct {
	Auto bool   // is this an automatic (implicit) link of a literal URL?
	Text []Text // text of link
	URL  string // target URL of link
}

func (*Link) text() {}

// A DocLink is a link to documentation for a Go package or symbol.
type DocLink struct {
	Text []Text // text of link

	// ImportPath, Recv, and Name identify the Go package or symbol
	// that is the link target. The potential combinations of
	// non-empty fields are:
	//  - ImportPath: a link to another package
	//  - ImportPath, Name: a link to a const, func, type, or var in another package
	//  - ImportPath, Recv, Name: a link to a method in another package
	//  - Name: a link to a const, func, type, or var in this package
	//  - Recv, Name: a link to a method in this package
	ImportPath string // import path
	Recv       string // receiver type, without any pointer star, for methods
	Name       string // const, func, type, var, or method name
}

func (*DocLink) text() {}

// A Parser is a doc comment parser.
// The fields in the struct can be filled in before calling Parse
// in order to customize the details of the parsing process.
type Parser struct {
	// Words is a map of Go identifier words that
	// should be italicized and potentially linked.
	// If Words[w] is the empty string, then the word w
	// is only italicized. Otherwise it is linked, using
	// Words[w] as the link target.
	// Words corresponds to the [go/doc.ToHTML] words parameter.
	Words map[string]string

	// LookupPackage resolves a package name to an import path.
	//
	// If LookupPackage(name) returns ok == true, then [name]
	// (or [name.Sym] or [name.Sym.Method])
	// is considered a documentation link to importPath's package docs.
	// It is valid to return "", true, in which case name is considered
	// to refer to the current package.
	//
	// If LookupPackage(name) returns ok == false,
	// then [name] (or [name.Sym] or [name.Sym.Method])
	// will not be considered a documentation link,
	// except in the case where name is the full (but single-element) import path
	// of a package in the standard library, such as in [math] or [io.Reader].
	// LookupPackage is still called for such names,
	// in order to permit references to imports of other packages
	// with the same package names.
	//
	// Setting LookupPackage to nil is equivalent to setting it to
	// a function that always returns "", false.
	LookupPackage func(name string) (importPath string, ok bool)

	// LookupSym reports whether a symbol name or method name
	// exists in the current package.
	//
	// If LookupSym("", "Name") returns true, then [Name]
	// is considered a documentation link for a const, func, type, or var.
	//
	// Similarly, if LookupSym("Recv", "Name") returns true,
	// then [Recv.Name] is considered a documentation link for
	// type Recv's method Name.
	//
	// Setting LookupSym to nil is equivalent to setting it to a function
	// that always returns false.
	LookupSym func(recv, name string) (ok bool)
}

// parseDoc is parsing state for a single doc comment.
type parseDoc struct {
	*Parser                               // caller-supplied configuration
	*Doc                                  // the document being built
	links     map[string]*LinkDef         // link definitions, keyed by link text
	lines     []string                    // the comment's lines, after unindenting
	lookupSym func(recv, name string) bool // never nil; see Parse
}

// lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Recv.Name].
// If pkg has a slash, it is assumed to be the full import path and is returned with ok = true.
//
// Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand").
// d.LookupPackage provides a way for the caller to allow resolving such names with reference
// to the imports in the surrounding package.
//
// There is one collision between these two cases: single-element standard library names
// like "math" are full import paths but don't contain slashes. We let d.LookupPackage have
// the first chance to resolve it, in case there's a different package imported as math,
// and otherwise we refer to a built-in list of single-element standard library package names.
func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
	if strings.Contains(pkg, "/") { // assume a full import path
		// A slash-containing name is accepted only if it is a
		// syntactically valid import path; no lookup is attempted.
		if validImportPath(pkg) {
			return pkg, true
		}
		return "", false
	}
	if d.LookupPackage != nil {
		// Give LookupPackage a chance.
		if path, ok := d.LookupPackage(pkg); ok {
			return path, true
		}
	}
	return DefaultLookupPackage(pkg)
}

// isStdPkg reports whether path appears in stdPkgs.
// The binary search requires stdPkgs to be sorted in increasing order.
func isStdPkg(path string) bool {
	// TODO(rsc): Use sort.Find once we don't have to worry about
	// copying this code into older Go environments.
	i := sort.Search(len(stdPkgs), func(i int) bool { return stdPkgs[i] >= path })
	return i < len(stdPkgs) && stdPkgs[i] == path
}

// DefaultLookupPackage is the default package lookup
// function, used when [Parser].LookupPackage is nil.
// It recognizes names of the packages from the standard
// library with single-element import paths, such as math,
// which would otherwise be impossible to name.
//
// Note that the go/doc package provides a more sophisticated
// lookup based on the imports used in the current package.
func DefaultLookupPackage(name string) (importPath string, ok bool) {
	if isStdPkg(name) {
		return name, true
	}
	return "", false
}

// Parse parses the doc comment text and returns the *Doc form.
// Comment markers (/* // and */) in the text must have already been removed.
func (p *Parser) Parse(text string) *Doc {
	lines := unindent(strings.Split(text, "\n"))
	d := &parseDoc{
		Parser:    p,
		Doc:       new(Doc),
		links:     make(map[string]*LinkDef),
		lines:     lines,
		lookupSym: func(recv, name string) bool { return false },
	}
	if p.LookupSym != nil {
		d.lookupSym = p.LookupSym
	}

	// First pass: break into block structure and collect known links.
	// The text is all recorded as Plain for now.
	var prev span
	for _, s := range parseSpans(lines) {
		var b Block
		switch s.kind {
		default:
			panic("go/doc/comment: internal error: unknown span kind")
		case spanList:
			// prev.end < s.start means at least one (blank) line was
			// skipped between the previous span and this list.
			b = d.list(lines[s.start:s.end], prev.end < s.start)
		case spanCode:
			b = d.code(lines[s.start:s.end])
		case spanOldHeading:
			b = d.oldHeading(lines[s.start])
		case spanHeading:
			b = d.heading(lines[s.start])
		case spanPara:
			b = d.paragraph(lines[s.start:s.end])
		}
		// A nil block is skipped rather than recorded
		// (d.paragraph returns nil for a block of link definitions).
		if b != nil {
			d.Content = append(d.Content, b)
		}
		prev = s
	}

	// Second pass: interpret all the Plain text now that we know the links.
	// Headings and code blocks are left as produced by the first pass.
	for _, b := range d.Content {
		switch b := b.(type) {
		case *Paragraph:
			b.Text = d.parseLinkedText(string(b.Text[0].(Plain)))
		case *List:
			for _, i := range b.Items {
				for _, c := range i.Content {
					p := c.(*Paragraph)
					p.Text = d.parseLinkedText(string(p.Text[0].(Plain)))
				}
			}
		}
	}

	return d.Doc
}

// A span represents a single span of comment lines (lines[start:end])
// of an identified kind (code, heading, paragraph, and so on).
type span struct {
	start int
	end   int
	kind  spanKind
}

// A spanKind describes the kind of span.
type spanKind int

// The zero spanKind is deliberately left unnamed, so a span whose
// kind was never set hits the default (panic) case in Parse.
const (
	_ spanKind = iota
	spanCode
	spanHeading
	spanList
	spanOldHeading
	spanPara
)

// parseSpans splits lines into consecutive non-blank spans and classifies
// each one as code, list, heading, old-style heading, or paragraph.
// Blank lines between spans belong to no span.
func parseSpans(lines []string) []span {
	var spans []span

	// The loop may process a line twice: once as unindented
	// and again forced indented. So the maximum expected
	// number of iterations is 2*len(lines). The repeating logic
	// can be subtle, though, and to protect against introduction
	// of infinite loops in future changes, we watch to see that
	// we are not looping too much. A panic is better than a
	// quiet infinite loop.
	watchdog := 2 * len(lines)

	i := 0
	// Lines with index below forceIndent are treated as indented
	// even if they are not; set by the heuristics further down.
	forceIndent := 0
Spans:
	for {
		// Skip blank lines.
		for i < len(lines) && lines[i] == "" {
			i++
		}
		if i >= len(lines) {
			break
		}
		if watchdog--; watchdog < 0 {
			panic("go/doc/comment: internal error: not making progress")
		}

		var kind spanKind
		start := i
		end := i
		if i < forceIndent || indented(lines[i]) {
			// Indented (or force indented).
			// Ends before next unindented. (Blank lines are OK.)
			// If this is an unindented list that we are heuristically treating as indented,
			// then accept unindented list item lines up to the first blank lines.
			// The heuristic is disabled at blank lines to contain its effect
			// to non-gofmt'ed sections of the comment.
			unindentedListOK := isList(lines[i]) && i < forceIndent
			i++
			for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
				if lines[i] == "" {
					unindentedListOK = false
				}
				i++
			}

			// Drop trailing blank lines.
			end = i
			for end > start && lines[end-1] == "" {
				end--
			}

			// If indented lines are followed (without a blank line)
			// by an unindented line beginning with a closing brace,
			// take that one line too. This fixes the common mistake
			// of pasting in something like
			//
			//	func main() {
			//		fmt.Println("hello, world")
			//	}
			//
			// and forgetting to indent it.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block or list would be
			// followed by a blank line or end of comment.
			if end < len(lines) && strings.HasPrefix(lines[end], "}") {
				end++
			}

			if isList(lines[start]) {
				kind = spanList
			} else {
				kind = spanCode
			}
		} else {
			// Unindented. Ends at next blank or indented line.
			i++
			for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
				i++
			}
			end = i

			// If unindented lines are followed (without a blank line)
			// by an indented line that would start a code block,
			// check whether the final unindented lines
			// should be left for the indented section.
			// This can happen for the common mistakes of
			// unindented code or unindented lists.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block would have a blank line
			// preceding it after the unindented lines.
			if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
				switch {
				case isList(lines[i-1]):
					// If the final unindented line looks like a list item,
					// this may be the first indented line wrap of
					// a mistakenly unindented list.
					// Leave all the unindented list items.
					forceIndent = end
					end--
					for end > start && isList(lines[end-1]) {
						end--
					}

				case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
					// If the final unindented line ended in { or \
					// it is probably the start of a misindented code block.
					// Give the user a single line fix.
					// Often that's enough; if not, the user can fix the others themselves.
					forceIndent = end
					end--
				}

				// Nothing is left for an unindented span: rescan from start,
				// where the line is now covered by forceIndent.
				if start == end && forceIndent > start {
					i = start
					continue Spans
				}
			}

			// Span is either paragraph or heading.
			if end-start == 1 && isHeading(lines[start]) {
				kind = spanHeading
			} else if end-start == 1 && isOldHeading(lines[start], lines, start) {
				kind = spanOldHeading
			} else {
				kind = spanPara
			}
		}

		spans = append(spans, span{start, end, kind})
		i = end
	}

	return spans
}

// indented reports whether line is indented
// (starts with a leading space or tab).
func indented(line string) bool {
	if line == "" {
		return false
	}
	switch line[0] {
	case ' ', '\t':
		return true
	}
	return false
}

// unindent removes any common space/tab prefix
// from each line in lines, returning a copy of lines in which
// those prefixes have been trimmed from each line.
// It also replaces any lines containing only spaces with blank lines (empty strings).
// Blank lines at the start and end are dropped; the result is nil
// when nothing but blank lines was supplied.
func unindent(lines []string) []string {
	// Narrow the window to exclude blank lines at either end.
	first, last := 0, len(lines)
	for first < last && isBlank(lines[first]) {
		first++
	}
	for last > first && isBlank(lines[last-1]) {
		last--
	}
	if first == last {
		return nil
	}
	lines = lines[first:last]

	// The shared indentation is the longest whitespace prefix common
	// to the first line and every later line that is not blank.
	indent := leadingSpace(lines[0])
	for _, l := range lines[1:] {
		if isBlank(l) {
			continue
		}
		indent = commonPrefix(indent, leadingSpace(l))
	}

	trimmed := make([]string, 0, len(lines))
	for _, l := range lines {
		l = strings.TrimPrefix(l, indent)
		if strings.TrimSpace(l) == "" {
			l = ""
		}
		trimmed = append(trimmed, l)
	}

	// Whitespace-only lines have just become empty,
	// so the ends may need trimming a second time.
	lo, hi := 0, len(trimmed)
	for lo < hi && trimmed[lo] == "" {
		lo++
	}
	for hi > lo && trimmed[hi-1] == "" {
		hi--
	}
	return trimmed[lo:hi]
}

// isBlank reports whether s is a blank line:
// either empty or consisting of a single newline.
func isBlank(s string) bool {
	return s == "" || s == "\n"
}

// commonPrefix returns the longest common prefix of a and b.
func commonPrefix(a, b string) string {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return a[:i]
		}
	}
	return a[:n]
}

// leadingSpace returns the longest prefix of s consisting of spaces and tabs.
func leadingSpace(s string) string {
	return s[:len(s)-len(strings.TrimLeft(s, " \t"))]
}

// isOldHeading reports whether line is an old-style section heading.
// line is all[off].
570 func isOldHeading(line string, all []string, off int) bool { 571 if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" { 572 return false 573 } 574 575 line = strings.TrimSpace(line) 576 577 // a heading must start with an uppercase letter 578 r, _ := utf8.DecodeRuneInString(line) 579 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 580 return false 581 } 582 583 // it must end in a letter or digit: 584 r, _ = utf8.DecodeLastRuneInString(line) 585 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 586 return false 587 } 588 589 // exclude lines with illegal characters. we allow "()," 590 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { 591 return false 592 } 593 594 // allow "'" for possessive "'s" only 595 for b := line; ; { 596 var ok bool 597 if _, b, ok = strings.Cut(b, "'"); !ok { 598 break 599 } 600 if b != "s" && !strings.HasPrefix(b, "s ") { 601 return false // ' not followed by s and then end-of-word 602 } 603 } 604 605 // allow "." when followed by non-space 606 for b := line; ; { 607 var ok bool 608 if _, b, ok = strings.Cut(b, "."); !ok { 609 break 610 } 611 if b == "" || strings.HasPrefix(b, " ") { 612 return false // not followed by non-space 613 } 614 } 615 616 return true 617 } 618 619 // oldHeading returns the *Heading for the given old-style section heading line. 620 func (d *parseDoc) oldHeading(line string) Block { 621 return &Heading{Text: []Text{Plain(strings.TrimSpace(line))}} 622 } 623 624 // isHeading reports whether line is a new-style section heading. 625 func isHeading(line string) bool { 626 return len(line) >= 2 && 627 line[0] == '#' && 628 (line[1] == ' ' || line[1] == '\t') && 629 strings.TrimSpace(line) != "#" 630 } 631 632 // heading returns the *Heading for the given new-style section heading line. 
633 func (d *parseDoc) heading(line string) Block { 634 return &Heading{Text: []Text{Plain(strings.TrimSpace(line[1:]))}} 635 } 636 637 // code returns a code block built from the lines. 638 func (d *parseDoc) code(lines []string) *Code { 639 body := unindent(lines) 640 body = append(body, "") // to get final \n from Join 641 return &Code{Text: strings.Join(body, "\n")} 642 } 643 644 // paragraph returns a paragraph block built from the lines. 645 // If the lines are link definitions, paragraph adds them to d and returns nil. 646 func (d *parseDoc) paragraph(lines []string) Block { 647 // Is this a block of known links? Handle. 648 var defs []*LinkDef 649 for _, line := range lines { 650 def, ok := parseLink(line) 651 if !ok { 652 goto NoDefs 653 } 654 defs = append(defs, def) 655 } 656 for _, def := range defs { 657 d.Links = append(d.Links, def) 658 if d.links[def.Text] == nil { 659 d.links[def.Text] = def 660 } 661 } 662 return nil 663 NoDefs: 664 665 return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}} 666 } 667 668 // parseLink parses a single link definition line: 669 // 670 // [text]: url 671 // 672 // It returns the link definition and whether the line was well formed. 673 func parseLink(line string) (*LinkDef, bool) { 674 if line == "" || line[0] != '[' { 675 return nil, false 676 } 677 i := strings.Index(line, "]:") 678 if i < 0 || i+3 >= len(line) || (line[i+2] != ' ' && line[i+2] != '\t') { 679 return nil, false 680 } 681 682 text := line[1:i] 683 url := strings.TrimSpace(line[i+3:]) 684 j := strings.Index(url, "://") 685 if j < 0 || !isScheme(url[:j]) { 686 return nil, false 687 } 688 689 // Line has right form and has valid scheme://. 690 // That's good enough for us - we are not as picky 691 // about the characters beyond the :// as we are 692 // when extracting inline URLs from text. 
693 return &LinkDef{Text: text, URL: url}, true 694 } 695 696 // list returns a list built from the indented lines, 697 // using forceBlankBefore as the value of the List's ForceBlankBefore field. 698 func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List { 699 num, _, _ := listMarker(lines[0]) 700 var ( 701 list *List = &List{ForceBlankBefore: forceBlankBefore} 702 item *ListItem 703 text []string 704 ) 705 flush := func() { 706 if item != nil { 707 if para := d.paragraph(text); para != nil { 708 item.Content = append(item.Content, para) 709 } 710 } 711 text = nil 712 } 713 714 for _, line := range lines { 715 if n, after, ok := listMarker(line); ok && (n != "") == (num != "") { 716 // start new list item 717 flush() 718 719 item = &ListItem{Number: n} 720 list.Items = append(list.Items, item) 721 line = after 722 } 723 line = strings.TrimSpace(line) 724 if line == "" { 725 list.ForceBlankBetween = true 726 flush() 727 continue 728 } 729 text = append(text, strings.TrimSpace(line)) 730 } 731 flush() 732 return list 733 } 734 735 // listMarker parses the line as beginning with a list marker. 736 // If it can do that, it returns the numeric marker ("" for a bullet list), 737 // the rest of the line, and ok == true. 738 // Otherwise, it returns "", "", false. 739 func listMarker(line string) (num, rest string, ok bool) { 740 line = strings.TrimSpace(line) 741 if line == "" { 742 return "", "", false 743 } 744 745 // Can we find a marker? 746 if r, n := utf8.DecodeRuneInString(line); r == '•' || r == '*' || r == '+' || r == '-' { 747 num, rest = "", line[n:] 748 } else if '0' <= line[0] && line[0] <= '9' { 749 n := 1 750 for n < len(line) && '0' <= line[n] && line[n] <= '9' { 751 n++ 752 } 753 if n >= len(line) || (line[n] != '.' 
&& line[n] != ')') { 754 return "", "", false 755 } 756 num, rest = line[:n], line[n+1:] 757 } else { 758 return "", "", false 759 } 760 761 if !indented(rest) || strings.TrimSpace(rest) == "" { 762 return "", "", false 763 } 764 765 return num, rest, true 766 } 767 768 // isList reports whether the line is the first line of a list, 769 // meaning starts with a list marker after any indentation. 770 // (The caller is responsible for checking the line is indented, as appropriate.) 771 func isList(line string) bool { 772 _, _, ok := listMarker(line) 773 return ok 774 } 775 776 // parseLinkedText parses text that is allowed to contain explicit links, 777 // such as [math.Sin] or [Go home page], into a slice of Text items. 778 // 779 // A “pkg” is only assumed to be a full import path if it starts with 780 // a domain name (a path element with a dot) or is one of the packages 781 // from the standard library (“[os]”, “[encoding/json]”, and so on). 782 // To avoid problems with maps, generics, and array types, doc links 783 // must be both preceded and followed by punctuation, spaces, tabs, 784 // or the start or end of a line. An example problem would be treating 785 // map[ast.Expr]TypeAndValue as containing a link. 
786 func (d *parseDoc) parseLinkedText(text string) []Text { 787 var out []Text 788 wrote := 0 789 flush := func(i int) { 790 if wrote < i { 791 out = d.parseText(out, text[wrote:i], true) 792 wrote = i 793 } 794 } 795 796 start := -1 797 var buf []byte 798 for i := 0; i < len(text); i++ { 799 c := text[i] 800 if c == '\n' || c == '\t' { 801 c = ' ' 802 } 803 switch c { 804 case '[': 805 start = i 806 case ']': 807 if start >= 0 { 808 if def, ok := d.links[string(buf)]; ok { 809 def.Used = true 810 flush(start) 811 out = append(out, &Link{ 812 Text: d.parseText(nil, text[start+1:i], false), 813 URL: def.URL, 814 }) 815 wrote = i + 1 816 } else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok { 817 flush(start) 818 link.Text = d.parseText(nil, text[start+1:i], false) 819 out = append(out, link) 820 wrote = i + 1 821 } 822 } 823 start = -1 824 buf = buf[:0] 825 } 826 if start >= 0 && i != start { 827 buf = append(buf, c) 828 } 829 } 830 831 flush(len(text)) 832 return out 833 } 834 835 // docLink parses text, which was found inside [ ] brackets, 836 // as a doc link if possible, returning the DocLink and ok == true 837 // or else nil, false. 838 // The before and after strings are the text before the [ and after the ] 839 // on the same line. Doc links must be preceded and followed by 840 // punctuation, spaces, tabs, or the start or end of a line. 
841 func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) { 842 if before != "" { 843 r, _ := utf8.DecodeLastRuneInString(before) 844 if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { 845 return nil, false 846 } 847 } 848 if after != "" { 849 r, _ := utf8.DecodeRuneInString(after) 850 if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { 851 return nil, false 852 } 853 } 854 text = strings.TrimPrefix(text, "*") 855 pkg, name, ok := splitDocName(text) 856 var recv string 857 if ok { 858 pkg, recv, _ = splitDocName(pkg) 859 } 860 if pkg != "" { 861 if pkg, ok = d.lookupPkg(pkg); !ok { 862 return nil, false 863 } 864 } else { 865 if ok = d.lookupSym(recv, name); !ok { 866 return nil, false 867 } 868 } 869 link = &DocLink{ 870 ImportPath: pkg, 871 Recv: recv, 872 Name: name, 873 } 874 return link, true 875 } 876 877 // If text is of the form before.Name, where Name is a capitalized Go identifier, 878 // then splitDocName returns before, name, true. 879 // Otherwise it returns text, "", false. 880 func splitDocName(text string) (before, name string, foundDot bool) { 881 i := strings.LastIndex(text, ".") 882 name = text[i+1:] 883 if !isName(name) { 884 return text, "", false 885 } 886 if i >= 0 { 887 before = text[:i] 888 } 889 return before, name, true 890 } 891 892 // parseText parses s as text and returns the result of appending 893 // those parsed Text elements to out. 894 // parseText does not handle explicit links like [math.Sin] or [Go home page]: 895 // those are handled by parseLinkedText. 896 // If autoLink is true, then parseText recognizes URLs and words from d.Words 897 // and converts those to links as appropriate. 
898 func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text { 899 var w strings.Builder 900 wrote := 0 901 writeUntil := func(i int) { 902 w.WriteString(s[wrote:i]) 903 wrote = i 904 } 905 flush := func(i int) { 906 writeUntil(i) 907 if w.Len() > 0 { 908 out = append(out, Plain(w.String())) 909 w.Reset() 910 } 911 } 912 for i := 0; i < len(s); { 913 t := s[i:] 914 if autoLink { 915 if url, ok := autoURL(t); ok { 916 flush(i) 917 // Note: The old comment parser would look up the URL in words 918 // and replace the target with words[URL] if it was non-empty. 919 // That would allow creating links that display as one URL but 920 // when clicked go to a different URL. Not sure what the point 921 // of that is, so we're not doing that lookup here. 922 out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url}) 923 i += len(url) 924 wrote = i 925 continue 926 } 927 if id, ok := ident(t); ok { 928 url, italics := d.Words[id] 929 if !italics { 930 i += len(id) 931 continue 932 } 933 flush(i) 934 if url == "" { 935 out = append(out, Italic(id)) 936 } else { 937 out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url}) 938 } 939 i += len(id) 940 wrote = i 941 continue 942 } 943 } 944 switch { 945 case strings.HasPrefix(t, "``"): 946 if len(t) >= 3 && t[2] == '`' { 947 // Do not convert `` inside ```, in case people are mistakenly writing Markdown. 948 i += 3 949 for i < len(t) && t[i] == '`' { 950 i++ 951 } 952 break 953 } 954 writeUntil(i) 955 w.WriteRune('“') 956 i += 2 957 wrote = i 958 case strings.HasPrefix(t, "''"): 959 writeUntil(i) 960 w.WriteRune('”') 961 i += 2 962 wrote = i 963 default: 964 i++ 965 } 966 } 967 flush(len(s)) 968 return out 969 } 970 971 // autoURL checks whether s begins with a URL that should be hyperlinked. 972 // If so, it returns the URL, which is a prefix of s, and ok == true. 973 // Otherwise it returns "", false. 
// The caller should skip over the first len(url) bytes of s
// before further processing.
func autoURL(s string) (url string, ok bool) {
	// Find the ://. Fast path to pick off non-URL,
	// since we call this at every position in the string.
	// The shortest possible URL is ftp://x, 7 bytes.
	var i int
	switch {
	case len(s) < 7:
		return "", false
	case s[3] == ':':
		i = 3
	case s[4] == ':':
		i = 4
	case s[5] == ':':
		i = 5
	case s[6] == ':':
		i = 6
	default:
		return "", false
	}
	if !strings.HasPrefix(s[i:], "://") {
		return "", false
	}

	// Check valid scheme.
	if !isScheme(s[:i]) {
		return "", false
	}

	// Scan host part. Must have at least one byte,
	// and must start and end in non-punctuation.
	i += 3
	if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) {
		return "", false
	}
	i++
	end := i
	for i < len(s) && isHost(s[i]) {
		// Only a non-punctuation byte may end the host,
		// so trailing punctuation is scanned but not kept.
		if !isPunct(s[i]) {
			end = i + 1
		}
		i++
	}
	i = end

	// At this point we are definitely returning a URL (scheme://host).
	// We just have to find the longest path we can add to it.
	// Heuristics abound.
	// We allow parens, braces, and brackets,
	// but only if they match (#5043, #22285).
	// We allow .,:;?! in the path but not at the end,
	// to avoid end-of-sentence punctuation (#18139, #16565).
	var stk []byte // closing delimiters still expected, innermost last
	end = i
Path:
	for ; i < len(s); i++ {
		if isPunct(s[i]) {
			// Punctuation may appear inside the path but never extends end.
			continue
		}
		if !isPath(s[i]) {
			break
		}
		switch s[i] {
		case '(':
			stk = append(stk, ')')
		case '{':
			stk = append(stk, '}')
		case '[':
			stk = append(stk, ']')
		case ')', '}', ']':
			if len(stk) == 0 || stk[len(stk)-1] != s[i] {
				break Path
			}
			stk = stk[:len(stk)-1]
		}
		// Extend the URL only while every opened delimiter has been closed.
		if len(stk) == 0 {
			end = i + 1
		}
	}

	return s[:end], true
}

// isScheme reports whether s is a recognized URL scheme.
// Note that if strings of new length (beyond 3-6)
// are added here, the fast path at the top of autoURL will need updating,
// because it only looks for the ':' at byte offsets 3 through 6.
func isScheme(s string) bool {
	switch s {
	case "file",
		"ftp",
		"gopher",
		"http",
		"https",
		"mailto",
		"nntp":
		return true
	}
	return false
}

// inBitmap reports whether bit c is set in the 128-bit bitmap whose
// low 64 bits are lo and whose high 64 bits are hi,
// so that the byte c can be tested with a shift and an and.
// A shift by 64 or more yields zero, so for c < 64 the hi term vanishes
// (c-64 wraps around to a value >= 192), for 64 <= c < 128 the lo term
// vanishes, and for c >= 128 both terms are zero and the result is false.
func inBitmap(c byte, lo, hi uint64) bool {
	return ((uint64(1)<<c)&lo | (uint64(1)<<(c-64))&hi) != 0
}

// isHost reports whether c is a byte that can appear in a URL host,
// like www.example.com or user@[::1]:8080
func isHost(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'_' |
		1<<'@' |
		1<<'-' |
		1<<'.' |
		1<<'[' |
		1<<']' |
		1<<':'

	return inBitmap(c, mask&(1<<64-1), mask>>64)
}

// isPunct reports whether c is a punctuation byte that can appear
// inside a path but not at the end.
func isPunct(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes.
	const mask = 0 |
		1<<'.' |
		1<<',' |
		1<<':' |
		1<<';' |
		1<<'?' |
		1<<'!'

	return inBitmap(c, mask&(1<<64-1), mask>>64)
}

// isPath reports whether c is a (non-punctuation) path byte.
func isPath(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'$' |
		1<<'\'' |
		1<<'(' |
		1<<')' |
		1<<'*' |
		1<<'+' |
		1<<'&' |
		1<<'#' |
		1<<'=' |
		1<<'@' |
		1<<'~' |
		1<<'_' |
		1<<'/' |
		1<<'-' |
		1<<'[' |
		1<<']' |
		1<<'{' |
		1<<'}' |
		1<<'%'

	return inBitmap(c, mask&(1<<64-1), mask>>64)
}

// isName reports whether s is a capitalized Go identifier (like Name).
func isName(s string) bool {
	t, ok := ident(s)
	if !ok || t != s {
		return false
	}
	r, _ := utf8.DecodeRuneInString(s)
	return unicode.IsUpper(r)
}

// ident checks whether s begins with a Go identifier.
// If so, it returns the identifier, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
// The caller should skip over the first len(id) bytes of s
// before further processing.
func ident(s string) (id string, ok bool) {
	// Scan [\pL_][\pL_0-9]*
	n := 0
	for n < len(s) {
		if c := s[n]; c < utf8.RuneSelf {
			// ASCII: letters and underscore anywhere, digits only after the first byte.
			if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') {
				n++
				continue
			}
			break
		}
		// Non-ASCII: only Unicode letters are accepted.
		r, nr := utf8.DecodeRuneInString(s[n:])
		if unicode.IsLetter(r) {
			n += nr
			continue
		}
		break
	}
	return s[:n], n > 0
}

// isIdentASCII reports whether c is an ASCII identifier byte.
func isIdentASCII(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'_'

	return inBitmap(c, mask&(1<<64-1), mask>>64)
}

// validImportPath reports whether path is a valid import path.
// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
func validImportPath(path string) bool {
	if !utf8.ValidString(path) {
		return false
	}
	if path == "" {
		return false
	}
	if path[0] == '-' {
		return false
	}
	if strings.Contains(path, "//") {
		return false
	}
	if path[len(path)-1] == '/' {
		return false
	}
	// Validate each slash-separated element in turn.
	elemStart := 0
	for i, r := range path {
		if r == '/' {
			if !validImportPathElem(path[elemStart:i]) {
				return false
			}
			elemStart = i + 1
		}
	}
	return validImportPathElem(path[elemStart:])
}

// validImportPathElem reports whether elem is a valid import path element:
// non-empty, neither starting nor ending with a dot,
// and made up only of bytes accepted by importPathOK.
func validImportPathElem(elem string) bool {
	if elem == "" || elem[0] == '.' || elem[len(elem)-1] == '.' {
		return false
	}
	for i := 0; i < len(elem); i++ {
		if !importPathOK(elem[i]) {
			return false
		}
	}
	return true
}

// importPathOK reports whether c is a byte allowed in an import path element.
func importPathOK(c byte) bool {
	// mask is a 128-bit bitmap with 1s for allowed bytes.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'-' |
		1<<'.' |
		1<<'~' |
		1<<'_' |
		1<<'+'

	return inBitmap(c, mask&(1<<64-1), mask>>64)
}