github.com/bir3/gocompiler@v0.9.2202/src/go/doc/comment/parse.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package comment 6 7 import ( 8 "slices" 9 "strings" 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 // A Doc is a parsed Go doc comment. 15 type Doc struct { 16 // Content is the sequence of content blocks in the comment. 17 Content []Block 18 19 // Links is the link definitions in the comment. 20 Links []*LinkDef 21 } 22 23 // A LinkDef is a single link definition. 24 type LinkDef struct { 25 Text string // the link text 26 URL string // the link URL 27 Used bool // whether the comment uses the definition 28 } 29 30 // A Block is block-level content in a doc comment, 31 // one of [*Code], [*Heading], [*List], or [*Paragraph]. 32 type Block interface { 33 block() 34 } 35 36 // A Heading is a doc comment heading. 37 type Heading struct { 38 Text []Text // the heading text 39 } 40 41 func (*Heading) block() {} 42 43 // A List is a numbered or bullet list. 44 // Lists are always non-empty: len(Items) > 0. 45 // In a numbered list, every Items[i].Number is a non-empty string. 46 // In a bullet list, every Items[i].Number is an empty string. 47 type List struct { 48 // Items is the list items. 49 Items []*ListItem 50 51 // ForceBlankBefore indicates that the list must be 52 // preceded by a blank line when reformatting the comment, 53 // overriding the usual conditions. See the BlankBefore method. 54 // 55 // The comment parser sets ForceBlankBefore for any list 56 // that is preceded by a blank line, to make sure 57 // the blank line is preserved when printing. 58 ForceBlankBefore bool 59 60 // ForceBlankBetween indicates that list items must be 61 // separated by blank lines when reformatting the comment, 62 // overriding the usual conditions. See the BlankBetween method. 63 // 64 // The comment parser sets ForceBlankBetween for any list 65 // that has a blank line between any two of its items, to make sure 66 // the blank lines are preserved when printing. 67 ForceBlankBetween bool 68 } 69 70 func (*List) block() {} 71 72 // BlankBefore reports whether a reformatting of the comment 73 // should include a blank line before the list. 74 // The default rule is the same as for [BlankBetween]: 75 // if the list item content contains any blank lines 76 // (meaning at least one item has multiple paragraphs) 77 // then the list itself must be preceded by a blank line. 78 // A preceding blank line can be forced by setting [List].ForceBlankBefore. 79 func (l *List) BlankBefore() bool { 80 return l.ForceBlankBefore || l.BlankBetween() 81 } 82 83 // BlankBetween reports whether a reformatting of the comment 84 // should include a blank line between each pair of list items. 85 // The default rule is that if the list item content contains any blank lines 86 // (meaning at least one item has multiple paragraphs) 87 // then list items must themselves be separated by blank lines. 88 // Blank line separators can be forced by setting [List].ForceBlankBetween. 89 func (l *List) BlankBetween() bool { 90 if l.ForceBlankBetween { 91 return true 92 } 93 for _, item := range l.Items { 94 if len(item.Content) != 1 { 95 // Unreachable for parsed comments today, 96 // since the only way to get multiple item.Content 97 // is multiple paragraphs, which must have been 98 // separated by a blank line. 99 return true 100 } 101 } 102 return false 103 } 104 105 // A ListItem is a single item in a numbered or bullet list. 106 type ListItem struct { 107 // Number is a decimal string in a numbered list 108 // or an empty string in a bullet list. 109 Number string // "1", "2", ...; "" for bullet list 110 111 // Content is the list content. 112 // Currently, restrictions in the parser and printer 113 // require every element of Content to be a *Paragraph. 114 Content []Block // Content of this item. 115 } 116 117 // A Paragraph is a paragraph of text. 118 type Paragraph struct { 119 Text []Text 120 } 121 122 func (*Paragraph) block() {} 123 124 // A Code is a preformatted code block. 125 type Code struct { 126 // Text is the preformatted text, ending with a newline character. 127 // It may be multiple lines, each of which ends with a newline character. 128 // It is never empty, nor does it start or end with a blank line. 129 Text string 130 } 131 132 func (*Code) block() {} 133 134 // A Text is text-level content in a doc comment, 135 // one of [Plain], [Italic], [*Link], or [*DocLink]. 136 type Text interface { 137 text() 138 } 139 140 // A Plain is a string rendered as plain text (not italicized). 141 type Plain string 142 143 func (Plain) text() {} 144 145 // An Italic is a string rendered as italicized text. 146 type Italic string 147 148 func (Italic) text() {} 149 150 // A Link is a link to a specific URL. 151 type Link struct { 152 Auto bool // is this an automatic (implicit) link of a literal URL? 153 Text []Text // text of link 154 URL string // target URL of link 155 } 156 157 func (*Link) text() {} 158 159 // A DocLink is a link to documentation for a Go package or symbol. 160 type DocLink struct { 161 Text []Text // text of link 162 163 // ImportPath, Recv, and Name identify the Go package or symbol 164 // that is the link target. The potential combinations of 165 // non-empty fields are: 166 // - ImportPath: a link to another package 167 // - ImportPath, Name: a link to a const, func, type, or var in another package 168 // - ImportPath, Recv, Name: a link to a method in another package 169 // - Name: a link to a const, func, type, or var in this package 170 // - Recv, Name: a link to a method in this package 171 ImportPath string // import path 172 Recv string // receiver type, without any pointer star, for methods 173 Name string // const, func, type, var, or method name 174 } 175 176 func (*DocLink) text() {} 177 178 // A Parser is a doc comment parser. 179 // The fields in the struct can be filled in before calling [Parser.Parse] 180 // in order to customize the details of the parsing process. 181 type Parser struct { 182 // Words is a map of Go identifier words that 183 // should be italicized and potentially linked. 184 // If Words[w] is the empty string, then the word w 185 // is only italicized. Otherwise it is linked, using 186 // Words[w] as the link target. 187 // Words corresponds to the [go/doc.ToHTML] words parameter. 188 Words map[string]string 189 190 // LookupPackage resolves a package name to an import path. 191 // 192 // If LookupPackage(name) returns ok == true, then [name] 193 // (or [name.Sym] or [name.Sym.Method]) 194 // is considered a documentation link to importPath's package docs. 195 // It is valid to return "", true, in which case name is considered 196 // to refer to the current package. 197 // 198 // If LookupPackage(name) returns ok == false, 199 // then [name] (or [name.Sym] or [name.Sym.Method]) 200 // will not be considered a documentation link, 201 // except in the case where name is the full (but single-element) import path 202 // of a package in the standard library, such as in [math] or [io.Reader]. 203 // LookupPackage is still called for such names, 204 // in order to permit references to imports of other packages 205 // with the same package names. 206 // 207 // Setting LookupPackage to nil is equivalent to setting it to 208 // a function that always returns "", false. 209 LookupPackage func(name string) (importPath string, ok bool) 210 211 // LookupSym reports whether a symbol name or method name 212 // exists in the current package. 213 // 214 // If LookupSym("", "Name") returns true, then [Name] 215 // is considered a documentation link for a const, func, type, or var. 216 // 217 // Similarly, if LookupSym("Recv", "Name") returns true, 218 // then [Recv.Name] is considered a documentation link for 219 // type Recv's method Name. 220 // 221 // Setting LookupSym to nil is equivalent to setting it to a function 222 // that always returns false. 223 LookupSym func(recv, name string) (ok bool) 224 } 225 226 // parseDoc is parsing state for a single doc comment. 227 type parseDoc struct { 228 *Parser 229 *Doc 230 links map[string]*LinkDef 231 lines []string 232 lookupSym func(recv, name string) bool 233 } 234 235 // lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Name.Recv]. 236 // If pkg has a slash, it is assumed to be the full import path and is returned with ok = true. 237 // 238 // Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand"). 239 // d.LookupPackage provides a way for the caller to allow resolving such names with reference 240 // to the imports in the surrounding package. 241 // 242 // There is one collision between these two cases: single-element standard library names 243 // like "math" are full import paths but don't contain slashes. We let d.LookupPackage have 244 // the first chance to resolve it, in case there's a different package imported as math, 245 // and otherwise we refer to a built-in list of single-element standard library package names. 246 func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) { 247 if strings.Contains(pkg, "/") { // assume a full import path 248 if validImportPath(pkg) { 249 return pkg, true 250 } 251 return "", false 252 } 253 if d.LookupPackage != nil { 254 // Give LookupPackage a chance. 255 if path, ok := d.LookupPackage(pkg); ok { 256 return path, true 257 } 258 } 259 return DefaultLookupPackage(pkg) 260 } 261 262 func isStdPkg(path string) bool { 263 _, ok := slices.BinarySearch(stdPkgs, path) 264 return ok 265 } 266 267 // DefaultLookupPackage is the default package lookup 268 // function, used when [Parser.LookupPackage] is nil. 269 // It recognizes names of the packages from the standard 270 // library with single-element import paths, such as math, 271 // which would otherwise be impossible to name. 272 // 273 // Note that the go/doc package provides a more sophisticated 274 // lookup based on the imports used in the current package. 275 func DefaultLookupPackage(name string) (importPath string, ok bool) { 276 if isStdPkg(name) { 277 return name, true 278 } 279 return "", false 280 } 281 282 // Parse parses the doc comment text and returns the *[Doc] form. 283 // Comment markers (/* // and */) in the text must have already been removed. 284 func (p *Parser) Parse(text string) *Doc { 285 lines := unindent(strings.Split(text, "\n")) 286 d := &parseDoc{ 287 Parser: p, 288 Doc: new(Doc), 289 links: make(map[string]*LinkDef), 290 lines: lines, 291 lookupSym: func(recv, name string) bool { return false }, 292 } 293 if p.LookupSym != nil { 294 d.lookupSym = p.LookupSym 295 } 296 297 // First pass: break into block structure and collect known links. 298 // The text is all recorded as Plain for now. 299 var prev span 300 for _, s := range parseSpans(lines) { 301 var b Block 302 switch s.kind { 303 default: 304 panic("go/doc/comment: internal error: unknown span kind") 305 case spanList: 306 b = d.list(lines[s.start:s.end], prev.end < s.start) 307 case spanCode: 308 b = d.code(lines[s.start:s.end]) 309 case spanOldHeading: 310 b = d.oldHeading(lines[s.start]) 311 case spanHeading: 312 b = d.heading(lines[s.start]) 313 case spanPara: 314 b = d.paragraph(lines[s.start:s.end]) 315 } 316 if b != nil { 317 d.Content = append(d.Content, b) 318 } 319 prev = s 320 } 321 322 // Second pass: interpret all the Plain text now that we know the links. 323 for _, b := range d.Content { 324 switch b := b.(type) { 325 case *Paragraph: 326 b.Text = d.parseLinkedText(string(b.Text[0].(Plain))) 327 case *List: 328 for _, i := range b.Items { 329 for _, c := range i.Content { 330 p := c.(*Paragraph) 331 p.Text = d.parseLinkedText(string(p.Text[0].(Plain))) 332 } 333 } 334 } 335 } 336 337 return d.Doc 338 } 339 340 // A span represents a single span of comment lines (lines[start:end]) 341 // of an identified kind (code, heading, paragraph, and so on). 342 type span struct { 343 start int 344 end int 345 kind spanKind 346 } 347 348 // A spanKind describes the kind of span. 349 type spanKind int 350 351 const ( 352 _ spanKind = iota 353 spanCode 354 spanHeading 355 spanList 356 spanOldHeading 357 spanPara 358 ) 359 360 func parseSpans(lines []string) []span { 361 var spans []span 362 363 // The loop may process a line twice: once as unindented 364 // and again forced indented. So the maximum expected 365 // number of iterations is 2*len(lines). The repeating logic 366 // can be subtle, though, and to protect against introduction 367 // of infinite loops in future changes, we watch to see that 368 // we are not looping too much. A panic is better than a 369 // quiet infinite loop. 370 watchdog := 2 * len(lines) 371 372 i := 0 373 forceIndent := 0 374 Spans: 375 for { 376 // Skip blank lines. 377 for i < len(lines) && lines[i] == "" { 378 i++ 379 } 380 if i >= len(lines) { 381 break 382 } 383 if watchdog--; watchdog < 0 { 384 panic("go/doc/comment: internal error: not making progress") 385 } 386 387 var kind spanKind 388 start := i 389 end := i 390 if i < forceIndent || indented(lines[i]) { 391 // Indented (or force indented). 392 // Ends before next unindented. (Blank lines are OK.) 393 // If this is an unindented list that we are heuristically treating as indented, 394 // then accept unindented list item lines up to the first blank lines. 395 // The heuristic is disabled at blank lines to contain its effect 396 // to non-gofmt'ed sections of the comment. 397 unindentedListOK := isList(lines[i]) && i < forceIndent 398 i++ 399 for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) { 400 if lines[i] == "" { 401 unindentedListOK = false 402 } 403 i++ 404 } 405 406 // Drop trailing blank lines. 407 end = i 408 for end > start && lines[end-1] == "" { 409 end-- 410 } 411 412 // If indented lines are followed (without a blank line) 413 // by an unindented line ending in a brace, 414 // take that one line too. This fixes the common mistake 415 // of pasting in something like 416 // 417 // func main() { 418 // fmt.Println("hello, world") 419 // } 420 // 421 // and forgetting to indent it. 422 // The heuristic will never trigger on a gofmt'ed comment, 423 // because any gofmt'ed code block or list would be 424 // followed by a blank line or end of comment. 425 if end < len(lines) && strings.HasPrefix(lines[end], "}") { 426 end++ 427 } 428 429 if isList(lines[start]) { 430 kind = spanList 431 } else { 432 kind = spanCode 433 } 434 } else { 435 // Unindented. Ends at next blank or indented line. 436 i++ 437 for i < len(lines) && lines[i] != "" && !indented(lines[i]) { 438 i++ 439 } 440 end = i 441 442 // If unindented lines are followed (without a blank line) 443 // by an indented line that would start a code block, 444 // check whether the final unindented lines 445 // should be left for the indented section. 446 // This can happen for the common mistakes of 447 // unindented code or unindented lists. 448 // The heuristic will never trigger on a gofmt'ed comment, 449 // because any gofmt'ed code block would have a blank line 450 // preceding it after the unindented lines. 451 if i < len(lines) && lines[i] != "" && !isList(lines[i]) { 452 switch { 453 case isList(lines[i-1]): 454 // If the final unindented line looks like a list item, 455 // this may be the first indented line wrap of 456 // a mistakenly unindented list. 457 // Leave all the unindented list items. 458 forceIndent = end 459 end-- 460 for end > start && isList(lines[end-1]) { 461 end-- 462 } 463 464 case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`): 465 // If the final unindented line ended in { or \ 466 // it is probably the start of a misindented code block. 467 // Give the user a single line fix. 468 // Often that's enough; if not, the user can fix the others themselves. 469 forceIndent = end 470 end-- 471 } 472 473 if start == end && forceIndent > start { 474 i = start 475 continue Spans 476 } 477 } 478 479 // Span is either paragraph or heading. 480 if end-start == 1 && isHeading(lines[start]) { 481 kind = spanHeading 482 } else if end-start == 1 && isOldHeading(lines[start], lines, start) { 483 kind = spanOldHeading 484 } else { 485 kind = spanPara 486 } 487 } 488 489 spans = append(spans, span{start, end, kind}) 490 i = end 491 } 492 493 return spans 494 } 495 496 // indented reports whether line is indented 497 // (starts with a leading space or tab). 498 func indented(line string) bool { 499 return line != "" && (line[0] == ' ' || line[0] == '\t') 500 } 501 502 // unindent removes any common space/tab prefix 503 // from each line in lines, returning a copy of lines in which 504 // those prefixes have been trimmed from each line. 505 // It also replaces any lines containing only spaces with blank lines (empty strings). 506 func unindent(lines []string) []string { 507 // Trim leading and trailing blank lines. 508 for len(lines) > 0 && isBlank(lines[0]) { 509 lines = lines[1:] 510 } 511 for len(lines) > 0 && isBlank(lines[len(lines)-1]) { 512 lines = lines[:len(lines)-1] 513 } 514 if len(lines) == 0 { 515 return nil 516 } 517 518 // Compute and remove common indentation. 519 prefix := leadingSpace(lines[0]) 520 for _, line := range lines[1:] { 521 if !isBlank(line) { 522 prefix = commonPrefix(prefix, leadingSpace(line)) 523 } 524 } 525 526 out := make([]string, len(lines)) 527 for i, line := range lines { 528 line = strings.TrimPrefix(line, prefix) 529 if strings.TrimSpace(line) == "" { 530 line = "" 531 } 532 out[i] = line 533 } 534 for len(out) > 0 && out[0] == "" { 535 out = out[1:] 536 } 537 for len(out) > 0 && out[len(out)-1] == "" { 538 out = out[:len(out)-1] 539 } 540 return out 541 } 542 543 // isBlank reports whether s is a blank line. 544 func isBlank(s string) bool { 545 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 546 } 547 548 // commonPrefix returns the longest common prefix of a and b. 549 func commonPrefix(a, b string) string { 550 i := 0 551 for i < len(a) && i < len(b) && a[i] == b[i] { 552 i++ 553 } 554 return a[0:i] 555 } 556 557 // leadingSpace returns the longest prefix of s consisting of spaces and tabs. 558 func leadingSpace(s string) string { 559 i := 0 560 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 561 i++ 562 } 563 return s[:i] 564 } 565 566 // isOldHeading reports whether line is an old-style section heading. 567 // line is all[off]. 568 func isOldHeading(line string, all []string, off int) bool { 569 if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" { 570 return false 571 } 572 573 line = strings.TrimSpace(line) 574 575 // a heading must start with an uppercase letter 576 r, _ := utf8.DecodeRuneInString(line) 577 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 578 return false 579 } 580 581 // it must end in a letter or digit: 582 r, _ = utf8.DecodeLastRuneInString(line) 583 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 584 return false 585 } 586 587 // exclude lines with illegal characters. we allow "()," 588 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { 589 return false 590 } 591 592 // allow "'" for possessive "'s" only 593 for b := line; ; { 594 var ok bool 595 if _, b, ok = strings.Cut(b, "'"); !ok { 596 break 597 } 598 if b != "s" && !strings.HasPrefix(b, "s ") { 599 return false // ' not followed by s and then end-of-word 600 } 601 } 602 603 // allow "." when followed by non-space 604 for b := line; ; { 605 var ok bool 606 if _, b, ok = strings.Cut(b, "."); !ok { 607 break 608 } 609 if b == "" || strings.HasPrefix(b, " ") { 610 return false // not followed by non-space 611 } 612 } 613 614 return true 615 } 616 617 // oldHeading returns the *Heading for the given old-style section heading line. 618 func (d *parseDoc) oldHeading(line string) Block { 619 return &Heading{Text: []Text{Plain(strings.TrimSpace(line))}} 620 } 621 622 // isHeading reports whether line is a new-style section heading. 623 func isHeading(line string) bool { 624 return len(line) >= 2 && 625 line[0] == '#' && 626 (line[1] == ' ' || line[1] == '\t') && 627 strings.TrimSpace(line) != "#" 628 } 629 630 // heading returns the *Heading for the given new-style section heading line. 631 func (d *parseDoc) heading(line string) Block { 632 return &Heading{Text: []Text{Plain(strings.TrimSpace(line[1:]))}} 633 } 634 635 // code returns a code block built from the lines. 636 func (d *parseDoc) code(lines []string) *Code { 637 body := unindent(lines) 638 body = append(body, "") // to get final \n from Join 639 return &Code{Text: strings.Join(body, "\n")} 640 } 641 642 // paragraph returns a paragraph block built from the lines. 643 // If the lines are link definitions, paragraph adds them to d and returns nil. 644 func (d *parseDoc) paragraph(lines []string) Block { 645 // Is this a block of known links? Handle. 646 var defs []*LinkDef 647 for _, line := range lines { 648 def, ok := parseLink(line) 649 if !ok { 650 goto NoDefs 651 } 652 defs = append(defs, def) 653 } 654 for _, def := range defs { 655 d.Links = append(d.Links, def) 656 if d.links[def.Text] == nil { 657 d.links[def.Text] = def 658 } 659 } 660 return nil 661 NoDefs: 662 663 return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}} 664 } 665 666 // parseLink parses a single link definition line: 667 // 668 // [text]: url 669 // 670 // It returns the link definition and whether the line was well formed. 671 func parseLink(line string) (*LinkDef, bool) { 672 if line == "" || line[0] != '[' { 673 return nil, false 674 } 675 i := strings.Index(line, "]:") 676 if i < 0 || i+3 >= len(line) || (line[i+2] != ' ' && line[i+2] != '\t') { 677 return nil, false 678 } 679 680 text := line[1:i] 681 url := strings.TrimSpace(line[i+3:]) 682 j := strings.Index(url, "://") 683 if j < 0 || !isScheme(url[:j]) { 684 return nil, false 685 } 686 687 // Line has right form and has valid scheme://. 688 // That's good enough for us - we are not as picky 689 // about the characters beyond the :// as we are 690 // when extracting inline URLs from text. 691 return &LinkDef{Text: text, URL: url}, true 692 } 693 694 // list returns a list built from the indented lines, 695 // using forceBlankBefore as the value of the List's ForceBlankBefore field. 696 func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List { 697 num, _, _ := listMarker(lines[0]) 698 var ( 699 list *List = &List{ForceBlankBefore: forceBlankBefore} 700 item *ListItem 701 text []string 702 ) 703 flush := func() { 704 if item != nil { 705 if para := d.paragraph(text); para != nil { 706 item.Content = append(item.Content, para) 707 } 708 } 709 text = nil 710 } 711 712 for _, line := range lines { 713 if n, after, ok := listMarker(line); ok && (n != "") == (num != "") { 714 // start new list item 715 flush() 716 717 item = &ListItem{Number: n} 718 list.Items = append(list.Items, item) 719 line = after 720 } 721 line = strings.TrimSpace(line) 722 if line == "" { 723 list.ForceBlankBetween = true 724 flush() 725 continue 726 } 727 text = append(text, strings.TrimSpace(line)) 728 } 729 flush() 730 return list 731 } 732 733 // listMarker parses the line as beginning with a list marker. 734 // If it can do that, it returns the numeric marker ("" for a bullet list), 735 // the rest of the line, and ok == true. 736 // Otherwise, it returns "", "", false. 737 func listMarker(line string) (num, rest string, ok bool) { 738 line = strings.TrimSpace(line) 739 if line == "" { 740 return "", "", false 741 } 742 743 // Can we find a marker? 744 if r, n := utf8.DecodeRuneInString(line); r == '•' || r == '*' || r == '+' || r == '-' { 745 num, rest = "", line[n:] 746 } else if '0' <= line[0] && line[0] <= '9' { 747 n := 1 748 for n < len(line) && '0' <= line[n] && line[n] <= '9' { 749 n++ 750 } 751 if n >= len(line) || (line[n] != '.' && line[n] != ')') { 752 return "", "", false 753 } 754 num, rest = line[:n], line[n+1:] 755 } else { 756 return "", "", false 757 } 758 759 if !indented(rest) || strings.TrimSpace(rest) == "" { 760 return "", "", false 761 } 762 763 return num, rest, true 764 } 765 766 // isList reports whether the line is the first line of a list, 767 // meaning starts with a list marker after any indentation. 768 // (The caller is responsible for checking the line is indented, as appropriate.) 769 func isList(line string) bool { 770 _, _, ok := listMarker(line) 771 return ok 772 } 773 774 // parseLinkedText parses text that is allowed to contain explicit links, 775 // such as [math.Sin] or [Go home page], into a slice of Text items. 776 // 777 // A “pkg” is only assumed to be a full import path if it starts with 778 // a domain name (a path element with a dot) or is one of the packages 779 // from the standard library (“[os]”, “[encoding/json]”, and so on). 780 // To avoid problems with maps, generics, and array types, doc links 781 // must be both preceded and followed by punctuation, spaces, tabs, 782 // or the start or end of a line. An example problem would be treating 783 // map[ast.Expr]TypeAndValue as containing a link. 784 func (d *parseDoc) parseLinkedText(text string) []Text { 785 var out []Text 786 wrote := 0 787 flush := func(i int) { 788 if wrote < i { 789 out = d.parseText(out, text[wrote:i], true) 790 wrote = i 791 } 792 } 793 794 start := -1 795 var buf []byte 796 for i := 0; i < len(text); i++ { 797 c := text[i] 798 if c == '\n' || c == '\t' { 799 c = ' ' 800 } 801 switch c { 802 case '[': 803 start = i 804 case ']': 805 if start >= 0 { 806 if def, ok := d.links[string(buf)]; ok { 807 def.Used = true 808 flush(start) 809 out = append(out, &Link{ 810 Text: d.parseText(nil, text[start+1:i], false), 811 URL: def.URL, 812 }) 813 wrote = i + 1 814 } else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok { 815 flush(start) 816 link.Text = d.parseText(nil, text[start+1:i], false) 817 out = append(out, link) 818 wrote = i + 1 819 } 820 } 821 start = -1 822 buf = buf[:0] 823 } 824 if start >= 0 && i != start { 825 buf = append(buf, c) 826 } 827 } 828 829 flush(len(text)) 830 return out 831 } 832 833 // docLink parses text, which was found inside [ ] brackets, 834 // as a doc link if possible, returning the DocLink and ok == true 835 // or else nil, false. 836 // The before and after strings are the text before the [ and after the ] 837 // on the same line. Doc links must be preceded and followed by 838 // punctuation, spaces, tabs, or the start or end of a line. 839 func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) { 840 if before != "" { 841 r, _ := utf8.DecodeLastRuneInString(before) 842 if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { 843 return nil, false 844 } 845 } 846 if after != "" { 847 r, _ := utf8.DecodeRuneInString(after) 848 if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { 849 return nil, false 850 } 851 } 852 text = strings.TrimPrefix(text, "*") 853 pkg, name, ok := splitDocName(text) 854 var recv string 855 if ok { 856 pkg, recv, _ = splitDocName(pkg) 857 } 858 if pkg != "" { 859 if pkg, ok = d.lookupPkg(pkg); !ok { 860 return nil, false 861 } 862 } else { 863 if ok = d.lookupSym(recv, name); !ok { 864 return nil, false 865 } 866 } 867 link = &DocLink{ 868 ImportPath: pkg, 869 Recv: recv, 870 Name: name, 871 } 872 return link, true 873 } 874 875 // If text is of the form before.Name, where Name is a capitalized Go identifier, 876 // then splitDocName returns before, name, true. 877 // Otherwise it returns text, "", false. 878 func splitDocName(text string) (before, name string, foundDot bool) { 879 i := strings.LastIndex(text, ".") 880 name = text[i+1:] 881 if !isName(name) { 882 return text, "", false 883 } 884 if i >= 0 { 885 before = text[:i] 886 } 887 return before, name, true 888 } 889 890 // parseText parses s as text and returns the result of appending 891 // those parsed Text elements to out. 892 // parseText does not handle explicit links like [math.Sin] or [Go home page]: 893 // those are handled by parseLinkedText. 894 // If autoLink is true, then parseText recognizes URLs and words from d.Words 895 // and converts those to links as appropriate. 896 func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text { 897 var w strings.Builder 898 wrote := 0 899 writeUntil := func(i int) { 900 w.WriteString(s[wrote:i]) 901 wrote = i 902 } 903 flush := func(i int) { 904 writeUntil(i) 905 if w.Len() > 0 { 906 out = append(out, Plain(w.String())) 907 w.Reset() 908 } 909 } 910 for i := 0; i < len(s); { 911 t := s[i:] 912 if autoLink { 913 if url, ok := autoURL(t); ok { 914 flush(i) 915 // Note: The old comment parser would look up the URL in words 916 // and replace the target with words[URL] if it was non-empty. 917 // That would allow creating links that display as one URL but 918 // when clicked go to a different URL. Not sure what the point 919 // of that is, so we're not doing that lookup here. 920 out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url}) 921 i += len(url) 922 wrote = i 923 continue 924 } 925 if id, ok := ident(t); ok { 926 url, italics := d.Words[id] 927 if !italics { 928 i += len(id) 929 continue 930 } 931 flush(i) 932 if url == "" { 933 out = append(out, Italic(id)) 934 } else { 935 out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url}) 936 } 937 i += len(id) 938 wrote = i 939 continue 940 } 941 } 942 switch { 943 case strings.HasPrefix(t, "``"): 944 if len(t) >= 3 && t[2] == '`' { 945 // Do not convert `` inside ```, in case people are mistakenly writing Markdown. 946 i += 3 947 for i < len(t) && t[i] == '`' { 948 i++ 949 } 950 break 951 } 952 writeUntil(i) 953 w.WriteRune('“') 954 i += 2 955 wrote = i 956 case strings.HasPrefix(t, "''"): 957 writeUntil(i) 958 w.WriteRune('”') 959 i += 2 960 wrote = i 961 default: 962 i++ 963 } 964 } 965 flush(len(s)) 966 return out 967 } 968 969 // autoURL checks whether s begins with a URL that should be hyperlinked. 970 // If so, it returns the URL, which is a prefix of s, and ok == true. 971 // Otherwise it returns "", false. 972 // The caller should skip over the first len(url) bytes of s 973 // before further processing. 974 func autoURL(s string) (url string, ok bool) { 975 // Find the ://. Fast path to pick off non-URL, 976 // since we call this at every position in the string. 977 // The shortest possible URL is ftp://x, 7 bytes. 978 var i int 979 switch { 980 case len(s) < 7: 981 return "", false 982 case s[3] == ':': 983 i = 3 984 case s[4] == ':': 985 i = 4 986 case s[5] == ':': 987 i = 5 988 case s[6] == ':': 989 i = 6 990 default: 991 return "", false 992 } 993 if i+3 > len(s) || s[i:i+3] != "://" { 994 return "", false 995 } 996 997 // Check valid scheme. 998 if !isScheme(s[:i]) { 999 return "", false 1000 } 1001 1002 // Scan host part. Must have at least one byte, 1003 // and must start and end in non-punctuation. 1004 i += 3 1005 if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) { 1006 return "", false 1007 } 1008 i++ 1009 end := i 1010 for i < len(s) && isHost(s[i]) { 1011 if !isPunct(s[i]) { 1012 end = i + 1 1013 } 1014 i++ 1015 } 1016 i = end 1017 1018 // At this point we are definitely returning a URL (scheme://host). 1019 // We just have to find the longest path we can add to it. 1020 // Heuristics abound. 1021 // We allow parens, braces, and brackets, 1022 // but only if they match (#5043, #22285). 1023 // We allow .,:;?! in the path but not at the end, 1024 // to avoid end-of-sentence punctuation (#18139, #16565). 1025 stk := []byte{} 1026 end = i 1027 Path: 1028 for ; i < len(s); i++ { 1029 if isPunct(s[i]) { 1030 continue 1031 } 1032 if !isPath(s[i]) { 1033 break 1034 } 1035 switch s[i] { 1036 case '(': 1037 stk = append(stk, ')') 1038 case '{': 1039 stk = append(stk, '}') 1040 case '[': 1041 stk = append(stk, ']') 1042 case ')', '}', ']': 1043 if len(stk) == 0 || stk[len(stk)-1] != s[i] { 1044 break Path 1045 } 1046 stk = stk[:len(stk)-1] 1047 } 1048 if len(stk) == 0 { 1049 end = i + 1 1050 } 1051 } 1052 1053 return s[:end], true 1054 } 1055 1056 // isScheme reports whether s is a recognized URL scheme. 1057 // Note that if strings of new length (beyond 3-7) 1058 // are added here, the fast path at the top of autoURL will need updating. 1059 func isScheme(s string) bool { 1060 switch s { 1061 case "file", 1062 "ftp", 1063 "gopher", 1064 "http", 1065 "https", 1066 "mailto", 1067 "nntp": 1068 return true 1069 } 1070 return false 1071 } 1072 1073 // isHost reports whether c is a byte that can appear in a URL host, 1074 // like www.example.com or user@[::1]:8080 1075 func isHost(c byte) bool { 1076 // mask is a 128-bit bitmap with 1s for allowed bytes, 1077 // so that the byte c can be tested with a shift and an and. 1078 // If c > 128, then 1<<c and 1<<(c-64) will both be zero, 1079 // and this function will return false. 1080 const mask = 0 | 1081 (1<<26-1)<<'A' | 1082 (1<<26-1)<<'a' | 1083 (1<<10-1)<<'0' | 1084 1<<'_' | 1085 1<<'@' | 1086 1<<'-' | 1087 1<<'.' | 1088 1<<'[' | 1089 1<<']' | 1090 1<<':' 1091 1092 return ((uint64(1)<<c)&(mask&(1<<64-1)) | 1093 (uint64(1)<<(c-64))&(mask>>64)) != 0 1094 } 1095 1096 // isPunct reports whether c is a punctuation byte that can appear 1097 // inside a path but not at the end. 1098 func isPunct(c byte) bool { 1099 // mask is a 128-bit bitmap with 1s for allowed bytes, 1100 // so that the byte c can be tested with a shift and an and. 1101 // If c > 128, then 1<<c and 1<<(c-64) will both be zero, 1102 // and this function will return false. 1103 const mask = 0 | 1104 1<<'.' | 1105 1<<',' | 1106 1<<':' | 1107 1<<';' | 1108 1<<'?' | 1109 1<<'!' 1110 1111 return ((uint64(1)<<c)&(mask&(1<<64-1)) | 1112 (uint64(1)<<(c-64))&(mask>>64)) != 0 1113 } 1114 1115 // isPath reports whether c is a (non-punctuation) path byte. 1116 func isPath(c byte) bool { 1117 // mask is a 128-bit bitmap with 1s for allowed bytes, 1118 // so that the byte c can be tested with a shift and an and. 1119 // If c > 128, then 1<<c and 1<<(c-64) will both be zero, 1120 // and this function will return false. 1121 const mask = 0 | 1122 (1<<26-1)<<'A' | 1123 (1<<26-1)<<'a' | 1124 (1<<10-1)<<'0' | 1125 1<<'$' | 1126 1<<'\'' | 1127 1<<'(' | 1128 1<<')' | 1129 1<<'*' | 1130 1<<'+' | 1131 1<<'&' | 1132 1<<'#' | 1133 1<<'=' | 1134 1<<'@' | 1135 1<<'~' | 1136 1<<'_' | 1137 1<<'/' | 1138 1<<'-' | 1139 1<<'[' | 1140 1<<']' | 1141 1<<'{' | 1142 1<<'}' | 1143 1<<'%' 1144 1145 return ((uint64(1)<<c)&(mask&(1<<64-1)) | 1146 (uint64(1)<<(c-64))&(mask>>64)) != 0 1147 } 1148 1149 // isName reports whether s is a capitalized Go identifier (like Name). 1150 func isName(s string) bool { 1151 t, ok := ident(s) 1152 if !ok || t != s { 1153 return false 1154 } 1155 r, _ := utf8.DecodeRuneInString(s) 1156 return unicode.IsUpper(r) 1157 } 1158 1159 // ident checks whether s begins with a Go identifier. 1160 // If so, it returns the identifier, which is a prefix of s, and ok == true. 1161 // Otherwise it returns "", false. 1162 // The caller should skip over the first len(id) bytes of s 1163 // before further processing. 1164 func ident(s string) (id string, ok bool) { 1165 // Scan [\pL_][\pL_0-9]* 1166 n := 0 1167 for n < len(s) { 1168 if c := s[n]; c < utf8.RuneSelf { 1169 if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') { 1170 n++ 1171 continue 1172 } 1173 break 1174 } 1175 r, nr := utf8.DecodeRuneInString(s[n:]) 1176 if unicode.IsLetter(r) { 1177 n += nr 1178 continue 1179 } 1180 break 1181 } 1182 return s[:n], n > 0 1183 } 1184 1185 // isIdentASCII reports whether c is an ASCII identifier byte. 1186 func isIdentASCII(c byte) bool { 1187 // mask is a 128-bit bitmap with 1s for allowed bytes, 1188 // so that the byte c can be tested with a shift and an and. 1189 // If c > 128, then 1<<c and 1<<(c-64) will both be zero, 1190 // and this function will return false. 1191 const mask = 0 | 1192 (1<<26-1)<<'A' | 1193 (1<<26-1)<<'a' | 1194 (1<<10-1)<<'0' | 1195 1<<'_' 1196 1197 return ((uint64(1)<<c)&(mask&(1<<64-1)) | 1198 (uint64(1)<<(c-64))&(mask>>64)) != 0 1199 } 1200 1201 // validImportPath reports whether path is a valid import path. 1202 // It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath. 1203 func validImportPath(path string) bool { 1204 if !utf8.ValidString(path) { 1205 return false 1206 } 1207 if path == "" { 1208 return false 1209 } 1210 if path[0] == '-' { 1211 return false 1212 } 1213 if strings.Contains(path, "//") { 1214 return false 1215 } 1216 if path[len(path)-1] == '/' { 1217 return false 1218 } 1219 elemStart := 0 1220 for i, r := range path { 1221 if r == '/' { 1222 if !validImportPathElem(path[elemStart:i]) { 1223 return false 1224 } 1225 elemStart = i + 1 1226 } 1227 } 1228 return validImportPathElem(path[elemStart:]) 1229 } 1230 1231 func validImportPathElem(elem string) bool { 1232 if elem == "" || elem[0] == '.' || elem[len(elem)-1] == '.' { 1233 return false 1234 } 1235 for i := 0; i < len(elem); i++ { 1236 if !importPathOK(elem[i]) { 1237 return false 1238 } 1239 } 1240 return true 1241 } 1242 1243 func importPathOK(c byte) bool { 1244 // mask is a 128-bit bitmap with 1s for allowed bytes, 1245 // so that the byte c can be tested with a shift and an and. 1246 // If c > 128, then 1<<c and 1<<(c-64) will both be zero, 1247 // and this function will return false. 1248 const mask = 0 | 1249 (1<<26-1)<<'A' | 1250 (1<<26-1)<<'a' | 1251 (1<<10-1)<<'0' | 1252 1<<'-' | 1253 1<<'.' | 1254 1<<'~' | 1255 1<<'_' | 1256 1<<'+' 1257 1258 return ((uint64(1)<<c)&(mask&(1<<64-1)) | 1259 (uint64(1)<<(c-64))&(mask>>64)) != 0 1260 }