golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/publicsuffix/gen.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 7 package main 8 9 // This program generates table.go and table_test.go based on the authoritative 10 // public suffix list at https://publicsuffix.org/list/effective_tld_names.dat 11 // 12 // The version is derived from 13 // https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat 14 // and a human-readable form is at 15 // https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat 16 // 17 // To fetch a particular git revision, such as 5c70ccd250, pass 18 // -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat" 19 // and -version "an explicit version string". 20 21 import ( 22 "bufio" 23 "bytes" 24 "encoding/binary" 25 "flag" 26 "fmt" 27 "go/format" 28 "io" 29 "io/ioutil" 30 "net/http" 31 "os" 32 "regexp" 33 "sort" 34 "strings" 35 36 "golang.org/x/net/idna" 37 ) 38 39 const ( 40 // This must be a multiple of 8 and no greater than 64. 41 // Update nodeValue in list.go if this changes. 42 nodesBits = 40 43 44 // These sum of these four values must be no greater than nodesBits. 45 nodesBitsChildren = 10 46 nodesBitsICANN = 1 47 nodesBitsTextOffset = 16 48 nodesBitsTextLength = 6 49 50 // These sum of these four values must be no greater than 32. 51 childrenBitsWildcard = 1 52 childrenBitsNodeType = 2 53 childrenBitsHi = 14 54 childrenBitsLo = 14 55 ) 56 57 var ( 58 combinedText string 59 maxChildren int 60 maxTextOffset int 61 maxTextLength int 62 maxHi uint32 63 maxLo uint32 64 ) 65 66 func max(a, b int) int { 67 if a < b { 68 return b 69 } 70 return a 71 } 72 73 func u32max(a, b uint32) uint32 { 74 if a < b { 75 return b 76 } 77 return a 78 } 79 80 const ( 81 nodeTypeNormal = 0 82 nodeTypeException = 1 83 nodeTypeParentOnly = 2 84 numNodeType = 3 85 ) 86 87 func nodeTypeStr(n int) string { 88 switch n { 89 case nodeTypeNormal: 90 return "+" 91 case nodeTypeException: 92 return "!" 93 case nodeTypeParentOnly: 94 return "o" 95 } 96 panic("unreachable") 97 } 98 99 const ( 100 defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat" 101 gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat" 102 ) 103 104 var ( 105 labelEncoding = map[string]uint64{} 106 labelsList = []string{} 107 labelsMap = map[string]bool{} 108 rules = []string{} 109 numICANNRules = 0 110 111 // validSuffixRE is used to check that the entries in the public suffix 112 // list are in canonical form (after Punycode encoding). Specifically, 113 // capital letters are not allowed. 114 validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`) 115 116 shaRE = regexp.MustCompile(`"sha":"([^"]+)"`) 117 dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`) 118 119 subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging") 120 url = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead") 121 v = flag.Bool("v", false, "verbose output (to stderr)") 122 version = flag.String("version", "", "the effective_tld_names.dat version") 123 ) 124 125 func main() { 126 if err := main1(); err != nil { 127 fmt.Fprintln(os.Stderr, err) 128 os.Exit(1) 129 } 130 } 131 132 func main1() error { 133 flag.Parse() 134 if nodesBits > 64 { 135 return fmt.Errorf("nodesBits is too large") 136 } 137 if nodesBits%8 != 0 { 138 return fmt.Errorf("nodesBits must be a multiple of 8") 139 } 140 if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > nodesBits { 141 return fmt.Errorf("not enough bits to encode the nodes table") 142 } 143 if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 { 144 return fmt.Errorf("not enough bits to encode the children table") 145 } 146 if *version == "" { 147 if *url != defaultURL { 148 return fmt.Errorf("-version was not specified, and the -url is not the default one") 149 } 150 sha, date, err := gitCommit() 151 if err != nil { 152 return err 153 } 154 *version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date) 155 } 156 var r io.Reader = os.Stdin 157 if *url != "" { 158 res, err := http.Get(*url) 159 if err != nil { 160 return err 161 } 162 if res.StatusCode != http.StatusOK { 163 return fmt.Errorf("bad GET status for %s: %s", *url, res.Status) 164 } 165 r = res.Body 166 defer res.Body.Close() 167 } 168 169 var root node 170 icann := false 171 br := bufio.NewReader(r) 172 for { 173 s, err := br.ReadString('\n') 174 if err != nil { 175 if err == io.EOF { 176 break 177 } 178 return err 179 } 180 s = strings.TrimSpace(s) 181 if strings.Contains(s, "BEGIN ICANN DOMAINS") { 182 if len(rules) != 0 { 183 return fmt.Errorf(`expected no rules before "BEGIN ICANN DOMAINS"`) 184 } 185 icann = true 186 continue 187 } 188 if strings.Contains(s, "END ICANN DOMAINS") { 189 icann, numICANNRules = false, len(rules) 190 continue 191 } 192 if s == "" || strings.HasPrefix(s, "//") { 193 continue 194 } 195 s, err = idna.ToASCII(s) 196 if err != nil { 197 return err 198 } 199 if !validSuffixRE.MatchString(s) { 200 return fmt.Errorf("bad publicsuffix.org list data: %q", s) 201 } 202 203 if *subset { 204 switch { 205 case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"): 206 case s == "ak.us" || strings.HasSuffix(s, ".ak.us"): 207 case s == "ao" || strings.HasSuffix(s, ".ao"): 208 case s == "ar" || strings.HasSuffix(s, ".ar"): 209 case s == "arpa" || strings.HasSuffix(s, ".arpa"): 210 case s == "cy" || strings.HasSuffix(s, ".cy"): 211 case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"): 212 case s == "jp": 213 case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"): 214 case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"): 215 case s == "om" || strings.HasSuffix(s, ".om"): 216 case s == "uk" || strings.HasSuffix(s, ".uk"): 217 case s == "uk.com" || strings.HasSuffix(s, ".uk.com"): 218 case s == "tw" || strings.HasSuffix(s, ".tw"): 219 case s == "zw" || strings.HasSuffix(s, ".zw"): 220 case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"): 221 // xn--p1ai is Russian-Cyrillic "рф". 222 default: 223 continue 224 } 225 } 226 227 rules = append(rules, s) 228 229 nt, wildcard := nodeTypeNormal, false 230 switch { 231 case strings.HasPrefix(s, "*."): 232 s, nt = s[2:], nodeTypeParentOnly 233 wildcard = true 234 case strings.HasPrefix(s, "!"): 235 s, nt = s[1:], nodeTypeException 236 } 237 labels := strings.Split(s, ".") 238 for n, i := &root, len(labels)-1; i >= 0; i-- { 239 label := labels[i] 240 n = n.child(label) 241 if i == 0 { 242 if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly { 243 n.nodeType = nt 244 } 245 n.icann = n.icann && icann 246 n.wildcard = n.wildcard || wildcard 247 } 248 labelsMap[label] = true 249 } 250 } 251 labelsList = make([]string, 0, len(labelsMap)) 252 for label := range labelsMap { 253 labelsList = append(labelsList, label) 254 } 255 sort.Strings(labelsList) 256 257 combinedText = combineText(labelsList) 258 if combinedText == "" { 259 return fmt.Errorf("internal error: combineText returned no text") 260 } 261 for _, label := range labelsList { 262 offset, length := strings.Index(combinedText, label), len(label) 263 if offset < 0 { 264 return fmt.Errorf("internal error: could not find %q in text %q", label, combinedText) 265 } 266 maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length) 267 if offset >= 1<<nodesBitsTextOffset { 268 return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset) 269 } 270 if length >= 1<<nodesBitsTextLength { 271 return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length) 272 } 273 labelEncoding[label] = uint64(offset)<<nodesBitsTextLength | uint64(length) 274 } 275 276 if err := root.walk(assignIndexes); err != nil { 277 return err 278 } 279 280 if err := generate(printMetadata, &root, "table.go"); err != nil { 281 return err 282 } 283 if err := generateBinaryData(&root, combinedText); err != nil { 284 return err 285 } 286 if err := generate(printTest, &root, "table_test.go"); err != nil { 287 return err 288 } 289 return nil 290 } 291 292 func generate(p func(io.Writer, *node) error, root *node, filename string) error { 293 buf := new(bytes.Buffer) 294 if err := p(buf, root); err != nil { 295 return err 296 } 297 b, err := format.Source(buf.Bytes()) 298 if err != nil { 299 return err 300 } 301 return ioutil.WriteFile(filename, b, 0644) 302 } 303 304 func gitCommit() (sha, date string, retErr error) { 305 res, err := http.Get(gitCommitURL) 306 if err != nil { 307 return "", "", err 308 } 309 if res.StatusCode != http.StatusOK { 310 return "", "", fmt.Errorf("bad GET status for %s: %s", gitCommitURL, res.Status) 311 } 312 defer res.Body.Close() 313 b, err := ioutil.ReadAll(res.Body) 314 if err != nil { 315 return "", "", err 316 } 317 if m := shaRE.FindSubmatch(b); m != nil { 318 sha = string(m[1]) 319 } 320 if m := dateRE.FindSubmatch(b); m != nil { 321 date = string(m[1]) 322 } 323 if sha == "" || date == "" { 324 retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL) 325 } 326 return sha, date, retErr 327 } 328 329 func printTest(w io.Writer, n *node) error { 330 fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n") 331 fmt.Fprintf(w, "package publicsuffix\n\nconst numICANNRules = %d\n\nvar rules = [...]string{\n", numICANNRules) 332 for _, rule := range rules { 333 fmt.Fprintf(w, "%q,\n", rule) 334 } 335 fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n") 336 if err := n.walk(func(n *node) error { 337 return printNodeLabel(w, n) 338 }); err != nil { 339 return err 340 } 341 fmt.Fprintf(w, "}\n") 342 return nil 343 } 344 345 func generateBinaryData(root *node, combinedText string) error { 346 if err := os.WriteFile("data/text", []byte(combinedText), 0666); err != nil { 347 return err 348 } 349 350 var nodes []byte 351 if err := root.walk(func(n *node) error { 352 for _, c := range n.children { 353 nodes = appendNodeEncoding(nodes, c) 354 } 355 return nil 356 }); err != nil { 357 return err 358 } 359 if err := os.WriteFile("data/nodes", nodes, 0666); err != nil { 360 return err 361 } 362 363 var children []byte 364 for _, c := range childrenEncoding { 365 children = binary.BigEndian.AppendUint32(children, c) 366 } 367 if err := os.WriteFile("data/children", children, 0666); err != nil { 368 return err 369 } 370 371 return nil 372 } 373 374 func appendNodeEncoding(b []byte, n *node) []byte { 375 encoding := labelEncoding[n.label] 376 if n.icann { 377 encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset) 378 } 379 encoding |= uint64(n.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN) 380 for i := nodesBits - 8; i >= 0; i -= 8 { 381 b = append(b, byte((encoding>>i)&0xff)) 382 } 383 return b 384 } 385 386 func printMetadata(w io.Writer, n *node) error { 387 const header = `// generated by go run gen.go; DO NOT EDIT 388 389 package publicsuffix 390 391 import _ "embed" 392 393 const version = %q 394 395 const ( 396 nodesBits = %d 397 nodesBitsChildren = %d 398 nodesBitsICANN = %d 399 nodesBitsTextOffset = %d 400 nodesBitsTextLength = %d 401 402 childrenBitsWildcard = %d 403 childrenBitsNodeType = %d 404 childrenBitsHi = %d 405 childrenBitsLo = %d 406 ) 407 408 const ( 409 nodeTypeNormal = %d 410 nodeTypeException = %d 411 nodeTypeParentOnly = %d 412 ) 413 414 // numTLD is the number of top level domains. 415 const numTLD = %d 416 417 // text is the combined text of all labels. 418 // 419 //go:embed data/text 420 var text string 421 422 ` 423 fmt.Fprintf(w, header, *version, 424 nodesBits, 425 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength, 426 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo, 427 nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children)) 428 fmt.Fprintf(w, ` 429 // nodes is the list of nodes. Each node is represented as a %v-bit integer, 430 // which encodes the node's children, wildcard bit and node type (as an index 431 // into the children array), ICANN bit and text. 432 // 433 // The layout within the node, from MSB to LSB, is: 434 // [%2d bits] unused 435 // [%2d bits] children index 436 // [%2d bits] ICANN bit 437 // [%2d bits] text index 438 // [%2d bits] text length 439 // 440 //go:embed data/nodes 441 var nodes uint40String 442 `, 443 nodesBits, 444 nodesBits-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength, 445 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength) 446 fmt.Fprintf(w, ` 447 // children is the list of nodes' children, the parent's wildcard bit and the 448 // parent's node type. If a node has no children then their children index 449 // will be in the range [0, 6), depending on the wildcard bit and node type. 450 // 451 // The layout within the uint32, from MSB to LSB, is: 452 // [%2d bits] unused 453 // [%2d bits] wildcard bit 454 // [%2d bits] node type 455 // [%2d bits] high nodes index (exclusive) of children 456 // [%2d bits] low nodes index (inclusive) of children 457 // 458 //go:embed data/children 459 var children uint32String 460 `, 461 32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo, 462 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo) 463 464 fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1) 465 fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1) 466 fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1) 467 fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1) 468 fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1) 469 return nil 470 } 471 472 type node struct { 473 label string 474 nodeType int 475 icann bool 476 wildcard bool 477 // nodesIndex and childrenIndex are the index of this node in the nodes 478 // and the index of its children offset/length in the children arrays. 479 nodesIndex, childrenIndex int 480 // firstChild is the index of this node's first child, or zero if this 481 // node has no children. 482 firstChild int 483 // children are the node's children, in strictly increasing node label order. 484 children []*node 485 } 486 487 func (n *node) walk(f func(*node) error) error { 488 if err := f(n); err != nil { 489 return err 490 } 491 for _, c := range n.children { 492 if err := c.walk(f); err != nil { 493 return err 494 } 495 } 496 return nil 497 } 498 499 // child returns the child of n with the given label. The child is created if 500 // it did not exist beforehand. 501 func (n *node) child(label string) *node { 502 for _, c := range n.children { 503 if c.label == label { 504 return c 505 } 506 } 507 c := &node{ 508 label: label, 509 nodeType: nodeTypeParentOnly, 510 icann: true, 511 } 512 n.children = append(n.children, c) 513 sort.Sort(byLabel(n.children)) 514 return c 515 } 516 517 type byLabel []*node 518 519 func (b byLabel) Len() int { return len(b) } 520 func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 521 func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label } 522 523 var nextNodesIndex int 524 525 // childrenEncoding are the encoded entries in the generated children array. 526 // All these pre-defined entries have no children. 527 var childrenEncoding = []uint32{ 528 0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal. 529 1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException. 530 2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly. 531 4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal. 532 5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException. 533 6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly. 534 } 535 536 var firstCallToAssignIndexes = true 537 538 func assignIndexes(n *node) error { 539 if len(n.children) != 0 { 540 // Assign nodesIndex. 541 n.firstChild = nextNodesIndex 542 for _, c := range n.children { 543 c.nodesIndex = nextNodesIndex 544 nextNodesIndex++ 545 } 546 547 // The root node's children is implicit. 548 if firstCallToAssignIndexes { 549 firstCallToAssignIndexes = false 550 return nil 551 } 552 553 // Assign childrenIndex. 554 maxChildren = max(maxChildren, len(childrenEncoding)) 555 if len(childrenEncoding) >= 1<<nodesBitsChildren { 556 return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding)) 557 } 558 n.childrenIndex = len(childrenEncoding) 559 lo := uint32(n.firstChild) 560 hi := lo + uint32(len(n.children)) 561 maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi) 562 if lo >= 1<<childrenBitsLo { 563 return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo) 564 } 565 if hi >= 1<<childrenBitsHi { 566 return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi) 567 } 568 enc := hi<<childrenBitsLo | lo 569 enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi) 570 if n.wildcard { 571 enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType) 572 } 573 childrenEncoding = append(childrenEncoding, enc) 574 } else { 575 n.childrenIndex = n.nodeType 576 if n.wildcard { 577 n.childrenIndex += numNodeType 578 } 579 } 580 return nil 581 } 582 583 func printNodeLabel(w io.Writer, n *node) error { 584 for _, c := range n.children { 585 fmt.Fprintf(w, "%q,\n", c.label) 586 } 587 return nil 588 } 589 590 func icannStr(icann bool) string { 591 if icann { 592 return "I" 593 } 594 return " " 595 } 596 597 func wildcardStr(wildcard bool) string { 598 if wildcard { 599 return "*" 600 } 601 return " " 602 } 603 604 // combineText combines all the strings in labelsList to form one giant string. 605 // Overlapping strings will be merged: "arpa" and "parliament" could yield 606 // "arparliament". 607 func combineText(labelsList []string) string { 608 beforeLength := 0 609 for _, s := range labelsList { 610 beforeLength += len(s) 611 } 612 613 text := crush(removeSubstrings(labelsList)) 614 if *v { 615 fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text)) 616 } 617 return text 618 } 619 620 type byLength []string 621 622 func (s byLength) Len() int { return len(s) } 623 func (s byLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 624 func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) } 625 626 // removeSubstrings returns a copy of its input with any strings removed 627 // that are substrings of other provided strings. 628 func removeSubstrings(input []string) []string { 629 // Make a copy of input. 630 ss := append(make([]string, 0, len(input)), input...) 631 sort.Sort(byLength(ss)) 632 633 for i, shortString := range ss { 634 // For each string, only consider strings higher than it in sort order, i.e. 635 // of equal length or greater. 636 for _, longString := range ss[i+1:] { 637 if strings.Contains(longString, shortString) { 638 ss[i] = "" 639 break 640 } 641 } 642 } 643 644 // Remove the empty strings. 645 sort.Strings(ss) 646 for len(ss) > 0 && ss[0] == "" { 647 ss = ss[1:] 648 } 649 return ss 650 } 651 652 // crush combines a list of strings, taking advantage of overlaps. It returns a 653 // single string that contains each input string as a substring. 654 func crush(ss []string) string { 655 maxLabelLen := 0 656 for _, s := range ss { 657 if maxLabelLen < len(s) { 658 maxLabelLen = len(s) 659 } 660 } 661 662 for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- { 663 prefixes := makePrefixMap(ss, prefixLen) 664 for i, s := range ss { 665 if len(s) <= prefixLen { 666 continue 667 } 668 mergeLabel(ss, i, prefixLen, prefixes) 669 } 670 } 671 672 return strings.Join(ss, "") 673 } 674 675 // mergeLabel merges the label at ss[i] with the first available matching label 676 // in prefixMap, where the last "prefixLen" characters in ss[i] match the first 677 // "prefixLen" characters in the matching label. 678 // It will merge ss[i] repeatedly until no more matches are available. 679 // All matching labels merged into ss[i] are replaced by "". 680 func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) { 681 s := ss[i] 682 suffix := s[len(s)-prefixLen:] 683 for _, j := range prefixes[suffix] { 684 // Empty strings mean "already used." Also avoid merging with self. 685 if ss[j] == "" || i == j { 686 continue 687 } 688 if *v { 689 fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n", 690 prefixLen, i, j, ss[i], ss[j], suffix) 691 } 692 ss[i] += ss[j][prefixLen:] 693 ss[j] = "" 694 // ss[i] has a new suffix, so merge again if possible. 695 // Note: we only have to merge again at the same prefix length. Shorter 696 // prefix lengths will be handled in the next iteration of crush's for loop. 697 // Can there be matches for longer prefix lengths, introduced by the merge? 698 // I believe that any such matches would by necessity have been eliminated 699 // during substring removal or merged at a higher prefix length. For 700 // instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde" 701 // would yield "abcde", which could be merged with "bcdef." However, in 702 // practice "cde" would already have been elimintated by removeSubstrings. 703 mergeLabel(ss, i, prefixLen, prefixes) 704 return 705 } 706 } 707 708 // prefixMap maps from a prefix to a list of strings containing that prefix. The 709 // list of strings is represented as indexes into a slice of strings stored 710 // elsewhere. 711 type prefixMap map[string][]int 712 713 // makePrefixMap constructs a prefixMap from a slice of strings. 714 func makePrefixMap(ss []string, prefixLen int) prefixMap { 715 prefixes := make(prefixMap) 716 for i, s := range ss { 717 // We use < rather than <= because if a label matches on a prefix equal to 718 // its full length, that's actually a substring match handled by 719 // removeSubstrings. 720 if prefixLen < len(s) { 721 prefix := s[:prefixLen] 722 prefixes[prefix] = append(prefixes[prefix], i) 723 } 724 } 725 726 return prefixes 727 }