golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/html/atom/gen.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 7 //go:generate go run gen.go 8 //go:generate go run gen.go -test 9 10 package main 11 12 import ( 13 "bytes" 14 "flag" 15 "fmt" 16 "go/format" 17 "io/ioutil" 18 "math/rand" 19 "os" 20 "sort" 21 "strings" 22 ) 23 24 // identifier converts s to a Go exported identifier. 25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset". 26 func identifier(s string) string { 27 b := make([]byte, 0, len(s)) 28 cap := true 29 for _, c := range s { 30 if c == '-' { 31 cap = true 32 continue 33 } 34 if cap && 'a' <= c && c <= 'z' { 35 c -= 'a' - 'A' 36 } 37 cap = false 38 b = append(b, byte(c)) 39 } 40 return string(b) 41 } 42 43 var test = flag.Bool("test", false, "generate table_test.go") 44 45 func genFile(name string, buf *bytes.Buffer) { 46 b, err := format.Source(buf.Bytes()) 47 if err != nil { 48 fmt.Fprintln(os.Stderr, err) 49 os.Exit(1) 50 } 51 if err := ioutil.WriteFile(name, b, 0644); err != nil { 52 fmt.Fprintln(os.Stderr, err) 53 os.Exit(1) 54 } 55 } 56 57 func main() { 58 flag.Parse() 59 60 var all []string 61 all = append(all, elements...) 62 all = append(all, attributes...) 63 all = append(all, eventHandlers...) 64 all = append(all, extra...) 65 sort.Strings(all) 66 67 // uniq - lists have dups 68 w := 0 69 for _, s := range all { 70 if w == 0 || all[w-1] != s { 71 all[w] = s 72 w++ 73 } 74 } 75 all = all[:w] 76 77 if *test { 78 var buf bytes.Buffer 79 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") 80 fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") 81 fmt.Fprintln(&buf, "package atom\n") 82 fmt.Fprintln(&buf, "var testAtomList = []string{") 83 for _, s := range all { 84 fmt.Fprintf(&buf, "\t%q,\n", s) 85 } 86 fmt.Fprintln(&buf, "}") 87 88 genFile("table_test.go", &buf) 89 return 90 } 91 92 // Find hash that minimizes table size. 93 var best *table 94 for i := 0; i < 1000000; i++ { 95 if best != nil && 1<<(best.k-1) < len(all) { 96 break 97 } 98 h := rand.Uint32() 99 for k := uint(0); k <= 16; k++ { 100 if best != nil && k >= best.k { 101 break 102 } 103 var t table 104 if t.init(h, k, all) { 105 best = &t 106 break 107 } 108 } 109 } 110 if best == nil { 111 fmt.Fprintf(os.Stderr, "failed to construct string table\n") 112 os.Exit(1) 113 } 114 115 // Lay out strings, using overlaps when possible. 116 layout := append([]string{}, all...) 117 118 // Remove strings that are substrings of other strings 119 for changed := true; changed; { 120 changed = false 121 for i, s := range layout { 122 if s == "" { 123 continue 124 } 125 for j, t := range layout { 126 if i != j && t != "" && strings.Contains(s, t) { 127 changed = true 128 layout[j] = "" 129 } 130 } 131 } 132 } 133 134 // Join strings where one suffix matches another prefix. 135 for { 136 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], 137 // maximizing overlap length k. 138 besti := -1 139 bestj := -1 140 bestk := 0 141 for i, s := range layout { 142 if s == "" { 143 continue 144 } 145 for j, t := range layout { 146 if i == j { 147 continue 148 } 149 for k := bestk + 1; k <= len(s) && k <= len(t); k++ { 150 if s[len(s)-k:] == t[:k] { 151 besti = i 152 bestj = j 153 bestk = k 154 } 155 } 156 } 157 } 158 if bestk > 0 { 159 layout[besti] += layout[bestj][bestk:] 160 layout[bestj] = "" 161 continue 162 } 163 break 164 } 165 166 text := strings.Join(layout, "") 167 168 atom := map[string]uint32{} 169 for _, s := range all { 170 off := strings.Index(text, s) 171 if off < 0 { 172 panic("lost string " + s) 173 } 174 atom[s] = uint32(off<<8 | len(s)) 175 } 176 177 var buf bytes.Buffer 178 // Generate the Go code. 179 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") 180 fmt.Fprintln(&buf, "//go:generate go run gen.go\n") 181 fmt.Fprintln(&buf, "package atom\n\nconst (") 182 183 // compute max len 184 maxLen := 0 185 for _, s := range all { 186 if maxLen < len(s) { 187 maxLen = len(s) 188 } 189 fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) 190 } 191 fmt.Fprintln(&buf, ")\n") 192 193 fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) 194 fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) 195 196 fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) 197 for i, s := range best.tab { 198 if s == "" { 199 continue 200 } 201 fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) 202 } 203 fmt.Fprintf(&buf, "}\n") 204 datasize := (1 << best.k) * 4 205 206 fmt.Fprintln(&buf, "const atomText =") 207 textsize := len(text) 208 for len(text) > 60 { 209 fmt.Fprintf(&buf, "\t%q +\n", text[:60]) 210 text = text[60:] 211 } 212 fmt.Fprintf(&buf, "\t%q\n\n", text) 213 214 genFile("table.go", &buf) 215 216 fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) 217 } 218 219 type byLen []string 220 221 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } 222 func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 223 func (x byLen) Len() int { return len(x) } 224 225 // fnv computes the FNV hash with an arbitrary starting value h. 226 func fnv(h uint32, s string) uint32 { 227 for i := 0; i < len(s); i++ { 228 h ^= uint32(s[i]) 229 h *= 16777619 230 } 231 return h 232 } 233 234 // A table represents an attempt at constructing the lookup table. 235 // The lookup table uses cuckoo hashing, meaning that each string 236 // can be found in one of two positions. 237 type table struct { 238 h0 uint32 239 k uint 240 mask uint32 241 tab []string 242 } 243 244 // hash returns the two hashes for s. 245 func (t *table) hash(s string) (h1, h2 uint32) { 246 h := fnv(t.h0, s) 247 h1 = h & t.mask 248 h2 = (h >> 16) & t.mask 249 return 250 } 251 252 // init initializes the table with the given parameters. 253 // h0 is the initial hash value, 254 // k is the number of bits of hash value to use, and 255 // x is the list of strings to store in the table. 256 // init returns false if the table cannot be constructed. 257 func (t *table) init(h0 uint32, k uint, x []string) bool { 258 t.h0 = h0 259 t.k = k 260 t.tab = make([]string, 1<<k) 261 t.mask = 1<<k - 1 262 for _, s := range x { 263 if !t.insert(s) { 264 return false 265 } 266 } 267 return true 268 } 269 270 // insert inserts s in the table. 271 func (t *table) insert(s string) bool { 272 h1, h2 := t.hash(s) 273 if t.tab[h1] == "" { 274 t.tab[h1] = s 275 return true 276 } 277 if t.tab[h2] == "" { 278 t.tab[h2] = s 279 return true 280 } 281 if t.push(h1, 0) { 282 t.tab[h1] = s 283 return true 284 } 285 if t.push(h2, 0) { 286 t.tab[h2] = s 287 return true 288 } 289 return false 290 } 291 292 // push attempts to push aside the entry in slot i. 293 func (t *table) push(i uint32, depth int) bool { 294 if depth > len(t.tab) { 295 return false 296 } 297 s := t.tab[i] 298 h1, h2 := t.hash(s) 299 j := h1 + h2 - i 300 if t.tab[j] != "" && !t.push(j, depth+1) { 301 return false 302 } 303 t.tab[j] = s 304 return true 305 } 306 307 // The lists of element names and attribute keys were taken from 308 // https://html.spec.whatwg.org/multipage/indices.html#index 309 // as of the "HTML Living Standard - Last Updated 16 April 2018" version. 310 311 // "command", "keygen" and "menuitem" have been removed from the spec, 312 // but are kept here for backwards compatibility. 313 var elements = []string{ 314 "a", 315 "abbr", 316 "address", 317 "area", 318 "article", 319 "aside", 320 "audio", 321 "b", 322 "base", 323 "bdi", 324 "bdo", 325 "blockquote", 326 "body", 327 "br", 328 "button", 329 "canvas", 330 "caption", 331 "cite", 332 "code", 333 "col", 334 "colgroup", 335 "command", 336 "data", 337 "datalist", 338 "dd", 339 "del", 340 "details", 341 "dfn", 342 "dialog", 343 "div", 344 "dl", 345 "dt", 346 "em", 347 "embed", 348 "fieldset", 349 "figcaption", 350 "figure", 351 "footer", 352 "form", 353 "h1", 354 "h2", 355 "h3", 356 "h4", 357 "h5", 358 "h6", 359 "head", 360 "header", 361 "hgroup", 362 "hr", 363 "html", 364 "i", 365 "iframe", 366 "img", 367 "input", 368 "ins", 369 "kbd", 370 "keygen", 371 "label", 372 "legend", 373 "li", 374 "link", 375 "main", 376 "map", 377 "mark", 378 "menu", 379 "menuitem", 380 "meta", 381 "meter", 382 "nav", 383 "noscript", 384 "object", 385 "ol", 386 "optgroup", 387 "option", 388 "output", 389 "p", 390 "param", 391 "picture", 392 "pre", 393 "progress", 394 "q", 395 "rp", 396 "rt", 397 "ruby", 398 "s", 399 "samp", 400 "script", 401 "section", 402 "select", 403 "slot", 404 "small", 405 "source", 406 "span", 407 "strong", 408 "style", 409 "sub", 410 "summary", 411 "sup", 412 "table", 413 "tbody", 414 "td", 415 "template", 416 "textarea", 417 "tfoot", 418 "th", 419 "thead", 420 "time", 421 "title", 422 "tr", 423 "track", 424 "u", 425 "ul", 426 "var", 427 "video", 428 "wbr", 429 } 430 431 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3 432 // 433 // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", 434 // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, 435 // but are kept here for backwards compatibility. 436 var attributes = []string{ 437 "abbr", 438 "accept", 439 "accept-charset", 440 "accesskey", 441 "action", 442 "allowfullscreen", 443 "allowpaymentrequest", 444 "allowusermedia", 445 "alt", 446 "as", 447 "async", 448 "autocomplete", 449 "autofocus", 450 "autoplay", 451 "challenge", 452 "charset", 453 "checked", 454 "cite", 455 "class", 456 "color", 457 "cols", 458 "colspan", 459 "command", 460 "content", 461 "contenteditable", 462 "contextmenu", 463 "controls", 464 "coords", 465 "crossorigin", 466 "data", 467 "datetime", 468 "default", 469 "defer", 470 "dir", 471 "dirname", 472 "disabled", 473 "download", 474 "draggable", 475 "dropzone", 476 "enctype", 477 "for", 478 "form", 479 "formaction", 480 "formenctype", 481 "formmethod", 482 "formnovalidate", 483 "formtarget", 484 "headers", 485 "height", 486 "hidden", 487 "high", 488 "href", 489 "hreflang", 490 "http-equiv", 491 "icon", 492 "id", 493 "inputmode", 494 "integrity", 495 "is", 496 "ismap", 497 "itemid", 498 "itemprop", 499 "itemref", 500 "itemscope", 501 "itemtype", 502 "keytype", 503 "kind", 504 "label", 505 "lang", 506 "list", 507 "loop", 508 "low", 509 "manifest", 510 "max", 511 "maxlength", 512 "media", 513 "mediagroup", 514 "method", 515 "min", 516 "minlength", 517 "multiple", 518 "muted", 519 "name", 520 "nomodule", 521 "nonce", 522 "novalidate", 523 "open", 524 "optimum", 525 "pattern", 526 "ping", 527 "placeholder", 528 "playsinline", 529 "poster", 530 "preload", 531 "radiogroup", 532 "readonly", 533 "referrerpolicy", 534 "rel", 535 "required", 536 "reversed", 537 "rows", 538 "rowspan", 539 "sandbox", 540 "spellcheck", 541 "scope", 542 "scoped", 543 "seamless", 544 "selected", 545 "shape", 546 "size", 547 "sizes", 548 "sortable", 549 "sorted", 550 "slot", 551 "span", 552 "spellcheck", 553 "src", 554 "srcdoc", 555 "srclang", 556 "srcset", 557 "start", 558 "step", 559 "style", 560 "tabindex", 561 "target", 562 "title", 563 "translate", 564 "type", 565 "typemustmatch", 566 "updateviacache", 567 "usemap", 568 "value", 569 "width", 570 "workertype", 571 "wrap", 572 } 573 574 // "onautocomplete", "onautocompleteerror", "onmousewheel", 575 // "onshow" and "onsort" have been removed from the spec, 576 // but are kept here for backwards compatibility. 577 var eventHandlers = []string{ 578 "onabort", 579 "onautocomplete", 580 "onautocompleteerror", 581 "onauxclick", 582 "onafterprint", 583 "onbeforeprint", 584 "onbeforeunload", 585 "onblur", 586 "oncancel", 587 "oncanplay", 588 "oncanplaythrough", 589 "onchange", 590 "onclick", 591 "onclose", 592 "oncontextmenu", 593 "oncopy", 594 "oncuechange", 595 "oncut", 596 "ondblclick", 597 "ondrag", 598 "ondragend", 599 "ondragenter", 600 "ondragexit", 601 "ondragleave", 602 "ondragover", 603 "ondragstart", 604 "ondrop", 605 "ondurationchange", 606 "onemptied", 607 "onended", 608 "onerror", 609 "onfocus", 610 "onhashchange", 611 "oninput", 612 "oninvalid", 613 "onkeydown", 614 "onkeypress", 615 "onkeyup", 616 "onlanguagechange", 617 "onload", 618 "onloadeddata", 619 "onloadedmetadata", 620 "onloadend", 621 "onloadstart", 622 "onmessage", 623 "onmessageerror", 624 "onmousedown", 625 "onmouseenter", 626 "onmouseleave", 627 "onmousemove", 628 "onmouseout", 629 "onmouseover", 630 "onmouseup", 631 "onmousewheel", 632 "onwheel", 633 "onoffline", 634 "ononline", 635 "onpagehide", 636 "onpageshow", 637 "onpaste", 638 "onpause", 639 "onplay", 640 "onplaying", 641 "onpopstate", 642 "onprogress", 643 "onratechange", 644 "onreset", 645 "onresize", 646 "onrejectionhandled", 647 "onscroll", 648 "onsecuritypolicyviolation", 649 "onseeked", 650 "onseeking", 651 "onselect", 652 "onshow", 653 "onsort", 654 "onstalled", 655 "onstorage", 656 "onsubmit", 657 "onsuspend", 658 "ontimeupdate", 659 "ontoggle", 660 "onunhandledrejection", 661 "onunload", 662 "onvolumechange", 663 "onwaiting", 664 } 665 666 // extra are ad-hoc values not covered by any of the lists above. 667 var extra = []string{ 668 "acronym", 669 "align", 670 "annotation", 671 "annotation-xml", 672 "applet", 673 "basefont", 674 "bgsound", 675 "big", 676 "blink", 677 "center", 678 "color", 679 "desc", 680 "face", 681 "font", 682 "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. 683 "foreignobject", 684 "frame", 685 "frameset", 686 "image", 687 "isindex", // "isindex" has been removed from the spec, but are kept here for backwards compatibility. 688 "listing", 689 "malignmark", 690 "marquee", 691 "math", 692 "mglyph", 693 "mi", 694 "mn", 695 "mo", 696 "ms", 697 "mtext", 698 "nobr", 699 "noembed", 700 "noframes", 701 "plaintext", 702 "prompt", 703 "public", 704 "rb", 705 "rtc", 706 "spacer", 707 "strike", 708 "svg", 709 "system", 710 "tt", 711 "xmp", 712 }