github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/net/html/atom/gen.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build ignore 6 // +build ignore 7 8 //go:generate go run gen.go 9 //go:generate go run gen.go -test 10 11 package main 12 13 import ( 14 "bytes" 15 "flag" 16 "fmt" 17 "go/format" 18 "io/ioutil" 19 "math/rand" 20 "os" 21 "sort" 22 "strings" 23 ) 24 25 // identifier converts s to a Go exported identifier. 26 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset". 27 func identifier(s string) string { 28 b := make([]byte, 0, len(s)) 29 cap := true 30 for _, c := range s { 31 if c == '-' { 32 cap = true 33 continue 34 } 35 if cap && 'a' <= c && c <= 'z' { 36 c -= 'a' - 'A' 37 } 38 cap = false 39 b = append(b, byte(c)) 40 } 41 return string(b) 42 } 43 44 var test = flag.Bool("test", false, "generate table_test.go") 45 46 func genFile(name string, buf *bytes.Buffer) { 47 b, err := format.Source(buf.Bytes()) 48 if err != nil { 49 fmt.Fprintln(os.Stderr, err) 50 os.Exit(1) 51 } 52 if err := ioutil.WriteFile(name, b, 0644); err != nil { 53 fmt.Fprintln(os.Stderr, err) 54 os.Exit(1) 55 } 56 } 57 58 func main() { 59 flag.Parse() 60 61 var all []string 62 all = append(all, elements...) 63 all = append(all, attributes...) 64 all = append(all, eventHandlers...) 65 all = append(all, extra...) 66 sort.Strings(all) 67 68 // uniq - lists have dups 69 w := 0 70 for _, s := range all { 71 if w == 0 || all[w-1] != s { 72 all[w] = s 73 w++ 74 } 75 } 76 all = all[:w] 77 78 if *test { 79 var buf bytes.Buffer 80 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") 81 fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") 82 fmt.Fprintln(&buf, "package atom\n") 83 fmt.Fprintln(&buf, "var testAtomList = []string{") 84 for _, s := range all { 85 fmt.Fprintf(&buf, "\t%q,\n", s) 86 } 87 fmt.Fprintln(&buf, "}") 88 89 genFile("table_test.go", &buf) 90 return 91 } 92 93 // Find hash that minimizes table size. 94 var best *table 95 for i := 0; i < 1000000; i++ { 96 if best != nil && 1<<(best.k-1) < len(all) { 97 break 98 } 99 h := rand.Uint32() 100 for k := uint(0); k <= 16; k++ { 101 if best != nil && k >= best.k { 102 break 103 } 104 var t table 105 if t.init(h, k, all) { 106 best = &t 107 break 108 } 109 } 110 } 111 if best == nil { 112 fmt.Fprintf(os.Stderr, "failed to construct string table\n") 113 os.Exit(1) 114 } 115 116 // Lay out strings, using overlaps when possible. 117 layout := append([]string{}, all...) 118 119 // Remove strings that are substrings of other strings 120 for changed := true; changed; { 121 changed = false 122 for i, s := range layout { 123 if s == "" { 124 continue 125 } 126 for j, t := range layout { 127 if i != j && t != "" && strings.Contains(s, t) { 128 changed = true 129 layout[j] = "" 130 } 131 } 132 } 133 } 134 135 // Join strings where one suffix matches another prefix. 136 for { 137 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], 138 // maximizing overlap length k. 139 besti := -1 140 bestj := -1 141 bestk := 0 142 for i, s := range layout { 143 if s == "" { 144 continue 145 } 146 for j, t := range layout { 147 if i == j { 148 continue 149 } 150 for k := bestk + 1; k <= len(s) && k <= len(t); k++ { 151 if s[len(s)-k:] == t[:k] { 152 besti = i 153 bestj = j 154 bestk = k 155 } 156 } 157 } 158 } 159 if bestk > 0 { 160 layout[besti] += layout[bestj][bestk:] 161 layout[bestj] = "" 162 continue 163 } 164 break 165 } 166 167 text := strings.Join(layout, "") 168 169 atom := map[string]uint32{} 170 for _, s := range all { 171 off := strings.Index(text, s) 172 if off < 0 { 173 panic("lost string " + s) 174 } 175 atom[s] = uint32(off<<8 | len(s)) 176 } 177 178 var buf bytes.Buffer 179 // Generate the Go code. 180 fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") 181 fmt.Fprintln(&buf, "//go:generate go run gen.go\n") 182 fmt.Fprintln(&buf, "package atom\n\nconst (") 183 184 // compute max len 185 maxLen := 0 186 for _, s := range all { 187 if maxLen < len(s) { 188 maxLen = len(s) 189 } 190 fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) 191 } 192 fmt.Fprintln(&buf, ")\n") 193 194 fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) 195 fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) 196 197 fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) 198 for i, s := range best.tab { 199 if s == "" { 200 continue 201 } 202 fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) 203 } 204 fmt.Fprintf(&buf, "}\n") 205 datasize := (1 << best.k) * 4 206 207 fmt.Fprintln(&buf, "const atomText =") 208 textsize := len(text) 209 for len(text) > 60 { 210 fmt.Fprintf(&buf, "\t%q +\n", text[:60]) 211 text = text[60:] 212 } 213 fmt.Fprintf(&buf, "\t%q\n\n", text) 214 215 genFile("table.go", &buf) 216 217 fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) 218 } 219 220 type byLen []string 221 222 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } 223 func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 224 func (x byLen) Len() int { return len(x) } 225 226 // fnv computes the FNV hash with an arbitrary starting value h. 227 func fnv(h uint32, s string) uint32 { 228 for i := 0; i < len(s); i++ { 229 h ^= uint32(s[i]) 230 h *= 16777619 231 } 232 return h 233 } 234 235 // A table represents an attempt at constructing the lookup table. 236 // The lookup table uses cuckoo hashing, meaning that each string 237 // can be found in one of two positions. 238 type table struct { 239 h0 uint32 240 k uint 241 mask uint32 242 tab []string 243 } 244 245 // hash returns the two hashes for s. 246 func (t *table) hash(s string) (h1, h2 uint32) { 247 h := fnv(t.h0, s) 248 h1 = h & t.mask 249 h2 = (h >> 16) & t.mask 250 return 251 } 252 253 // init initializes the table with the given parameters. 254 // h0 is the initial hash value, 255 // k is the number of bits of hash value to use, and 256 // x is the list of strings to store in the table. 257 // init returns false if the table cannot be constructed. 258 func (t *table) init(h0 uint32, k uint, x []string) bool { 259 t.h0 = h0 260 t.k = k 261 t.tab = make([]string, 1<<k) 262 t.mask = 1<<k - 1 263 for _, s := range x { 264 if !t.insert(s) { 265 return false 266 } 267 } 268 return true 269 } 270 271 // insert inserts s in the table. 272 func (t *table) insert(s string) bool { 273 h1, h2 := t.hash(s) 274 if t.tab[h1] == "" { 275 t.tab[h1] = s 276 return true 277 } 278 if t.tab[h2] == "" { 279 t.tab[h2] = s 280 return true 281 } 282 if t.push(h1, 0) { 283 t.tab[h1] = s 284 return true 285 } 286 if t.push(h2, 0) { 287 t.tab[h2] = s 288 return true 289 } 290 return false 291 } 292 293 // push attempts to push aside the entry in slot i. 294 func (t *table) push(i uint32, depth int) bool { 295 if depth > len(t.tab) { 296 return false 297 } 298 s := t.tab[i] 299 h1, h2 := t.hash(s) 300 j := h1 + h2 - i 301 if t.tab[j] != "" && !t.push(j, depth+1) { 302 return false 303 } 304 t.tab[j] = s 305 return true 306 } 307 308 // The lists of element names and attribute keys were taken from 309 // https://html.spec.whatwg.org/multipage/indices.html#index 310 // as of the "HTML Living Standard - Last Updated 16 April 2018" version. 311 312 // "command", "keygen" and "menuitem" have been removed from the spec, 313 // but are kept here for backwards compatibility. 314 var elements = []string{ 315 "a", 316 "abbr", 317 "address", 318 "area", 319 "article", 320 "aside", 321 "audio", 322 "b", 323 "base", 324 "bdi", 325 "bdo", 326 "blockquote", 327 "body", 328 "br", 329 "button", 330 "canvas", 331 "caption", 332 "cite", 333 "code", 334 "col", 335 "colgroup", 336 "command", 337 "data", 338 "datalist", 339 "dd", 340 "del", 341 "details", 342 "dfn", 343 "dialog", 344 "div", 345 "dl", 346 "dt", 347 "em", 348 "embed", 349 "fieldset", 350 "figcaption", 351 "figure", 352 "footer", 353 "form", 354 "h1", 355 "h2", 356 "h3", 357 "h4", 358 "h5", 359 "h6", 360 "head", 361 "header", 362 "hgroup", 363 "hr", 364 "html", 365 "i", 366 "iframe", 367 "img", 368 "input", 369 "ins", 370 "kbd", 371 "keygen", 372 "label", 373 "legend", 374 "li", 375 "link", 376 "main", 377 "map", 378 "mark", 379 "menu", 380 "menuitem", 381 "meta", 382 "meter", 383 "nav", 384 "noscript", 385 "object", 386 "ol", 387 "optgroup", 388 "option", 389 "output", 390 "p", 391 "param", 392 "picture", 393 "pre", 394 "progress", 395 "q", 396 "rp", 397 "rt", 398 "ruby", 399 "s", 400 "samp", 401 "script", 402 "section", 403 "select", 404 "slot", 405 "small", 406 "source", 407 "span", 408 "strong", 409 "style", 410 "sub", 411 "summary", 412 "sup", 413 "table", 414 "tbody", 415 "td", 416 "template", 417 "textarea", 418 "tfoot", 419 "th", 420 "thead", 421 "time", 422 "title", 423 "tr", 424 "track", 425 "u", 426 "ul", 427 "var", 428 "video", 429 "wbr", 430 } 431 432 // https://html.spec.whatwg.org/multipage/indices.html#attributes-3 433 // 434 // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", 435 // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, 436 // but are kept here for backwards compatibility. 437 var attributes = []string{ 438 "abbr", 439 "accept", 440 "accept-charset", 441 "accesskey", 442 "action", 443 "allowfullscreen", 444 "allowpaymentrequest", 445 "allowusermedia", 446 "alt", 447 "as", 448 "async", 449 "autocomplete", 450 "autofocus", 451 "autoplay", 452 "challenge", 453 "charset", 454 "checked", 455 "cite", 456 "class", 457 "color", 458 "cols", 459 "colspan", 460 "command", 461 "content", 462 "contenteditable", 463 "contextmenu", 464 "controls", 465 "coords", 466 "crossorigin", 467 "data", 468 "datetime", 469 "default", 470 "defer", 471 "dir", 472 "dirname", 473 "disabled", 474 "download", 475 "draggable", 476 "dropzone", 477 "enctype", 478 "for", 479 "form", 480 "formaction", 481 "formenctype", 482 "formmethod", 483 "formnovalidate", 484 "formtarget", 485 "headers", 486 "height", 487 "hidden", 488 "high", 489 "href", 490 "hreflang", 491 "http-equiv", 492 "icon", 493 "id", 494 "inputmode", 495 "integrity", 496 "is", 497 "ismap", 498 "itemid", 499 "itemprop", 500 "itemref", 501 "itemscope", 502 "itemtype", 503 "keytype", 504 "kind", 505 "label", 506 "lang", 507 "list", 508 "loop", 509 "low", 510 "manifest", 511 "max", 512 "maxlength", 513 "media", 514 "mediagroup", 515 "method", 516 "min", 517 "minlength", 518 "multiple", 519 "muted", 520 "name", 521 "nomodule", 522 "nonce", 523 "novalidate", 524 "open", 525 "optimum", 526 "pattern", 527 "ping", 528 "placeholder", 529 "playsinline", 530 "poster", 531 "preload", 532 "radiogroup", 533 "readonly", 534 "referrerpolicy", 535 "rel", 536 "required", 537 "reversed", 538 "rows", 539 "rowspan", 540 "sandbox", 541 "spellcheck", 542 "scope", 543 "scoped", 544 "seamless", 545 "selected", 546 "shape", 547 "size", 548 "sizes", 549 "sortable", 550 "sorted", 551 "slot", 552 "span", 553 "spellcheck", 554 "src", 555 "srcdoc", 556 "srclang", 557 "srcset", 558 "start", 559 "step", 560 "style", 561 "tabindex", 562 "target", 563 "title", 564 "translate", 565 "type", 566 "typemustmatch", 567 "updateviacache", 568 "usemap", 569 "value", 570 "width", 571 "workertype", 572 "wrap", 573 } 574 575 // "onautocomplete", "onautocompleteerror", "onmousewheel", 576 // "onshow" and "onsort" have been removed from the spec, 577 // but are kept here for backwards compatibility. 578 var eventHandlers = []string{ 579 "onabort", 580 "onautocomplete", 581 "onautocompleteerror", 582 "onauxclick", 583 "onafterprint", 584 "onbeforeprint", 585 "onbeforeunload", 586 "onblur", 587 "oncancel", 588 "oncanplay", 589 "oncanplaythrough", 590 "onchange", 591 "onclick", 592 "onclose", 593 "oncontextmenu", 594 "oncopy", 595 "oncuechange", 596 "oncut", 597 "ondblclick", 598 "ondrag", 599 "ondragend", 600 "ondragenter", 601 "ondragexit", 602 "ondragleave", 603 "ondragover", 604 "ondragstart", 605 "ondrop", 606 "ondurationchange", 607 "onemptied", 608 "onended", 609 "onerror", 610 "onfocus", 611 "onhashchange", 612 "oninput", 613 "oninvalid", 614 "onkeydown", 615 "onkeypress", 616 "onkeyup", 617 "onlanguagechange", 618 "onload", 619 "onloadeddata", 620 "onloadedmetadata", 621 "onloadend", 622 "onloadstart", 623 "onmessage", 624 "onmessageerror", 625 "onmousedown", 626 "onmouseenter", 627 "onmouseleave", 628 "onmousemove", 629 "onmouseout", 630 "onmouseover", 631 "onmouseup", 632 "onmousewheel", 633 "onwheel", 634 "onoffline", 635 "ononline", 636 "onpagehide", 637 "onpageshow", 638 "onpaste", 639 "onpause", 640 "onplay", 641 "onplaying", 642 "onpopstate", 643 "onprogress", 644 "onratechange", 645 "onreset", 646 "onresize", 647 "onrejectionhandled", 648 "onscroll", 649 "onsecuritypolicyviolation", 650 "onseeked", 651 "onseeking", 652 "onselect", 653 "onshow", 654 "onsort", 655 "onstalled", 656 "onstorage", 657 "onsubmit", 658 "onsuspend", 659 "ontimeupdate", 660 "ontoggle", 661 "onunhandledrejection", 662 "onunload", 663 "onvolumechange", 664 "onwaiting", 665 } 666 667 // extra are ad-hoc values not covered by any of the lists above. 668 var extra = []string{ 669 "acronym", 670 "align", 671 "annotation", 672 "annotation-xml", 673 "applet", 674 "basefont", 675 "bgsound", 676 "big", 677 "blink", 678 "center", 679 "color", 680 "desc", 681 "face", 682 "font", 683 "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. 684 "foreignobject", 685 "frame", 686 "frameset", 687 "image", 688 "isindex", // "isindex" has been removed from the spec, but are kept here for backwards compatibility. 689 "listing", 690 "malignmark", 691 "marquee", 692 "math", 693 "mglyph", 694 "mi", 695 "mn", 696 "mo", 697 "ms", 698 "mtext", 699 "nobr", 700 "noembed", 701 "noframes", 702 "plaintext", 703 "prompt", 704 "public", 705 "rb", 706 "rtc", 707 "spacer", 708 "strike", 709 "svg", 710 "system", 711 "tt", 712 "xmp", 713 }