github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/net/html/atom/gen.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 // This program generates table.go and table_test.go. 10 // Invoke as 11 // 12 // go run gen.go |gofmt >table.go 13 // go run gen.go -test |gofmt >table_test.go 14 15 import ( 16 "flag" 17 "fmt" 18 "math/rand" 19 "os" 20 "sort" 21 "strings" 22 ) 23 24 // identifier converts s to a Go exported identifier. 25 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset". 26 func identifier(s string) string { 27 b := make([]byte, 0, len(s)) 28 cap := true 29 for _, c := range s { 30 if c == '-' { 31 cap = true 32 continue 33 } 34 if cap && 'a' <= c && c <= 'z' { 35 c -= 'a' - 'A' 36 } 37 cap = false 38 b = append(b, byte(c)) 39 } 40 return string(b) 41 } 42 43 var test = flag.Bool("test", false, "generate table_test.go") 44 45 func main() { 46 flag.Parse() 47 48 var all []string 49 all = append(all, elements...) 50 all = append(all, attributes...) 51 all = append(all, eventHandlers...) 52 all = append(all, extra...) 53 sort.Strings(all) 54 55 if *test { 56 fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n") 57 fmt.Printf("package atom\n\n") 58 fmt.Printf("var testAtomList = []string{\n") 59 for _, s := range all { 60 fmt.Printf("\t%q,\n", s) 61 } 62 fmt.Printf("}\n") 63 return 64 } 65 66 // uniq - lists have dups 67 // compute max len too 68 maxLen := 0 69 w := 0 70 for _, s := range all { 71 if w == 0 || all[w-1] != s { 72 if maxLen < len(s) { 73 maxLen = len(s) 74 } 75 all[w] = s 76 w++ 77 } 78 } 79 all = all[:w] 80 81 // Find hash that minimizes table size. 
82 var best *table 83 for i := 0; i < 1000000; i++ { 84 if best != nil && 1<<(best.k-1) < len(all) { 85 break 86 } 87 h := rand.Uint32() 88 for k := uint(0); k <= 16; k++ { 89 if best != nil && k >= best.k { 90 break 91 } 92 var t table 93 if t.init(h, k, all) { 94 best = &t 95 break 96 } 97 } 98 } 99 if best == nil { 100 fmt.Fprintf(os.Stderr, "failed to construct string table\n") 101 os.Exit(1) 102 } 103 104 // Lay out strings, using overlaps when possible. 105 layout := append([]string{}, all...) 106 107 // Remove strings that are substrings of other strings 108 for changed := true; changed; { 109 changed = false 110 for i, s := range layout { 111 if s == "" { 112 continue 113 } 114 for j, t := range layout { 115 if i != j && t != "" && strings.Contains(s, t) { 116 changed = true 117 layout[j] = "" 118 } 119 } 120 } 121 } 122 123 // Join strings where one suffix matches another prefix. 124 for { 125 // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], 126 // maximizing overlap length k. 127 besti := -1 128 bestj := -1 129 bestk := 0 130 for i, s := range layout { 131 if s == "" { 132 continue 133 } 134 for j, t := range layout { 135 if i == j { 136 continue 137 } 138 for k := bestk + 1; k <= len(s) && k <= len(t); k++ { 139 if s[len(s)-k:] == t[:k] { 140 besti = i 141 bestj = j 142 bestk = k 143 } 144 } 145 } 146 } 147 if bestk > 0 { 148 layout[besti] += layout[bestj][bestk:] 149 layout[bestj] = "" 150 continue 151 } 152 break 153 } 154 155 text := strings.Join(layout, "") 156 157 atom := map[string]uint32{} 158 for _, s := range all { 159 off := strings.Index(text, s) 160 if off < 0 { 161 panic("lost string " + s) 162 } 163 atom[s] = uint32(off<<8 | len(s)) 164 } 165 166 // Generate the Go code. 
167 fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n") 168 fmt.Printf("package atom\n\nconst (\n") 169 for _, s := range all { 170 fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s]) 171 } 172 fmt.Printf(")\n\n") 173 174 fmt.Printf("const hash0 = %#x\n\n", best.h0) 175 fmt.Printf("const maxAtomLen = %d\n\n", maxLen) 176 177 fmt.Printf("var table = [1<<%d]Atom{\n", best.k) 178 for i, s := range best.tab { 179 if s == "" { 180 continue 181 } 182 fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s) 183 } 184 fmt.Printf("}\n") 185 datasize := (1 << best.k) * 4 186 187 fmt.Printf("const atomText =\n") 188 textsize := len(text) 189 for len(text) > 60 { 190 fmt.Printf("\t%q +\n", text[:60]) 191 text = text[60:] 192 } 193 fmt.Printf("\t%q\n\n", text) 194 195 fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) 196 } 197 198 type byLen []string 199 200 func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } 201 func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 202 func (x byLen) Len() int { return len(x) } 203 204 // fnv computes the FNV hash with an arbitrary starting value h. 205 func fnv(h uint32, s string) uint32 { 206 for i := 0; i < len(s); i++ { 207 h ^= uint32(s[i]) 208 h *= 16777619 209 } 210 return h 211 } 212 213 // A table represents an attempt at constructing the lookup table. 214 // The lookup table uses cuckoo hashing, meaning that each string 215 // can be found in one of two positions. 216 type table struct { 217 h0 uint32 218 k uint 219 mask uint32 220 tab []string 221 } 222 223 // hash returns the two hashes for s. 224 func (t *table) hash(s string) (h1, h2 uint32) { 225 h := fnv(t.h0, s) 226 h1 = h & t.mask 227 h2 = (h >> 16) & t.mask 228 return 229 } 230 231 // init initializes the table with the given parameters. 
232 // h0 is the initial hash value, 233 // k is the number of bits of hash value to use, and 234 // x is the list of strings to store in the table. 235 // init returns false if the table cannot be constructed. 236 func (t *table) init(h0 uint32, k uint, x []string) bool { 237 t.h0 = h0 238 t.k = k 239 t.tab = make([]string, 1<<k) 240 t.mask = 1<<k - 1 241 for _, s := range x { 242 if !t.insert(s) { 243 return false 244 } 245 } 246 return true 247 } 248 249 // insert inserts s in the table. 250 func (t *table) insert(s string) bool { 251 h1, h2 := t.hash(s) 252 if t.tab[h1] == "" { 253 t.tab[h1] = s 254 return true 255 } 256 if t.tab[h2] == "" { 257 t.tab[h2] = s 258 return true 259 } 260 if t.push(h1, 0) { 261 t.tab[h1] = s 262 return true 263 } 264 if t.push(h2, 0) { 265 t.tab[h2] = s 266 return true 267 } 268 return false 269 } 270 271 // push attempts to push aside the entry in slot i. 272 func (t *table) push(i uint32, depth int) bool { 273 if depth > len(t.tab) { 274 return false 275 } 276 s := t.tab[i] 277 h1, h2 := t.hash(s) 278 j := h1 + h2 - i 279 if t.tab[j] != "" && !t.push(j, depth+1) { 280 return false 281 } 282 t.tab[j] = s 283 return true 284 } 285 286 // The lists of element names and attribute keys were taken from 287 // https://html.spec.whatwg.org/multipage/indices.html#index 288 // as of the "HTML Living Standard - Last Updated 21 February 2015" version. 

// elements lists the element names for which atoms are generated.
var elements = []string{
	"a",
	"abbr",
	"address",
	"area",
	"article",
	"aside",
	"audio",
	"b",
	"base",
	"bdi",
	"bdo",
	"blockquote",
	"body",
	"br",
	"button",
	"canvas",
	"caption",
	"cite",
	"code",
	"col",
	"colgroup",
	"command",
	"data",
	"datalist",
	"dd",
	"del",
	"details",
	"dfn",
	"dialog",
	"div",
	"dl",
	"dt",
	"em",
	"embed",
	"fieldset",
	"figcaption",
	"figure",
	"footer",
	"form",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"head",
	"header",
	"hgroup",
	"hr",
	"html",
	"i",
	"iframe",
	"img",
	"input",
	"ins",
	"kbd",
	"keygen",
	"label",
	"legend",
	"li",
	"link",
	"map",
	"mark",
	"menu",
	"menuitem",
	"meta",
	"meter",
	"nav",
	"noscript",
	"object",
	"ol",
	"optgroup",
	"option",
	"output",
	"p",
	"param",
	"pre",
	"progress",
	"q",
	"rp",
	"rt",
	"ruby",
	"s",
	"samp",
	"script",
	"section",
	"select",
	"small",
	"source",
	"span",
	"strong",
	"style",
	"sub",
	"summary",
	"sup",
	"table",
	"tbody",
	"td",
	"template",
	"textarea",
	"tfoot",
	"th",
	"thead",
	"time",
	"title",
	"tr",
	"track",
	"u",
	"ul",
	"var",
	"video",
	"wbr",
}

// https://html.spec.whatwg.org/multipage/indices.html#attributes-3

// attributes lists the attribute keys for which atoms are generated.
// Some entries also appear in elements; main removes the duplicates.
var attributes = []string{
	"abbr",
	"accept",
	"accept-charset",
	"accesskey",
	"action",
	"alt",
	"async",
	"autocomplete",
	"autofocus",
	"autoplay",
	"challenge",
	"charset",
	"checked",
	"cite",
	"class",
	"cols",
	"colspan",
	"command",
	"content",
	"contenteditable",
	"contextmenu",
	"controls",
	"coords",
	"crossorigin",
	"data",
	"datetime",
	"default",
	"defer",
	"dir",
	"dirname",
	"disabled",
	"download",
	"draggable",
	"dropzone",
	"enctype",
	"for",
	"form",
	"formaction",
	"formenctype",
	"formmethod",
	"formnovalidate",
	"formtarget",
	"headers",
	"height",
	"hidden",
	"high",
	"href",
	"hreflang",
	"http-equiv",
	"icon",
	"id",
	"inputmode",
	"ismap",
	"itemid",
	"itemprop",
	"itemref",
	"itemscope",
	"itemtype",
	"keytype",
	"kind",
	"label",
	"lang",
	"list",
	"loop",
	"low",
	"manifest",
	"max",
	"maxlength",
	"media",
	"mediagroup",
	"method",
	"min",
	"minlength",
	"multiple",
	"muted",
	"name",
	"novalidate",
	"open",
	"optimum",
	"pattern",
	"ping",
	"placeholder",
	"poster",
	"preload",
	"radiogroup",
	"readonly",
	"rel",
	"required",
	"reversed",
	"rows",
	"rowspan",
	"sandbox",
	"spellcheck",
	"scope",
	"scoped",
	"seamless",
	"selected",
	"shape",
	"size",
	"sizes",
	"sortable",
	"sorted",
	"span",
	"src",
	"srcdoc",
	"srclang",
	"start",
	"step",
	"style",
	"tabindex",
	"target",
	"title",
	"translate",
	"type",
	"typemustmatch",
	"usemap",
	"value",
	"width",
	"wrap",
}

// eventHandlers lists the "on…" event handler attribute names for which
// atoms are generated.
var eventHandlers = []string{
	"onabort",
	"onautocomplete",
	"onautocompleteerror",
	"onafterprint",
	"onbeforeprint",
	"onbeforeunload",
	"onblur",
	"oncancel",
	"oncanplay",
	"oncanplaythrough",
	"onchange",
	"onclick",
	"onclose",
	"oncontextmenu",
	"oncuechange",
	"ondblclick",
	"ondrag",
	"ondragend",
	"ondragenter",
	"ondragleave",
	"ondragover",
	"ondragstart",
	"ondrop",
	"ondurationchange",
	"onemptied",
	"onended",
	"onerror",
	"onfocus",
	"onhashchange",
	"oninput",
	"oninvalid",
	"onkeydown",
	"onkeypress",
	"onkeyup",
	"onlanguagechange",
	"onload",
	"onloadeddata",
	"onloadedmetadata",
	"onloadstart",
	"onmessage",
	"onmousedown",
	"onmousemove",
	"onmouseout",
	"onmouseover",
	"onmouseup",
	"onmousewheel",
	"onoffline",
	"ononline",
	"onpagehide",
	"onpageshow",
	"onpause",
	"onplay",
	"onplaying",
	"onpopstate",
	"onprogress",
	"onratechange",
	"onreset",
	"onresize",
	"onscroll",
	"onseeked",
	"onseeking",
	"onselect",
	"onshow",
	"onsort",
	"onstalled",
	"onstorage",
	"onsubmit",
	"onsuspend",
	"ontimeupdate",
	"ontoggle",
	"onunload",
	"onvolumechange",
	"onwaiting",
}

// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
	"align",
	"annotation",
	"annotation-xml",
	"applet",
	"basefont",
	"bgsound",
	"big",
	"blink",
	"center",
	"color",
	"desc",
	"face",
	"font",
	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
	"foreignobject",
	"frame",
	"frameset",
	"image",
	"isindex",
	"listing",
	"malignmark",
	"marquee",
	"math",
	"mglyph",
	"mi",
	"mn",
	"mo",
	"ms",
	"mtext",
	"nobr",
	"noembed",
	"noframes",
	"plaintext",
	"prompt",
	"public",
	"spacer",
	"strike",
	"svg",
	"system",
	"tt",
	"xmp",
}