github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/net/html/atom/gen.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  // This program generates table.go and table_test.go.
    10  // Invoke as
    11  //
    12  //	go run gen.go |gofmt >table.go
    13  //	go run gen.go -test |gofmt >table_test.go
    14  
    15  import (
    16  	"flag"
    17  	"fmt"
    18  	"math/rand"
    19  	"os"
    20  	"sort"
    21  	"strings"
    22  )
    23  
    24  // identifier converts s to a Go exported identifier.
    25  // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
    26  func identifier(s string) string {
    27  	b := make([]byte, 0, len(s))
    28  	cap := true
    29  	for _, c := range s {
    30  		if c == '-' {
    31  			cap = true
    32  			continue
    33  		}
    34  		if cap && 'a' <= c && c <= 'z' {
    35  			c -= 'a' - 'A'
    36  		}
    37  		cap = false
    38  		b = append(b, byte(c))
    39  	}
    40  	return string(b)
    41  }
    42  
    43  var test = flag.Bool("test", false, "generate table_test.go")
    44  
    45  func main() {
    46  	flag.Parse()
    47  
    48  	var all []string
    49  	all = append(all, elements...)
    50  	all = append(all, attributes...)
    51  	all = append(all, eventHandlers...)
    52  	all = append(all, extra...)
    53  	sort.Strings(all)
    54  
    55  	if *test {
    56  		fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
    57  		fmt.Printf("package atom\n\n")
    58  		fmt.Printf("var testAtomList = []string{\n")
    59  		for _, s := range all {
    60  			fmt.Printf("\t%q,\n", s)
    61  		}
    62  		fmt.Printf("}\n")
    63  		return
    64  	}
    65  
    66  	// uniq - lists have dups
    67  	// compute max len too
    68  	maxLen := 0
    69  	w := 0
    70  	for _, s := range all {
    71  		if w == 0 || all[w-1] != s {
    72  			if maxLen < len(s) {
    73  				maxLen = len(s)
    74  			}
    75  			all[w] = s
    76  			w++
    77  		}
    78  	}
    79  	all = all[:w]
    80  
    81  	// Find hash that minimizes table size.
    82  	var best *table
    83  	for i := 0; i < 1000000; i++ {
    84  		if best != nil && 1<<(best.k-1) < len(all) {
    85  			break
    86  		}
    87  		h := rand.Uint32()
    88  		for k := uint(0); k <= 16; k++ {
    89  			if best != nil && k >= best.k {
    90  				break
    91  			}
    92  			var t table
    93  			if t.init(h, k, all) {
    94  				best = &t
    95  				break
    96  			}
    97  		}
    98  	}
    99  	if best == nil {
   100  		fmt.Fprintf(os.Stderr, "failed to construct string table\n")
   101  		os.Exit(1)
   102  	}
   103  
   104  	// Lay out strings, using overlaps when possible.
   105  	layout := append([]string{}, all...)
   106  
   107  	// Remove strings that are substrings of other strings
   108  	for changed := true; changed; {
   109  		changed = false
   110  		for i, s := range layout {
   111  			if s == "" {
   112  				continue
   113  			}
   114  			for j, t := range layout {
   115  				if i != j && t != "" && strings.Contains(s, t) {
   116  					changed = true
   117  					layout[j] = ""
   118  				}
   119  			}
   120  		}
   121  	}
   122  
   123  	// Join strings where one suffix matches another prefix.
   124  	for {
   125  		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
   126  		// maximizing overlap length k.
   127  		besti := -1
   128  		bestj := -1
   129  		bestk := 0
   130  		for i, s := range layout {
   131  			if s == "" {
   132  				continue
   133  			}
   134  			for j, t := range layout {
   135  				if i == j {
   136  					continue
   137  				}
   138  				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
   139  					if s[len(s)-k:] == t[:k] {
   140  						besti = i
   141  						bestj = j
   142  						bestk = k
   143  					}
   144  				}
   145  			}
   146  		}
   147  		if bestk > 0 {
   148  			layout[besti] += layout[bestj][bestk:]
   149  			layout[bestj] = ""
   150  			continue
   151  		}
   152  		break
   153  	}
   154  
   155  	text := strings.Join(layout, "")
   156  
   157  	atom := map[string]uint32{}
   158  	for _, s := range all {
   159  		off := strings.Index(text, s)
   160  		if off < 0 {
   161  			panic("lost string " + s)
   162  		}
   163  		atom[s] = uint32(off<<8 | len(s))
   164  	}
   165  
   166  	// Generate the Go code.
   167  	fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
   168  	fmt.Printf("package atom\n\nconst (\n")
   169  	for _, s := range all {
   170  		fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
   171  	}
   172  	fmt.Printf(")\n\n")
   173  
   174  	fmt.Printf("const hash0 = %#x\n\n", best.h0)
   175  	fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
   176  
   177  	fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
   178  	for i, s := range best.tab {
   179  		if s == "" {
   180  			continue
   181  		}
   182  		fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
   183  	}
   184  	fmt.Printf("}\n")
   185  	datasize := (1 << best.k) * 4
   186  
   187  	fmt.Printf("const atomText =\n")
   188  	textsize := len(text)
   189  	for len(text) > 60 {
   190  		fmt.Printf("\t%q +\n", text[:60])
   191  		text = text[60:]
   192  	}
   193  	fmt.Printf("\t%q\n\n", text)
   194  
   195  	fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
   196  }
   197  
   198  type byLen []string
   199  
   200  func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
   201  func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   202  func (x byLen) Len() int           { return len(x) }
   203  
   204  // fnv computes the FNV hash with an arbitrary starting value h.
   205  func fnv(h uint32, s string) uint32 {
   206  	for i := 0; i < len(s); i++ {
   207  		h ^= uint32(s[i])
   208  		h *= 16777619
   209  	}
   210  	return h
   211  }
   212  
   213  // A table represents an attempt at constructing the lookup table.
   214  // The lookup table uses cuckoo hashing, meaning that each string
   215  // can be found in one of two positions.
   216  type table struct {
   217  	h0   uint32
   218  	k    uint
   219  	mask uint32
   220  	tab  []string
   221  }
   222  
   223  // hash returns the two hashes for s.
   224  func (t *table) hash(s string) (h1, h2 uint32) {
   225  	h := fnv(t.h0, s)
   226  	h1 = h & t.mask
   227  	h2 = (h >> 16) & t.mask
   228  	return
   229  }
   230  
   231  // init initializes the table with the given parameters.
   232  // h0 is the initial hash value,
   233  // k is the number of bits of hash value to use, and
   234  // x is the list of strings to store in the table.
   235  // init returns false if the table cannot be constructed.
   236  func (t *table) init(h0 uint32, k uint, x []string) bool {
   237  	t.h0 = h0
   238  	t.k = k
   239  	t.tab = make([]string, 1<<k)
   240  	t.mask = 1<<k - 1
   241  	for _, s := range x {
   242  		if !t.insert(s) {
   243  			return false
   244  		}
   245  	}
   246  	return true
   247  }
   248  
   249  // insert inserts s in the table.
   250  func (t *table) insert(s string) bool {
   251  	h1, h2 := t.hash(s)
   252  	if t.tab[h1] == "" {
   253  		t.tab[h1] = s
   254  		return true
   255  	}
   256  	if t.tab[h2] == "" {
   257  		t.tab[h2] = s
   258  		return true
   259  	}
   260  	if t.push(h1, 0) {
   261  		t.tab[h1] = s
   262  		return true
   263  	}
   264  	if t.push(h2, 0) {
   265  		t.tab[h2] = s
   266  		return true
   267  	}
   268  	return false
   269  }
   270  
   271  // push attempts to push aside the entry in slot i.
   272  func (t *table) push(i uint32, depth int) bool {
   273  	if depth > len(t.tab) {
   274  		return false
   275  	}
   276  	s := t.tab[i]
   277  	h1, h2 := t.hash(s)
   278  	j := h1 + h2 - i
   279  	if t.tab[j] != "" && !t.push(j, depth+1) {
   280  		return false
   281  	}
   282  	t.tab[j] = s
   283  	return true
   284  }
   285  
   286  // The lists of element names and attribute keys were taken from
   287  // https://html.spec.whatwg.org/multipage/indices.html#index
   288  // as of the "HTML Living Standard - Last Updated 21 February 2015" version.
   289  
   290  var elements = []string{
   291  	"a",
   292  	"abbr",
   293  	"address",
   294  	"area",
   295  	"article",
   296  	"aside",
   297  	"audio",
   298  	"b",
   299  	"base",
   300  	"bdi",
   301  	"bdo",
   302  	"blockquote",
   303  	"body",
   304  	"br",
   305  	"button",
   306  	"canvas",
   307  	"caption",
   308  	"cite",
   309  	"code",
   310  	"col",
   311  	"colgroup",
   312  	"command",
   313  	"data",
   314  	"datalist",
   315  	"dd",
   316  	"del",
   317  	"details",
   318  	"dfn",
   319  	"dialog",
   320  	"div",
   321  	"dl",
   322  	"dt",
   323  	"em",
   324  	"embed",
   325  	"fieldset",
   326  	"figcaption",
   327  	"figure",
   328  	"footer",
   329  	"form",
   330  	"h1",
   331  	"h2",
   332  	"h3",
   333  	"h4",
   334  	"h5",
   335  	"h6",
   336  	"head",
   337  	"header",
   338  	"hgroup",
   339  	"hr",
   340  	"html",
   341  	"i",
   342  	"iframe",
   343  	"img",
   344  	"input",
   345  	"ins",
   346  	"kbd",
   347  	"keygen",
   348  	"label",
   349  	"legend",
   350  	"li",
   351  	"link",
   352  	"map",
   353  	"mark",
   354  	"menu",
   355  	"menuitem",
   356  	"meta",
   357  	"meter",
   358  	"nav",
   359  	"noscript",
   360  	"object",
   361  	"ol",
   362  	"optgroup",
   363  	"option",
   364  	"output",
   365  	"p",
   366  	"param",
   367  	"pre",
   368  	"progress",
   369  	"q",
   370  	"rp",
   371  	"rt",
   372  	"ruby",
   373  	"s",
   374  	"samp",
   375  	"script",
   376  	"section",
   377  	"select",
   378  	"small",
   379  	"source",
   380  	"span",
   381  	"strong",
   382  	"style",
   383  	"sub",
   384  	"summary",
   385  	"sup",
   386  	"table",
   387  	"tbody",
   388  	"td",
   389  	"template",
   390  	"textarea",
   391  	"tfoot",
   392  	"th",
   393  	"thead",
   394  	"time",
   395  	"title",
   396  	"tr",
   397  	"track",
   398  	"u",
   399  	"ul",
   400  	"var",
   401  	"video",
   402  	"wbr",
   403  }
   404  
   405  // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
   406  
   407  var attributes = []string{
   408  	"abbr",
   409  	"accept",
   410  	"accept-charset",
   411  	"accesskey",
   412  	"action",
   413  	"alt",
   414  	"async",
   415  	"autocomplete",
   416  	"autofocus",
   417  	"autoplay",
   418  	"challenge",
   419  	"charset",
   420  	"checked",
   421  	"cite",
   422  	"class",
   423  	"cols",
   424  	"colspan",
   425  	"command",
   426  	"content",
   427  	"contenteditable",
   428  	"contextmenu",
   429  	"controls",
   430  	"coords",
   431  	"crossorigin",
   432  	"data",
   433  	"datetime",
   434  	"default",
   435  	"defer",
   436  	"dir",
   437  	"dirname",
   438  	"disabled",
   439  	"download",
   440  	"draggable",
   441  	"dropzone",
   442  	"enctype",
   443  	"for",
   444  	"form",
   445  	"formaction",
   446  	"formenctype",
   447  	"formmethod",
   448  	"formnovalidate",
   449  	"formtarget",
   450  	"headers",
   451  	"height",
   452  	"hidden",
   453  	"high",
   454  	"href",
   455  	"hreflang",
   456  	"http-equiv",
   457  	"icon",
   458  	"id",
   459  	"inputmode",
   460  	"ismap",
   461  	"itemid",
   462  	"itemprop",
   463  	"itemref",
   464  	"itemscope",
   465  	"itemtype",
   466  	"keytype",
   467  	"kind",
   468  	"label",
   469  	"lang",
   470  	"list",
   471  	"loop",
   472  	"low",
   473  	"manifest",
   474  	"max",
   475  	"maxlength",
   476  	"media",
   477  	"mediagroup",
   478  	"method",
   479  	"min",
   480  	"minlength",
   481  	"multiple",
   482  	"muted",
   483  	"name",
   484  	"novalidate",
   485  	"open",
   486  	"optimum",
   487  	"pattern",
   488  	"ping",
   489  	"placeholder",
   490  	"poster",
   491  	"preload",
   492  	"radiogroup",
   493  	"readonly",
   494  	"rel",
   495  	"required",
   496  	"reversed",
   497  	"rows",
   498  	"rowspan",
   499  	"sandbox",
   500  	"spellcheck",
   501  	"scope",
   502  	"scoped",
   503  	"seamless",
   504  	"selected",
   505  	"shape",
   506  	"size",
   507  	"sizes",
   508  	"sortable",
   509  	"sorted",
   510  	"span",
   511  	"src",
   512  	"srcdoc",
   513  	"srclang",
   514  	"start",
   515  	"step",
   516  	"style",
   517  	"tabindex",
   518  	"target",
   519  	"title",
   520  	"translate",
   521  	"type",
   522  	"typemustmatch",
   523  	"usemap",
   524  	"value",
   525  	"width",
   526  	"wrap",
   527  }
   528  
   529  var eventHandlers = []string{
   530  	"onabort",
   531  	"onautocomplete",
   532  	"onautocompleteerror",
   533  	"onafterprint",
   534  	"onbeforeprint",
   535  	"onbeforeunload",
   536  	"onblur",
   537  	"oncancel",
   538  	"oncanplay",
   539  	"oncanplaythrough",
   540  	"onchange",
   541  	"onclick",
   542  	"onclose",
   543  	"oncontextmenu",
   544  	"oncuechange",
   545  	"ondblclick",
   546  	"ondrag",
   547  	"ondragend",
   548  	"ondragenter",
   549  	"ondragleave",
   550  	"ondragover",
   551  	"ondragstart",
   552  	"ondrop",
   553  	"ondurationchange",
   554  	"onemptied",
   555  	"onended",
   556  	"onerror",
   557  	"onfocus",
   558  	"onhashchange",
   559  	"oninput",
   560  	"oninvalid",
   561  	"onkeydown",
   562  	"onkeypress",
   563  	"onkeyup",
   564  	"onlanguagechange",
   565  	"onload",
   566  	"onloadeddata",
   567  	"onloadedmetadata",
   568  	"onloadstart",
   569  	"onmessage",
   570  	"onmousedown",
   571  	"onmousemove",
   572  	"onmouseout",
   573  	"onmouseover",
   574  	"onmouseup",
   575  	"onmousewheel",
   576  	"onoffline",
   577  	"ononline",
   578  	"onpagehide",
   579  	"onpageshow",
   580  	"onpause",
   581  	"onplay",
   582  	"onplaying",
   583  	"onpopstate",
   584  	"onprogress",
   585  	"onratechange",
   586  	"onreset",
   587  	"onresize",
   588  	"onscroll",
   589  	"onseeked",
   590  	"onseeking",
   591  	"onselect",
   592  	"onshow",
   593  	"onsort",
   594  	"onstalled",
   595  	"onstorage",
   596  	"onsubmit",
   597  	"onsuspend",
   598  	"ontimeupdate",
   599  	"ontoggle",
   600  	"onunload",
   601  	"onvolumechange",
   602  	"onwaiting",
   603  }
   604  
   605  // extra are ad-hoc values not covered by any of the lists above.
   606  var extra = []string{
   607  	"align",
   608  	"annotation",
   609  	"annotation-xml",
   610  	"applet",
   611  	"basefont",
   612  	"bgsound",
   613  	"big",
   614  	"blink",
   615  	"center",
   616  	"color",
   617  	"desc",
   618  	"face",
   619  	"font",
   620  	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
   621  	"foreignobject",
   622  	"frame",
   623  	"frameset",
   624  	"image",
   625  	"isindex",
   626  	"listing",
   627  	"malignmark",
   628  	"marquee",
   629  	"math",
   630  	"mglyph",
   631  	"mi",
   632  	"mn",
   633  	"mo",
   634  	"ms",
   635  	"mtext",
   636  	"nobr",
   637  	"noembed",
   638  	"noframes",
   639  	"plaintext",
   640  	"prompt",
   641  	"public",
   642  	"spacer",
   643  	"strike",
   644  	"svg",
   645  	"system",
   646  	"tt",
   647  	"xmp",
   648  }