github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/net/html/atom/gen.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ignore
     6  // +build ignore
     7  
     8  //go:generate go run gen.go
     9  //go:generate go run gen.go -test
    10  
    11  package main
    12  
    13  import (
    14  	"bytes"
    15  	"flag"
    16  	"fmt"
    17  	"go/format"
    18  	"io/ioutil"
    19  	"math/rand"
    20  	"os"
    21  	"sort"
    22  	"strings"
    23  )
    24  
    25  // identifier converts s to a Go exported identifier.
    26  // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
    27  func identifier(s string) string {
    28  	b := make([]byte, 0, len(s))
    29  	cap := true
    30  	for _, c := range s {
    31  		if c == '-' {
    32  			cap = true
    33  			continue
    34  		}
    35  		if cap && 'a' <= c && c <= 'z' {
    36  			c -= 'a' - 'A'
    37  		}
    38  		cap = false
    39  		b = append(b, byte(c))
    40  	}
    41  	return string(b)
    42  }
    43  
// test is the -test command-line flag. When it is set, main writes
// table_test.go and returns instead of generating table.go.
var test = flag.Bool("test", false, "generate table_test.go")
    45  
    46  func genFile(name string, buf *bytes.Buffer) {
    47  	b, err := format.Source(buf.Bytes())
    48  	if err != nil {
    49  		fmt.Fprintln(os.Stderr, err)
    50  		os.Exit(1)
    51  	}
    52  	if err := ioutil.WriteFile(name, b, 0644); err != nil {
    53  		fmt.Fprintln(os.Stderr, err)
    54  		os.Exit(1)
    55  	}
    56  }
    57  
    58  func main() {
    59  	flag.Parse()
    60  
    61  	var all []string
    62  	all = append(all, elements...)
    63  	all = append(all, attributes...)
    64  	all = append(all, eventHandlers...)
    65  	all = append(all, extra...)
    66  	sort.Strings(all)
    67  
    68  	// uniq - lists have dups
    69  	w := 0
    70  	for _, s := range all {
    71  		if w == 0 || all[w-1] != s {
    72  			all[w] = s
    73  			w++
    74  		}
    75  	}
    76  	all = all[:w]
    77  
    78  	if *test {
    79  		var buf bytes.Buffer
    80  		fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
    81  		fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
    82  		fmt.Fprintln(&buf, "package atom\n")
    83  		fmt.Fprintln(&buf, "var testAtomList = []string{")
    84  		for _, s := range all {
    85  			fmt.Fprintf(&buf, "\t%q,\n", s)
    86  		}
    87  		fmt.Fprintln(&buf, "}")
    88  
    89  		genFile("table_test.go", &buf)
    90  		return
    91  	}
    92  
    93  	// Find hash that minimizes table size.
    94  	var best *table
    95  	for i := 0; i < 1000000; i++ {
    96  		if best != nil && 1<<(best.k-1) < len(all) {
    97  			break
    98  		}
    99  		h := rand.Uint32()
   100  		for k := uint(0); k <= 16; k++ {
   101  			if best != nil && k >= best.k {
   102  				break
   103  			}
   104  			var t table
   105  			if t.init(h, k, all) {
   106  				best = &t
   107  				break
   108  			}
   109  		}
   110  	}
   111  	if best == nil {
   112  		fmt.Fprintf(os.Stderr, "failed to construct string table\n")
   113  		os.Exit(1)
   114  	}
   115  
   116  	// Lay out strings, using overlaps when possible.
   117  	layout := append([]string{}, all...)
   118  
   119  	// Remove strings that are substrings of other strings
   120  	for changed := true; changed; {
   121  		changed = false
   122  		for i, s := range layout {
   123  			if s == "" {
   124  				continue
   125  			}
   126  			for j, t := range layout {
   127  				if i != j && t != "" && strings.Contains(s, t) {
   128  					changed = true
   129  					layout[j] = ""
   130  				}
   131  			}
   132  		}
   133  	}
   134  
   135  	// Join strings where one suffix matches another prefix.
   136  	for {
   137  		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
   138  		// maximizing overlap length k.
   139  		besti := -1
   140  		bestj := -1
   141  		bestk := 0
   142  		for i, s := range layout {
   143  			if s == "" {
   144  				continue
   145  			}
   146  			for j, t := range layout {
   147  				if i == j {
   148  					continue
   149  				}
   150  				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
   151  					if s[len(s)-k:] == t[:k] {
   152  						besti = i
   153  						bestj = j
   154  						bestk = k
   155  					}
   156  				}
   157  			}
   158  		}
   159  		if bestk > 0 {
   160  			layout[besti] += layout[bestj][bestk:]
   161  			layout[bestj] = ""
   162  			continue
   163  		}
   164  		break
   165  	}
   166  
   167  	text := strings.Join(layout, "")
   168  
   169  	atom := map[string]uint32{}
   170  	for _, s := range all {
   171  		off := strings.Index(text, s)
   172  		if off < 0 {
   173  			panic("lost string " + s)
   174  		}
   175  		atom[s] = uint32(off<<8 | len(s))
   176  	}
   177  
   178  	var buf bytes.Buffer
   179  	// Generate the Go code.
   180  	fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
   181  	fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
   182  	fmt.Fprintln(&buf, "package atom\n\nconst (")
   183  
   184  	// compute max len
   185  	maxLen := 0
   186  	for _, s := range all {
   187  		if maxLen < len(s) {
   188  			maxLen = len(s)
   189  		}
   190  		fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
   191  	}
   192  	fmt.Fprintln(&buf, ")\n")
   193  
   194  	fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
   195  	fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
   196  
   197  	fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
   198  	for i, s := range best.tab {
   199  		if s == "" {
   200  			continue
   201  		}
   202  		fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
   203  	}
   204  	fmt.Fprintf(&buf, "}\n")
   205  	datasize := (1 << best.k) * 4
   206  
   207  	fmt.Fprintln(&buf, "const atomText =")
   208  	textsize := len(text)
   209  	for len(text) > 60 {
   210  		fmt.Fprintf(&buf, "\t%q +\n", text[:60])
   211  		text = text[60:]
   212  	}
   213  	fmt.Fprintf(&buf, "\t%q\n\n", text)
   214  
   215  	genFile("table.go", &buf)
   216  
   217  	fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
   218  }
   219  
   220  type byLen []string
   221  
   222  func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
   223  func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   224  func (x byLen) Len() int           { return len(x) }
   225  
   226  // fnv computes the FNV hash with an arbitrary starting value h.
   227  func fnv(h uint32, s string) uint32 {
   228  	for i := 0; i < len(s); i++ {
   229  		h ^= uint32(s[i])
   230  		h *= 16777619
   231  	}
   232  	return h
   233  }
   234  
   235  // A table represents an attempt at constructing the lookup table.
   236  // The lookup table uses cuckoo hashing, meaning that each string
   237  // can be found in one of two positions.
   238  type table struct {
   239  	h0   uint32
   240  	k    uint
   241  	mask uint32
   242  	tab  []string
   243  }
   244  
   245  // hash returns the two hashes for s.
   246  func (t *table) hash(s string) (h1, h2 uint32) {
   247  	h := fnv(t.h0, s)
   248  	h1 = h & t.mask
   249  	h2 = (h >> 16) & t.mask
   250  	return
   251  }
   252  
   253  // init initializes the table with the given parameters.
   254  // h0 is the initial hash value,
   255  // k is the number of bits of hash value to use, and
   256  // x is the list of strings to store in the table.
   257  // init returns false if the table cannot be constructed.
   258  func (t *table) init(h0 uint32, k uint, x []string) bool {
   259  	t.h0 = h0
   260  	t.k = k
   261  	t.tab = make([]string, 1<<k)
   262  	t.mask = 1<<k - 1
   263  	for _, s := range x {
   264  		if !t.insert(s) {
   265  			return false
   266  		}
   267  	}
   268  	return true
   269  }
   270  
   271  // insert inserts s in the table.
   272  func (t *table) insert(s string) bool {
   273  	h1, h2 := t.hash(s)
   274  	if t.tab[h1] == "" {
   275  		t.tab[h1] = s
   276  		return true
   277  	}
   278  	if t.tab[h2] == "" {
   279  		t.tab[h2] = s
   280  		return true
   281  	}
   282  	if t.push(h1, 0) {
   283  		t.tab[h1] = s
   284  		return true
   285  	}
   286  	if t.push(h2, 0) {
   287  		t.tab[h2] = s
   288  		return true
   289  	}
   290  	return false
   291  }
   292  
   293  // push attempts to push aside the entry in slot i.
   294  func (t *table) push(i uint32, depth int) bool {
   295  	if depth > len(t.tab) {
   296  		return false
   297  	}
   298  	s := t.tab[i]
   299  	h1, h2 := t.hash(s)
   300  	j := h1 + h2 - i
   301  	if t.tab[j] != "" && !t.push(j, depth+1) {
   302  		return false
   303  	}
   304  	t.tab[j] = s
   305  	return true
   306  }
   307  
   308  // The lists of element names and attribute keys were taken from
   309  // https://html.spec.whatwg.org/multipage/indices.html#index
   310  // as of the "HTML Living Standard - Last Updated 16 April 2018" version.
   311  
// elements lists HTML element names. main merges it with the other name
// lists in this file, then sorts and de-duplicates the result.
//
// "command", "keygen" and "menuitem" have been removed from the spec,
// but are kept here for backwards compatibility.
var elements = []string{
	"a",
	"abbr",
	"address",
	"area",
	"article",
	"aside",
	"audio",
	"b",
	"base",
	"bdi",
	"bdo",
	"blockquote",
	"body",
	"br",
	"button",
	"canvas",
	"caption",
	"cite",
	"code",
	"col",
	"colgroup",
	"command",
	"data",
	"datalist",
	"dd",
	"del",
	"details",
	"dfn",
	"dialog",
	"div",
	"dl",
	"dt",
	"em",
	"embed",
	"fieldset",
	"figcaption",
	"figure",
	"footer",
	"form",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"head",
	"header",
	"hgroup",
	"hr",
	"html",
	"i",
	"iframe",
	"img",
	"input",
	"ins",
	"kbd",
	"keygen",
	"label",
	"legend",
	"li",
	"link",
	"main",
	"map",
	"mark",
	"menu",
	"menuitem",
	"meta",
	"meter",
	"nav",
	"noscript",
	"object",
	"ol",
	"optgroup",
	"option",
	"output",
	"p",
	"param",
	"picture",
	"pre",
	"progress",
	"q",
	"rp",
	"rt",
	"ruby",
	"s",
	"samp",
	"script",
	"section",
	"select",
	"slot",
	"small",
	"source",
	"span",
	"strong",
	"style",
	"sub",
	"summary",
	"sup",
	"table",
	"tbody",
	"td",
	"template",
	"textarea",
	"tfoot",
	"th",
	"thead",
	"time",
	"title",
	"tr",
	"track",
	"u",
	"ul",
	"var",
	"video",
	"wbr",
}
   431  
   432  // https://html.spec.whatwg.org/multipage/indices.html#attributes-3
   433  //
   434  // "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
   435  // "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
   436  // but are kept here for backwards compatibility.
   437  var attributes = []string{
   438  	"abbr",
   439  	"accept",
   440  	"accept-charset",
   441  	"accesskey",
   442  	"action",
   443  	"allowfullscreen",
   444  	"allowpaymentrequest",
   445  	"allowusermedia",
   446  	"alt",
   447  	"as",
   448  	"async",
   449  	"autocomplete",
   450  	"autofocus",
   451  	"autoplay",
   452  	"challenge",
   453  	"charset",
   454  	"checked",
   455  	"cite",
   456  	"class",
   457  	"color",
   458  	"cols",
   459  	"colspan",
   460  	"command",
   461  	"content",
   462  	"contenteditable",
   463  	"contextmenu",
   464  	"controls",
   465  	"coords",
   466  	"crossorigin",
   467  	"data",
   468  	"datetime",
   469  	"default",
   470  	"defer",
   471  	"dir",
   472  	"dirname",
   473  	"disabled",
   474  	"download",
   475  	"draggable",
   476  	"dropzone",
   477  	"enctype",
   478  	"for",
   479  	"form",
   480  	"formaction",
   481  	"formenctype",
   482  	"formmethod",
   483  	"formnovalidate",
   484  	"formtarget",
   485  	"headers",
   486  	"height",
   487  	"hidden",
   488  	"high",
   489  	"href",
   490  	"hreflang",
   491  	"http-equiv",
   492  	"icon",
   493  	"id",
   494  	"inputmode",
   495  	"integrity",
   496  	"is",
   497  	"ismap",
   498  	"itemid",
   499  	"itemprop",
   500  	"itemref",
   501  	"itemscope",
   502  	"itemtype",
   503  	"keytype",
   504  	"kind",
   505  	"label",
   506  	"lang",
   507  	"list",
   508  	"loop",
   509  	"low",
   510  	"manifest",
   511  	"max",
   512  	"maxlength",
   513  	"media",
   514  	"mediagroup",
   515  	"method",
   516  	"min",
   517  	"minlength",
   518  	"multiple",
   519  	"muted",
   520  	"name",
   521  	"nomodule",
   522  	"nonce",
   523  	"novalidate",
   524  	"open",
   525  	"optimum",
   526  	"pattern",
   527  	"ping",
   528  	"placeholder",
   529  	"playsinline",
   530  	"poster",
   531  	"preload",
   532  	"radiogroup",
   533  	"readonly",
   534  	"referrerpolicy",
   535  	"rel",
   536  	"required",
   537  	"reversed",
   538  	"rows",
   539  	"rowspan",
   540  	"sandbox",
   541  	"spellcheck",
   542  	"scope",
   543  	"scoped",
   544  	"seamless",
   545  	"selected",
   546  	"shape",
   547  	"size",
   548  	"sizes",
   549  	"sortable",
   550  	"sorted",
   551  	"slot",
   552  	"span",
   553  	"spellcheck",
   554  	"src",
   555  	"srcdoc",
   556  	"srclang",
   557  	"srcset",
   558  	"start",
   559  	"step",
   560  	"style",
   561  	"tabindex",
   562  	"target",
   563  	"title",
   564  	"translate",
   565  	"type",
   566  	"typemustmatch",
   567  	"updateviacache",
   568  	"usemap",
   569  	"value",
   570  	"width",
   571  	"workertype",
   572  	"wrap",
   573  }
   574  
// eventHandlers lists the "on..." event handler attribute names. main
// merges it with the other name lists in this file, then sorts and
// de-duplicates the result, so the order of entries here does not matter.
//
// "onautocomplete", "onautocompleteerror", "onmousewheel",
// "onshow" and "onsort" have been removed from the spec,
// but are kept here for backwards compatibility.
var eventHandlers = []string{
	"onabort",
	"onautocomplete",
	"onautocompleteerror",
	"onauxclick",
	"onafterprint",
	"onbeforeprint",
	"onbeforeunload",
	"onblur",
	"oncancel",
	"oncanplay",
	"oncanplaythrough",
	"onchange",
	"onclick",
	"onclose",
	"oncontextmenu",
	"oncopy",
	"oncuechange",
	"oncut",
	"ondblclick",
	"ondrag",
	"ondragend",
	"ondragenter",
	"ondragexit",
	"ondragleave",
	"ondragover",
	"ondragstart",
	"ondrop",
	"ondurationchange",
	"onemptied",
	"onended",
	"onerror",
	"onfocus",
	"onhashchange",
	"oninput",
	"oninvalid",
	"onkeydown",
	"onkeypress",
	"onkeyup",
	"onlanguagechange",
	"onload",
	"onloadeddata",
	"onloadedmetadata",
	"onloadend",
	"onloadstart",
	"onmessage",
	"onmessageerror",
	"onmousedown",
	"onmouseenter",
	"onmouseleave",
	"onmousemove",
	"onmouseout",
	"onmouseover",
	"onmouseup",
	"onmousewheel",
	"onwheel",
	"onoffline",
	"ononline",
	"onpagehide",
	"onpageshow",
	"onpaste",
	"onpause",
	"onplay",
	"onplaying",
	"onpopstate",
	"onprogress",
	"onratechange",
	"onreset",
	"onresize",
	"onrejectionhandled",
	"onscroll",
	"onsecuritypolicyviolation",
	"onseeked",
	"onseeking",
	"onselect",
	"onshow",
	"onsort",
	"onstalled",
	"onstorage",
	"onsubmit",
	"onsuspend",
	"ontimeupdate",
	"ontoggle",
	"onunhandledrejection",
	"onunload",
	"onvolumechange",
	"onwaiting",
}
   666  
// extra holds ad-hoc values not covered by any of the lists above.
// Some entries (for example "color") also appear in another list; main
// de-duplicates the merged list, so such repeats are harmless.
var extra = []string{
	"acronym",
	"align",
	"annotation",
	"annotation-xml",
	"applet",
	"basefont",
	"bgsound",
	"big",
	"blink",
	"center",
	"color",
	"desc",
	"face",
	"font",
	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
	"foreignobject",
	"frame",
	"frameset",
	"image",
	"isindex", // "isindex" has been removed from the spec, but is kept here for backwards compatibility.
	"listing",
	"malignmark",
	"marquee",
	"math",
	"mglyph",
	"mi",
	"mn",
	"mo",
	"ms",
	"mtext",
	"nobr",
	"noembed",
	"noframes",
	"plaintext",
	"prompt",
	"public",
	"rb",
	"rtc",
	"spacer",
	"strike",
	"svg",
	"system",
	"tt",
	"xmp",
}