github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/html/doctype.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package html 6 7 import ( 8 "strings" 9 ) 10 11 // parseDoctype parses the data from a DoctypeToken into a name, 12 // public identifier, and system identifier. It returns a Node whose Type 13 // is DoctypeNode, whose Data is the name, and which has attributes 14 // named "system" and "public" for the two identifiers if they were present. 15 // quirks is whether the document should be parsed in "quirks mode". 16 func parseDoctype(s string) (n *Node, quirks bool) { 17 n = &Node{Type: DoctypeNode} 18 19 // Find the name. 20 space := strings.IndexAny(s, whitespace) 21 if space == -1 { 22 space = len(s) 23 } 24 n.Data = s[:space] 25 // The comparison to "html" is case-sensitive. 26 if n.Data != "html" { 27 quirks = true 28 } 29 n.Data = strings.ToLower(n.Data) 30 s = strings.TrimLeft(s[space:], whitespace) 31 32 if len(s) < 6 { 33 // It can't start with "PUBLIC" or "SYSTEM". 34 // Ignore the rest of the string. 
35 return n, quirks || s != "" 36 } 37 38 key := strings.ToLower(s[:6]) 39 s = s[6:] 40 for key == "public" || key == "system" { 41 s = strings.TrimLeft(s, whitespace) 42 if s == "" { 43 break 44 } 45 quote := s[0] 46 if quote != '"' && quote != '\'' { 47 break 48 } 49 s = s[1:] 50 q := strings.IndexRune(s, rune(quote)) 51 var id string 52 if q == -1 { 53 id = s 54 s = "" 55 } else { 56 id = s[:q] 57 s = s[q+1:] 58 } 59 n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) 60 if key == "public" { 61 key = "system" 62 } else { 63 key = "" 64 } 65 } 66 67 if key != "" || s != "" { 68 quirks = true 69 } else if len(n.Attr) > 0 { 70 if n.Attr[0].Key == "public" { 71 public := strings.ToLower(n.Attr[0].Val) 72 switch public { 73 case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": 74 quirks = true 75 default: 76 for _, q := range quirkyIDs { 77 if strings.HasPrefix(public, q) { 78 quirks = true 79 break 80 } 81 } 82 } 83 // The following two public IDs only cause quirks mode if there is no system ID. 84 if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || 85 strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { 86 quirks = true 87 } 88 } 89 if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && 90 strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { 91 quirks = true 92 } 93 } 94 95 return n, quirks 96 } 97 98 // quirkyIDs is a list of public doctype identifiers that cause a document 99 // to be interpreted in quirks mode. The identifiers should be in lower case. 
// quirkyIDs lists the public doctype identifiers that put a document into
// quirks mode. Every entry must be written in lower case.
var quirkyIDs = []string{
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",

	// Entries starting with "-//ietf//".
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",

	"-//metrius//dtd metrius presentational//",

	// Entries starting with "-//microsoft//".
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",

	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",

	// Entries starting with "-//w3c//" and "-//w3o//".
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",

	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}