github.com/zmap/zlint@v1.1.0/cmd/zlint-gtld-update/main.go (about) 1 /* 2 * ZLint Copyright 2018 Regents of the University of Michigan 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy 6 * of the License at http://www.apache.org/licenses/LICENSE-2.0 7 * 8 * Unless required by applicable law or agreed to in writing, software 9 * distributed under the License is distributed on an "AS IS" BASIS, 10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 11 * implied. See the License for the specific language governing 12 * permissions and limitations under the License. 13 */ 14 15 package main 16 17 import ( 18 "bytes" 19 "encoding/json" 20 "flag" 21 "fmt" 22 "go/format" 23 "html/template" 24 "io" 25 "io/ioutil" 26 "net" 27 "net/http" 28 "os" 29 "strings" 30 "time" 31 32 log "github.com/sirupsen/logrus" 33 "github.com/zmap/zlint/util" 34 ) 35 36 const ( 37 // ICANN_GTLD_JSON is the URL for the ICANN gTLD JSON registry (version 2). 38 // This registry does not contain ccTLDs but does carry full gTLD information 39 // needed to determine validity periods. 40 // See https://www.icann.org/resources/pages/registries/registries-en for more 41 // information. 42 ICANN_GTLD_JSON = "https://www.icann.org/resources/registries/gtlds/v2/gtlds.json" 43 // ICANN_TLDS is the URL for the ICANN list of valid top-level domains 44 // maintained by the IANA. It contains both ccTLDs and gTLDs but does not 45 // carry sufficient granularity to determine validity periods. 46 // See https://www.icann.org/resources/pages/tlds-2012-02-25-en for more 47 // information. 48 ICANN_TLDS = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" 49 ) 50 51 var ( 52 // httpClient is a http.Client instance configured with timeouts. 53 httpClient = &http.Client{ 54 Transport: &http.Transport{ 55 Dial: (&net.Dialer{ 56 Timeout: 15 * time.Second, 57 KeepAlive: 15 * time.Second, 58 }).Dial, 59 TLSHandshakeTimeout: 5 * time.Second, 60 ResponseHeaderTimeout: 5 * time.Second, 61 ExpectContinueTimeout: 1 * time.Second, 62 }, 63 } 64 // gTLDMapTemplate is a template that produces a Golang source code file in 65 // the "util" package containing a single member variable, a map of strings to 66 // `util.GTLDPeriod` objects called `tldMap`. 67 gTLDMapTemplate = template.Must(template.New("gTLDMapTemplate").Parse( 68 `// Code generated by go generate; DO NOT EDIT. 69 // This file was generated by zlint-gtld-update. 70 71 /* 72 * ZLint Copyright 2018 Regents of the University of Michigan 73 * 74 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 75 * use this file except in compliance with the License. You may obtain a copy 76 * of the License at http://www.apache.org/licenses/LICENSE-2.0 77 * 78 * Unless required by applicable law or agreed to in writing, software 79 * distributed under the License is distributed on an "AS IS" BASIS, 80 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 81 * implied. See the License for the specific language governing 82 * permissions and limitations under the License. 83 */ 84 85 package util 86 87 var tldMap = map[string]GTLDPeriod{ 88 {{- range .GTLDs }} 89 "{{ .GTLD }}": { 90 GTLD: "{{ .GTLD }}", 91 DelegationDate: "{{ .DelegationDate }}", 92 RemovalDate: "{{ .RemovalDate }}", 93 }, 94 {{- end }} 95 // .onion is a special case and not a general gTLD. However, it is allowed in 96 // some circumstances in the web PKI so the Zlint gtldMap includes it with 97 // a delegationDate based on the CABF ballot to allow EV issuance for .onion 98 // domains: https://cabforum.org/2015/02/18/ballot-144-validation-rules-dot-onion-names/ 99 "onion": { 100 GTLD: "onion", 101 DelegationDate: "2015-02-18", 102 RemovalDate: "", 103 }, 104 } 105 `)) 106 ) 107 108 // getData fetches the response body bytes from an HTTP get to the provider url, 109 // or returns an error. 110 func getData(url string) ([]byte, error) { 111 resp, err := httpClient.Get(url) 112 if err != nil { 113 return nil, fmt.Errorf("unable to fetch data from %q : %s", 114 url, err) 115 } 116 defer resp.Body.Close() 117 118 if resp.StatusCode != http.StatusOK { 119 return nil, fmt.Errorf("unexpected status code fetching data "+ 120 "from %q : expected status %d got %d", 121 url, http.StatusOK, resp.StatusCode) 122 } 123 124 respBody, err := ioutil.ReadAll(resp.Body) 125 if err != nil { 126 return nil, fmt.Errorf("unexpected error reading response "+ 127 "body from %q : %s", 128 url, err) 129 } 130 return respBody, nil 131 } 132 133 // getTLDData fetches the ICANN_TLDS list and uses the information to build 134 // and return a list of util.GTLDPeriod objects (or an error if anything fails). 135 // Since this data source only contains TLD names and not any information 136 // about delegation/removal all of the returned `util.GTLDPeriod` objects will 137 // have the DelegationDate "1985-01-01" (matching the `.com` delegation date) 138 // and no RemovalDate. 139 func getTLDData() ([]util.GTLDPeriod, error) { 140 respBody, err := getData(ICANN_TLDS) 141 if err != nil { 142 return nil, fmt.Errorf("error getting ICANN TLD list : %s", err) 143 } 144 tlds := strings.Split(string(respBody), "\n") 145 146 var results []util.GTLDPeriod 147 for _, tld := range tlds { 148 // Skip empty lines and the header comment line 149 if strings.TrimSpace(tld) == "" || strings.HasPrefix(tld, "#") { 150 continue 151 } 152 results = append(results, util.GTLDPeriod{ 153 GTLD: strings.ToLower(tld), 154 // The TLD list doesn't indicate when any of the TLDs were delegated so 155 // assume these TLDs were all delegated at the same time as "com". 156 DelegationDate: "1985-01-01", 157 }) 158 } 159 return results, nil 160 } 161 162 // getGTLDData fetches the ICANN_GTLD_JSON and parses it into a list of 163 // util.GTLDPeriod objects, or returns an error. The gTLDEntries are returned 164 // as-is and may contain entries that were never delegated from the root DNS. 165 func getGTLDData() ([]util.GTLDPeriod, error) { 166 respBody, err := getData(ICANN_GTLD_JSON) 167 if err != nil { 168 return nil, fmt.Errorf("error getting ICANN gTLD JSON : %s", err) 169 } 170 171 var results struct { 172 GTLDs []util.GTLDPeriod 173 } 174 if err := json.Unmarshal(respBody, &results); err != nil { 175 return nil, fmt.Errorf("unexpected error unmarshaling ICANN gTLD JSON response "+ 176 "body from %q : %s", 177 ICANN_GTLD_JSON, err) 178 } 179 return results.GTLDs, nil 180 } 181 182 // delegatedGTLDs filters the provided list of GTLDPeriods removing any entries 183 // that were never delegated from the root DNS. 184 func delegatedGTLDs(entries []util.GTLDPeriod) []util.GTLDPeriod { 185 var results []util.GTLDPeriod 186 for _, gTLD := range entries { 187 if gTLD.DelegationDate == "" { 188 continue 189 } 190 results = append(results, gTLD) 191 } 192 return results 193 } 194 195 // validateGTLDs checks that all entries have a valid parseable DelegationDate 196 // string, and if not-empty, a valid parseable RemovalDate string. This function 197 // assumes an entry with an empty DelegationDate is an error. Use 198 // `delegatedGTLDs` to filter out entries that were never delegated before 199 // validating. 200 func validateGTLDs(entries []util.GTLDPeriod) error { 201 for _, gTLD := range entries { 202 // All entries should have a valid delegation date 203 if _, err := time.Parse(util.GTLDPeriodDateFormat, gTLD.DelegationDate); err != nil { 204 return err 205 } 206 // a gTLD that has not been removed has an empty RemovalDate and that's OK 207 if _, err := time.Parse(util.GTLDPeriodDateFormat, gTLD.RemovalDate); gTLD.RemovalDate != "" && err != nil { 208 return err 209 } 210 } 211 return nil 212 } 213 214 // renderGTLDMap fetches the ICANN gTLD data, filters out undelegated entries, 215 // validates the remaining entries have parseable dates, and renders the 216 // gTLDMapTemplate to the provided writer using the validated entries (or 217 // returns an error if any of the aforementioned steps fail). It then fetches 218 // the ICANN TLD data, and uses it to populate any missing entries for ccTLDs. 219 // These entries will have a default delegationDate because the data source is 220 // not specific enough to provide one. The produced output text is a Golang 221 // source code file in the `util` package that contains a single map variable 222 // containing GTLDPeriod objects created with the ICANN data. 223 func renderGTLDMap(writer io.Writer) error { 224 // Get all of ICANN's gTLDs including ones that haven't been delegated. 225 allGTLDs, err := getGTLDData() 226 if err != nil { 227 return err 228 } 229 230 // Filter out the non-delegated gTLD entries 231 delegatedGTLDs := delegatedGTLDs(allGTLDs) 232 233 // Validate that all of the delegated gTLDs have correct dates 234 if err := validateGTLDs(delegatedGTLDs); err != nil { 235 return err 236 } 237 238 // Get all of the TLDs. This data source doesn't provide delegationDates and 239 // so we only want to use it to populate missing entries in `delegatedGTLDs`, 240 // not to replace any existing entries that have more specific information 241 // about the validity period for the TLD. 242 allTLDs, err := getTLDData() 243 if err != nil { 244 return err 245 } 246 247 tldMap := make(map[string]util.GTLDPeriod) 248 249 // Deduplicate delegatedGTLDs into the tldMap first 250 for _, tld := range delegatedGTLDs { 251 tldMap[tld.GTLD] = tld 252 } 253 254 // Then populate any missing entries from the allTLDs list 255 for _, tld := range allTLDs { 256 if _, found := tldMap[tld.GTLD]; !found { 257 tldMap[tld.GTLD] = tld 258 } 259 } 260 261 templateData := struct { 262 GTLDs map[string]util.GTLDPeriod 263 }{ 264 GTLDs: tldMap, 265 } 266 267 // Render the gTLD map to a buffer with the delegated gTLD data 268 var buf bytes.Buffer 269 if err := gTLDMapTemplate.Execute(&buf, templateData); err != nil { 270 return err 271 } 272 273 // format the buffer so it won't trip up the `gofmt_test.go` checks 274 formatted, err := format.Source(buf.Bytes()) 275 if err != nil { 276 return err 277 } 278 279 // Write the formatted buffer to the writer 280 _, err = writer.Write(formatted) 281 if err != nil { 282 return err 283 } 284 return nil 285 } 286 287 // init sets up command line flags 288 func init() { 289 flag.Usage = func() { 290 fmt.Fprintf(os.Stderr, "Usage: %s [flags]\n", os.Args[0]) 291 flag.PrintDefaults() 292 } 293 flag.Parse() 294 log.SetLevel(log.InfoLevel) 295 } 296 297 // main handles rendering a gTLD map to either standard out (when no argument is 298 // provided) or to the provided filename. If an error occurs it is printed to 299 // standard err and the program terminates with a non-zero exit status. 300 func main() { 301 errQuit := func(err error) { 302 fmt.Fprintf(os.Stderr, "error updating gTLD map: %s\n", err) 303 os.Exit(1) 304 } 305 306 // Default to writing to standard out 307 writer := os.Stdout 308 if flag.NArg() > 0 { 309 // If a filename is specified as a command line flag then open it (creating 310 // if needed), truncate the existing contents, and use the file as the 311 // writer instead of standard out 312 filename := flag.Args()[0] 313 f, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0664) 314 if err != nil { 315 errQuit(err) 316 } 317 defer f.Close() 318 writer = f 319 } 320 321 if err := renderGTLDMap(writer); err != nil { 322 errQuit(err) 323 } 324 }