github.com/zmap/zlint@v1.1.0/cmd/zlint-gtld-update/main.go (about)

     1  /*
     2   * ZLint Copyright 2018 Regents of the University of Michigan
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License"); you may not
     5   * use this file except in compliance with the License. You may obtain a copy
     6   * of the License at http://www.apache.org/licenses/LICENSE-2.0
     7   *
     8   * Unless required by applicable law or agreed to in writing, software
     9   * distributed under the License is distributed on an "AS IS" BASIS,
    10   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    11   * implied. See the License for the specific language governing
    12   * permissions and limitations under the License.
    13   */
    14  
    15  package main
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/json"
    20  	"flag"
    21  	"fmt"
    22  	"go/format"
    23  	"html/template"
    24  	"io"
    25  	"io/ioutil"
    26  	"net"
    27  	"net/http"
    28  	"os"
    29  	"strings"
    30  	"time"
    31  
    32  	log "github.com/sirupsen/logrus"
    33  	"github.com/zmap/zlint/util"
    34  )
    35  
    36  const (
    37  	// ICANN_GTLD_JSON is the URL for the ICANN gTLD JSON registry (version 2).
    38  	// This registry does not contain ccTLDs but does carry full gTLD information
    39  	// needed to determine validity periods.
    40  	// See https://www.icann.org/resources/pages/registries/registries-en for more
    41  	// information.
    42  	ICANN_GTLD_JSON = "https://www.icann.org/resources/registries/gtlds/v2/gtlds.json"
    43  	// ICANN_TLDS is the URL for the ICANN list of valid top-level domains
    44  	// maintained by the IANA. It contains both ccTLDs and gTLDs but does not
    45  	// carry sufficient granularity to determine validity periods.
    46  	// See https://www.icann.org/resources/pages/tlds-2012-02-25-en for more
    47  	// information.
    48  	ICANN_TLDS = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
    49  )
    50  
    51  var (
    52  	// httpClient is a http.Client instance configured with timeouts.
    53  	httpClient = &http.Client{
    54  		Transport: &http.Transport{
    55  			Dial: (&net.Dialer{
    56  				Timeout:   15 * time.Second,
    57  				KeepAlive: 15 * time.Second,
    58  			}).Dial,
    59  			TLSHandshakeTimeout:   5 * time.Second,
    60  			ResponseHeaderTimeout: 5 * time.Second,
    61  			ExpectContinueTimeout: 1 * time.Second,
    62  		},
    63  	}
    64  	// gTLDMapTemplate is a template that produces a Golang source code file in
    65  	// the "util" package containing a single member variable, a map of strings to
    66  	// `util.GTLDPeriod` objects called `tldMap`.
    67  	gTLDMapTemplate = template.Must(template.New("gTLDMapTemplate").Parse(
    68  		`// Code generated by go generate; DO NOT EDIT.
    69  // This file was generated by zlint-gtld-update.
    70  
    71  /*
    72   * ZLint Copyright 2018 Regents of the University of Michigan
    73   *
    74   * Licensed under the Apache License, Version 2.0 (the "License"); you may not
    75   * use this file except in compliance with the License. You may obtain a copy
    76   * of the License at http://www.apache.org/licenses/LICENSE-2.0
    77   *
    78   * Unless required by applicable law or agreed to in writing, software
    79   * distributed under the License is distributed on an "AS IS" BASIS,
    80   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    81   * implied. See the License for the specific language governing
    82   * permissions and limitations under the License.
    83   */
    84  
    85  package util
    86  
    87  var tldMap = map[string]GTLDPeriod{
    88  {{- range .GTLDs }}
    89  	"{{ .GTLD }}": {
    90  		GTLD: "{{ .GTLD }}",
    91  		DelegationDate: "{{ .DelegationDate }}",
    92  		RemovalDate: "{{ .RemovalDate }}",
    93  	},
    94  {{- end }}
    95  	// .onion is a special case and not a general gTLD. However, it is allowed in
    96  	// some circumstances in the web PKI so the Zlint gtldMap includes it with
    97  	// a delegationDate based on the CABF ballot to allow EV issuance for .onion
    98  	// domains: https://cabforum.org/2015/02/18/ballot-144-validation-rules-dot-onion-names/
    99  	"onion": {
   100  		GTLD: "onion",
   101  		DelegationDate: "2015-02-18",
   102  		RemovalDate: "",
   103  	},
   104  }
   105  `))
   106  )
   107  
   108  // getData fetches the response body bytes from an HTTP get to the provider url,
   109  // or returns an error.
   110  func getData(url string) ([]byte, error) {
   111  	resp, err := httpClient.Get(url)
   112  	if err != nil {
   113  		return nil, fmt.Errorf("unable to fetch data from %q : %s",
   114  			url, err)
   115  	}
   116  	defer resp.Body.Close()
   117  
   118  	if resp.StatusCode != http.StatusOK {
   119  		return nil, fmt.Errorf("unexpected status code fetching data "+
   120  			"from %q : expected status %d got %d",
   121  			url, http.StatusOK, resp.StatusCode)
   122  	}
   123  
   124  	respBody, err := ioutil.ReadAll(resp.Body)
   125  	if err != nil {
   126  		return nil, fmt.Errorf("unexpected error reading response "+
   127  			"body from %q : %s",
   128  			url, err)
   129  	}
   130  	return respBody, nil
   131  }
   132  
   133  // getTLDData fetches the ICANN_TLDS list and uses the information to build
   134  // and return a list of util.GTLDPeriod objects (or an error if anything fails).
   135  // Since this data source only contains TLD names and not any information
   136  // about delegation/removal all of the returned `util.GTLDPeriod` objects will
   137  // have the DelegationDate "1985-01-01" (matching the `.com` delegation date)
   138  // and no RemovalDate.
   139  func getTLDData() ([]util.GTLDPeriod, error) {
   140  	respBody, err := getData(ICANN_TLDS)
   141  	if err != nil {
   142  		return nil, fmt.Errorf("error getting ICANN TLD list : %s", err)
   143  	}
   144  	tlds := strings.Split(string(respBody), "\n")
   145  
   146  	var results []util.GTLDPeriod
   147  	for _, tld := range tlds {
   148  		// Skip empty lines and the header comment line
   149  		if strings.TrimSpace(tld) == "" || strings.HasPrefix(tld, "#") {
   150  			continue
   151  		}
   152  		results = append(results, util.GTLDPeriod{
   153  			GTLD: strings.ToLower(tld),
   154  			// The TLD list doesn't indicate when any of the TLDs were delegated so
   155  			// assume these TLDs were all delegated at the same time as "com".
   156  			DelegationDate: "1985-01-01",
   157  		})
   158  	}
   159  	return results, nil
   160  }
   161  
   162  // getGTLDData fetches the ICANN_GTLD_JSON and parses it into a list of
   163  // util.GTLDPeriod objects, or returns an error. The gTLDEntries are returned
   164  // as-is and may contain entries that were never delegated from the root DNS.
   165  func getGTLDData() ([]util.GTLDPeriod, error) {
   166  	respBody, err := getData(ICANN_GTLD_JSON)
   167  	if err != nil {
   168  		return nil, fmt.Errorf("error getting ICANN gTLD JSON : %s", err)
   169  	}
   170  
   171  	var results struct {
   172  		GTLDs []util.GTLDPeriod
   173  	}
   174  	if err := json.Unmarshal(respBody, &results); err != nil {
   175  		return nil, fmt.Errorf("unexpected error unmarshaling ICANN gTLD JSON response "+
   176  			"body from %q : %s",
   177  			ICANN_GTLD_JSON, err)
   178  	}
   179  	return results.GTLDs, nil
   180  }
   181  
   182  // delegatedGTLDs filters the provided list of GTLDPeriods removing any entries
   183  // that were never delegated from the root DNS.
   184  func delegatedGTLDs(entries []util.GTLDPeriod) []util.GTLDPeriod {
   185  	var results []util.GTLDPeriod
   186  	for _, gTLD := range entries {
   187  		if gTLD.DelegationDate == "" {
   188  			continue
   189  		}
   190  		results = append(results, gTLD)
   191  	}
   192  	return results
   193  }
   194  
   195  // validateGTLDs checks that all entries have a valid parseable DelegationDate
   196  // string, and if not-empty, a valid parseable RemovalDate string. This function
   197  // assumes an entry with an empty DelegationDate is an error. Use
   198  // `delegatedGTLDs` to filter out entries that were never delegated before
   199  // validating.
   200  func validateGTLDs(entries []util.GTLDPeriod) error {
   201  	for _, gTLD := range entries {
   202  		// All entries should have a valid delegation date
   203  		if _, err := time.Parse(util.GTLDPeriodDateFormat, gTLD.DelegationDate); err != nil {
   204  			return err
   205  		}
   206  		// a gTLD that has not been removed has an empty RemovalDate and that's OK
   207  		if _, err := time.Parse(util.GTLDPeriodDateFormat, gTLD.RemovalDate); gTLD.RemovalDate != "" && err != nil {
   208  			return err
   209  		}
   210  	}
   211  	return nil
   212  }
   213  
   214  // renderGTLDMap fetches the ICANN gTLD data, filters out undelegated entries,
   215  // validates the remaining entries have parseable dates, and renders the
   216  // gTLDMapTemplate to the provided writer using the validated entries (or
   217  // returns an error if any of the aforementioned steps fail). It then fetches
   218  // the ICANN TLD data, and uses it to populate any missing entries for ccTLDs.
   219  // These entries will have a default delegationDate because the data source is
   220  // not specific enough to provide one. The produced output text is a Golang
   221  // source code file in the `util` package that contains a single map variable
   222  // containing GTLDPeriod objects created with the ICANN data.
   223  func renderGTLDMap(writer io.Writer) error {
   224  	// Get all of ICANN's gTLDs including ones that haven't been delegated.
   225  	allGTLDs, err := getGTLDData()
   226  	if err != nil {
   227  		return err
   228  	}
   229  
   230  	// Filter out the non-delegated gTLD entries
   231  	delegatedGTLDs := delegatedGTLDs(allGTLDs)
   232  
   233  	// Validate that all of the delegated gTLDs have correct dates
   234  	if err := validateGTLDs(delegatedGTLDs); err != nil {
   235  		return err
   236  	}
   237  
   238  	// Get all of the TLDs. This data source doesn't provide delegationDates and
   239  	// so we only want to use it to populate missing entries in `delegatedGTLDs`,
   240  	// not to replace any existing entries that have more specific information
   241  	// about the validity period for the TLD.
   242  	allTLDs, err := getTLDData()
   243  	if err != nil {
   244  		return err
   245  	}
   246  
   247  	tldMap := make(map[string]util.GTLDPeriod)
   248  
   249  	// Deduplicate delegatedGTLDs into the tldMap first
   250  	for _, tld := range delegatedGTLDs {
   251  		tldMap[tld.GTLD] = tld
   252  	}
   253  
   254  	// Then populate any missing entries from the allTLDs list
   255  	for _, tld := range allTLDs {
   256  		if _, found := tldMap[tld.GTLD]; !found {
   257  			tldMap[tld.GTLD] = tld
   258  		}
   259  	}
   260  
   261  	templateData := struct {
   262  		GTLDs map[string]util.GTLDPeriod
   263  	}{
   264  		GTLDs: tldMap,
   265  	}
   266  
   267  	// Render the gTLD map to a buffer with the delegated gTLD data
   268  	var buf bytes.Buffer
   269  	if err := gTLDMapTemplate.Execute(&buf, templateData); err != nil {
   270  		return err
   271  	}
   272  
   273  	// format the buffer so it won't trip up the `gofmt_test.go` checks
   274  	formatted, err := format.Source(buf.Bytes())
   275  	if err != nil {
   276  		return err
   277  	}
   278  
   279  	// Write the formatted buffer to the writer
   280  	_, err = writer.Write(formatted)
   281  	if err != nil {
   282  		return err
   283  	}
   284  	return nil
   285  }
   286  
   287  // init sets up command line flags
   288  func init() {
   289  	flag.Usage = func() {
   290  		fmt.Fprintf(os.Stderr, "Usage: %s [flags]\n", os.Args[0])
   291  		flag.PrintDefaults()
   292  	}
   293  	flag.Parse()
   294  	log.SetLevel(log.InfoLevel)
   295  }
   296  
   297  // main handles rendering a gTLD map to either standard out (when no argument is
   298  // provided) or to the provided filename. If an error occurs it is printed to
   299  // standard err and the program terminates with a non-zero exit status.
   300  func main() {
   301  	errQuit := func(err error) {
   302  		fmt.Fprintf(os.Stderr, "error updating gTLD map: %s\n", err)
   303  		os.Exit(1)
   304  	}
   305  
   306  	// Default to writing to standard out
   307  	writer := os.Stdout
   308  	if flag.NArg() > 0 {
   309  		// If a filename is specified as a command line flag then open it (creating
   310  		// if needed), truncate the existing contents, and use the file as the
   311  		// writer instead of standard out
   312  		filename := flag.Args()[0]
   313  		f, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0664)
   314  		if err != nil {
   315  			errQuit(err)
   316  		}
   317  		defer f.Close()
   318  		writer = f
   319  	}
   320  
   321  	if err := renderGTLDMap(writer); err != nil {
   322  		errQuit(err)
   323  	}
   324  }