github.com/zmap/zlint@v1.1.0/integration/csv.go (about)

     1  // +build integration
     2  
     3  package integration
     4  
     5  import (
     6  	"encoding/base64"
     7  	"encoding/csv"
     8  	"io"
     9  	"log"
    10  	"os"
    11  	"path"
    12  
    13  	"github.com/zmap/zcrypto/x509"
    14  )
    15  
// csvFieldIndex represents an index into a CSV Record. It is the zero-based
// position of a field within the []string record returned by the CSV reader;
// the valid values are the csv* constants of this type.
type csvFieldIndex int
    18  
// Field positions within a CSV record, numbered from zero via iota in the
// order the columns appear in the data files:
//   subject_dn, issuer_dn, raw, fingerprint_sha256
const (
	// csvSubjectDN is the index for the Subject DN CSV field.
	csvSubjectDN csvFieldIndex = iota
	// csvIssuerDN is the index for the Issuer DN CSV field.
	csvIssuerDN
	// csvRaw is the index for the raw base64 encoded certificate DER CSV field.
	csvRaw
	// csvFingerprint is the index for the certificate fingerprint CSV field.
	csvFingerprint
	// end is a marker used to calculate number of fields in the CSV reader. It
	// must stay the last entry so that int(end) equals the number of fields
	// declared above.
	end
)
    31  
// workItem is a struct collecting together a fingerprint and a parsed
// certificate that were read from a CSV record in a data file. One workItem is
// sent on the work channel for every record that parses successfully.
type workItem struct {
	// Fingerprint is the SHA256 hash of the raw certificate DER. It is provided
	// in the CSV so we capture it into a work item to avoid having to rehash the
	// DER later on. The value is copied verbatim from the record's fingerprint
	// field.
	Fingerprint string
	// Certificate is the parsed x509 Certificate created from the CSV record's
	// Base64 encoded raw DER.
	Certificate *x509.Certificate
}
    43  
    44  // loadCSV processes the configured data files with the provided cache
    45  // directory, writing work items to the workChannel as they are available.
    46  //
    47  // Expected CSV format:
    48  //   subject_dn, issuer_dn, raw, fingerprint_sha256
    49  func loadCSV(workChannel chan<- workItem, directory string) {
    50  	log.Printf("Reading data from %d CSV files", len(conf.Files))
    51  	for i, dataFile := range conf.Files {
    52  		path := path.Join(conf.CacheDir, dataFile.Name)
    53  		log.Printf("Reading data from %q (%d of %d)\n",
    54  			path, i+1, len(conf.Files))
    55  		if err := loadCSVFile(workChannel, path, i == 0); err != nil {
    56  			log.Fatalf("Failed reading CSV file %q: %v", path, err)
    57  		}
    58  		log.Printf("Done reading CSV file %q", path)
    59  	}
    60  
    61  	log.Printf("Finished reading data from %d CSV files. Closing work channel",
    62  		len(conf.Files))
    63  	close(workChannel)
    64  }
    65  
    66  // loadCSVFile reads and parses a certificate and fingerprint from the csvRaw
    67  // index of each record in the provided CSV file, putting a matching work item
    68  // into the workChannel.
    69  func loadCSVFile(workChannel chan<- workItem, path string, skipHeader bool) error {
    70  	// Open the input file and create a CSV reader configured for the expected
    71  	// number of record fields.
    72  	f, err := os.Open(path)
    73  	if err != nil {
    74  		return err
    75  	}
    76  	defer f.Close()
    77  	in := csv.NewReader(f)
    78  	in.FieldsPerRecord = int(end)
    79  	in.ReuseRecord = true
    80  
    81  	// Start reading records until there are none left.
    82  	var skippedFirst bool
    83  	for {
    84  		record, err := in.Read()
    85  		// If we read EOF its time to end the loop and return nil
    86  		if err == io.EOF {
    87  			return nil
    88  		} else if err != nil {
    89  			// If there was an error, end the loop and return non-nil
    90  			return err
    91  		}
    92  
    93  		// If we haven't skipped a header yet and are configured to do so then skip
    94  		// this record.
    95  		if !skippedFirst && skipHeader {
    96  			skippedFirst = true
    97  			continue
    98  		}
    99  
   100  		// If a fingerprint filter is configured only include records with
   101  		// a fingerprint that matches the filter regexp.
   102  		if fpFilter != nil && !fpFilter.MatchString(record[csvFingerprint]) {
   103  			continue
   104  		}
   105  
   106  		// Parse a certificate from the record's csvRaw index and write it to the
   107  		// work channel.
   108  		cert, err := parseCertificate(record[csvRaw])
   109  		if err != nil {
   110  			log.Printf("Warning: failed to parse record in %q: subjectDN %q fingerprint %q raw %q: %v",
   111  				path, record[csvSubjectDN], record[csvFingerprint], record[csvRaw], err)
   112  			continue
   113  		}
   114  		workChannel <- workItem{
   115  			Fingerprint: record[csvFingerprint],
   116  			Certificate: cert,
   117  		}
   118  	}
   119  	// Control should never reach this point...
   120  	return nil
   121  }
   122  
   123  // parseCertificate parses an *x509.Certificate instance from the given csvRaw
   124  // string assumed to be the BASE64 encoding of a DER encoded x509 certificate.
   125  func parseCertificate(csvRaw string) (*x509.Certificate, error) {
   126  	derBytes, err := base64.StdEncoding.DecodeString(csvRaw)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  	cert, err := x509.ParseCertificate(derBytes)
   131  	if err != nil {
   132  		return nil, err
   133  	}
   134  	return cert, nil
   135  }