github.com/seeker-insurance/kit@v0.0.13/tsv/tsv.go (about)

     1  //Package tsv contains helper functions for working with tab-separated data and getting it from a local path or URL.
     2  package tsv
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"log"
    12  	"net/http"
    13  	"os"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/seeker-insurance/kit/assets"
    19  )
    20  
    21  //Record represents a single line of a TSV
    22  type (
    23  	Record map[string]string
    24  	labels []string
    25  	Error  string
    26  )
    27  
// Errors returned by this package's parsers and Record accessors.
const (
	// errKeyDoesNotExist is returned by the Record accessors when the requested key is absent.
	errKeyDoesNotExist   Error = "key does not exist"
	// errCannotConvertKey is returned when a value cannot be converted to the requested type.
	errCannotConvertKey  Error = "cannot convert key"
	// errWrongElementCount is returned when a data line does not have one element per label.
	errWrongElementCount Error = "line has different number of elements than record has fields"
)
    33  
    34  func (l labels) ParseLine(line string) (Record, bool) { return parseLine(line, l) }
    35  func (err Error) Error() string                       { return string(err) }
    36  func errorF(format string, a ...interface{}) Error    { return Error(fmt.Sprintf(format, a...)) }
    37  
    38  //FromPath parses a path to see whether it is a URL or local path,
    39  //downloads the file if necessary, then parses it and returns the records
    40  func FromPath(path string) (records []Record, err error) {
    41  	readCloser, err := asReadCloser(path)
    42  	if err != nil {
    43  		return nil, err
    44  	}
    45  	defer readCloser.Close()
    46  	return Parse(readCloser)
    47  }
    48  
    49  func StreamFromPaths(out chan Record, paths ...string) error {
    50  	readClosers := make([]io.ReadCloser, len(paths))
    51  	for i, path := range paths {
    52  		rc, err := asReadCloser(path)
    53  		if err != nil {
    54  			close(out)
    55  			return err
    56  		}
    57  		readClosers[i] = rc
    58  	}
    59  	go parseStreams(out, readClosers...)
    60  	return nil
    61  }
    62  
    63  func StreamFromBindataPaths(out chan Record, paths ...string) error {
    64  	readClosers := make([]io.ReadCloser, len(paths))
    65  	for i, path := range paths {
    66  		rc, err := asBinReadCloser(path)
    67  		if err != nil {
    68  			close(out)
    69  			return errorF("StreamFromBinDataPaths: %v", err)
    70  		}
    71  		readClosers[i] = rc
    72  	}
    73  	go parseStreams(out, readClosers...)
    74  	return nil
    75  }
    76  
    77  //ParseLine parses a single line of a TSV using the given labels
    78  func parseLine(line string, labels labels) (Record, bool) {
    79  	split := strings.Split(line, "\t")
    80  	if len(split) != len(labels) {
    81  		return nil, false
    82  	}
    83  	record := make(Record)
    84  	for i, label := range labels {
    85  		record[label] = split[i]
    86  	}
    87  	return record, true
    88  
    89  }
    90  
    91  //asReadCloser bindata path, and returns a ReadCloser containing the information
    92  func asBinReadCloser(s string) (readCloser io.ReadCloser, err error) {
    93  	b, err := assets.Get(s)
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  	return ioutil.NopCloser(bytes.NewReader(b)), nil
    98  }
    99  
   100  //asReadCloser takes a URL or local path, downloads if necessary, and returns a ReadCloser containing the information
   101  func asReadCloser(s string) (readCloser io.ReadCloser, err error) {
   102  	resp, err := http.Get(s)
   103  	if err == nil {
   104  		return resp.Body, nil
   105  	}
   106  
   107  	return os.Open(s)
   108  }
   109  
   110  //Parse an io.Reader and extract the Records.
   111  func Parse(reader io.Reader) (records []Record, err error) {
   112  	scanner := bufio.NewScanner(reader)
   113  	scanner.Scan()
   114  	labels := labels(strings.Fields(scanner.Text()))
   115  	for scanner.Scan() {
   116  		if scanner.Text() == "" {
   117  			continue
   118  		}
   119  		record, ok := labels.ParseLine(scanner.Text())
   120  		if !ok {
   121  			return nil, errWrongElementCount
   122  		}
   123  		records = append(records, record)
   124  	}
   125  	return records, nil
   126  }
   127  
   128  func parseStream(out chan<- Record, readCloser io.ReadCloser) error {
   129  	defer readCloser.Close()
   130  	scanner := bufio.NewScanner(readCloser)
   131  	scanner.Scan()
   132  	labels := labels(strings.Fields(scanner.Text()))
   133  	for scanner.Scan() {
   134  		if scanner.Text() == "" {
   135  			continue
   136  		}
   137  		record, ok := labels.ParseLine(scanner.Text())
   138  		if !ok {
   139  			return errWrongElementCount
   140  		}
   141  		out <- record
   142  	}
   143  	return nil
   144  }
   145  
   146  func parseStreams(out chan<- Record, readClosers ...io.ReadCloser) {
   147  	defer close(out)
   148  	wg := &sync.WaitGroup{}
   149  	for _, rc := range readClosers {
   150  		wg.Add(1)
   151  		go func(r io.ReadCloser) {
   152  			defer wg.Done()
   153  			if err := parseStream(out, r); err != nil {
   154  				log.Println(err)
   155  			}
   156  		}(rc)
   157  	}
   158  	wg.Wait()
   159  }
   160  
   161  //Float64 gets the specified key as a Float64, if possible
   162  func (r Record) Float64(key string) (float64, error) {
   163  	s, ok := r[key]
   164  	if !ok {
   165  		return 0, errKeyDoesNotExist
   166  	}
   167  	val, err := strconv.ParseFloat(s, 64)
   168  	if err != nil {
   169  		return 0, errCannotConvertKey
   170  	}
   171  	return val, nil
   172  }
   173  
   174  //Bool gets the specified key as a Bool, if possible
   175  func (r Record) Bool(key string) (bool, error) {
   176  	s, ok := r[key]
   177  	if !ok {
   178  		return false, errKeyDoesNotExist
   179  	}
   180  	if s == "true" {
   181  		return true, nil
   182  	}
   183  	if s == "false" {
   184  		return false, nil
   185  	}
   186  	return false, errCannotConvertKey
   187  }
   188  
   189  //Int gets the specified key as an Int, if possible
   190  func (r Record) Int(key string) (int, error) {
   191  	s, ok := r[key]
   192  	if !ok {
   193  		return 0, errKeyDoesNotExist
   194  	}
   195  	return strconv.Atoi(s)
   196  }
   197  
   198  //StringSlice gets the specified key as a []string, if possible
   199  func (r Record) StringSlice(key string) (a []string, err error) {
   200  	s, ok := r[key]
   201  	if !ok {
   202  		return nil, errKeyDoesNotExist
   203  	}
   204  	err = json.Unmarshal([]byte(s), &a)
   205  	return a, err
   206  }
   207  
   208  //IntSlice gets the specified key as a  []Int, if possible
   209  func (r Record) IntSlice(key string) (a []int, err error) {
   210  	s, ok := r[key]
   211  	if !ok {
   212  		return nil, errKeyDoesNotExist
   213  	}
   214  	err = json.Unmarshal([]byte(s), &a)
   215  	return a, err
   216  }
   217  
   218  //FloatSlice gets the specified key as a []Float64, if possible
   219  func (r Record) FloatSlice(key string) (a []float64, err error) {
   220  	s, ok := r[key]
   221  	if !ok {
   222  		return nil, errKeyDoesNotExist
   223  	}
   224  	err = json.Unmarshal([]byte(s), &a)
   225  	return a, err
   226  }