github.com/seeker-insurance/kit@v0.0.13/tsv/tsv.go (about) 1 //Package tsv contains helper functions for working with tab-separated data and getting it from a local path or URL. 2 package tsv 3 4 import ( 5 "bufio" 6 "bytes" 7 "encoding/json" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "log" 12 "net/http" 13 "os" 14 "strconv" 15 "strings" 16 "sync" 17 18 "github.com/seeker-insurance/kit/assets" 19 ) 20 21 //Record represents a single line of a TSV 22 type ( 23 Record map[string]string 24 labels []string 25 Error string 26 ) 27 28 const ( 29 errKeyDoesNotExist Error = "key does not exist" 30 errCannotConvertKey Error = "cannot convert key" 31 errWrongElementCount Error = "line has different number of elements than record has fields" 32 ) 33 34 func (l labels) ParseLine(line string) (Record, bool) { return parseLine(line, l) } 35 func (err Error) Error() string { return string(err) } 36 func errorF(format string, a ...interface{}) Error { return Error(fmt.Sprintf(format, a...)) } 37 38 //FromPath parses a path to see whether it is a URL or local path, 39 //downloads the file if necessary, then parses it and returns the records 40 func FromPath(path string) (records []Record, err error) { 41 readCloser, err := asReadCloser(path) 42 if err != nil { 43 return nil, err 44 } 45 defer readCloser.Close() 46 return Parse(readCloser) 47 } 48 49 func StreamFromPaths(out chan Record, paths ...string) error { 50 readClosers := make([]io.ReadCloser, len(paths)) 51 for i, path := range paths { 52 rc, err := asReadCloser(path) 53 if err != nil { 54 close(out) 55 return err 56 } 57 readClosers[i] = rc 58 } 59 go parseStreams(out, readClosers...) 60 return nil 61 } 62 63 func StreamFromBindataPaths(out chan Record, paths ...string) error { 64 readClosers := make([]io.ReadCloser, len(paths)) 65 for i, path := range paths { 66 rc, err := asBinReadCloser(path) 67 if err != nil { 68 close(out) 69 return errorF("StreamFromBinDataPaths: %v", err) 70 } 71 readClosers[i] = rc 72 } 73 go parseStreams(out, readClosers...) 74 return nil 75 } 76 77 //ParseLine parses a single line of a TSV using the given labels 78 func parseLine(line string, labels labels) (Record, bool) { 79 split := strings.Split(line, "\t") 80 if len(split) != len(labels) { 81 return nil, false 82 } 83 record := make(Record) 84 for i, label := range labels { 85 record[label] = split[i] 86 } 87 return record, true 88 89 } 90 91 //asReadCloser bindata path, and returns a ReadCloser containing the information 92 func asBinReadCloser(s string) (readCloser io.ReadCloser, err error) { 93 b, err := assets.Get(s) 94 if err != nil { 95 return nil, err 96 } 97 return ioutil.NopCloser(bytes.NewReader(b)), nil 98 } 99 100 //asReadCloser takes a URL or local path, downloads if necessary, and returns a ReadCloser containing the information 101 func asReadCloser(s string) (readCloser io.ReadCloser, err error) { 102 resp, err := http.Get(s) 103 if err == nil { 104 return resp.Body, nil 105 } 106 107 return os.Open(s) 108 } 109 110 //Parse an io.Reader and extract the Records. 111 func Parse(reader io.Reader) (records []Record, err error) { 112 scanner := bufio.NewScanner(reader) 113 scanner.Scan() 114 labels := labels(strings.Fields(scanner.Text())) 115 for scanner.Scan() { 116 if scanner.Text() == "" { 117 continue 118 } 119 record, ok := labels.ParseLine(scanner.Text()) 120 if !ok { 121 return nil, errWrongElementCount 122 } 123 records = append(records, record) 124 } 125 return records, nil 126 } 127 128 func parseStream(out chan<- Record, readCloser io.ReadCloser) error { 129 defer readCloser.Close() 130 scanner := bufio.NewScanner(readCloser) 131 scanner.Scan() 132 labels := labels(strings.Fields(scanner.Text())) 133 for scanner.Scan() { 134 if scanner.Text() == "" { 135 continue 136 } 137 record, ok := labels.ParseLine(scanner.Text()) 138 if !ok { 139 return errWrongElementCount 140 } 141 out <- record 142 } 143 return nil 144 } 145 146 func parseStreams(out chan<- Record, readClosers ...io.ReadCloser) { 147 defer close(out) 148 wg := &sync.WaitGroup{} 149 for _, rc := range readClosers { 150 wg.Add(1) 151 go func(r io.ReadCloser) { 152 defer wg.Done() 153 if err := parseStream(out, r); err != nil { 154 log.Println(err) 155 } 156 }(rc) 157 } 158 wg.Wait() 159 } 160 161 //Float64 gets the specified key as a Float64, if possible 162 func (r Record) Float64(key string) (float64, error) { 163 s, ok := r[key] 164 if !ok { 165 return 0, errKeyDoesNotExist 166 } 167 val, err := strconv.ParseFloat(s, 64) 168 if err != nil { 169 return 0, errCannotConvertKey 170 } 171 return val, nil 172 } 173 174 //Bool gets the specified key as a Bool, if possible 175 func (r Record) Bool(key string) (bool, error) { 176 s, ok := r[key] 177 if !ok { 178 return false, errKeyDoesNotExist 179 } 180 if s == "true" { 181 return true, nil 182 } 183 if s == "false" { 184 return false, nil 185 } 186 return false, errCannotConvertKey 187 } 188 189 //Int gets the specified key as an Int, if possible 190 func (r Record) Int(key string) (int, error) { 191 s, ok := r[key] 192 if !ok { 193 return 0, errKeyDoesNotExist 194 } 195 return strconv.Atoi(s) 196 } 197 198 //StringSlice gets the specified key as a []string, if possible 199 func (r Record) StringSlice(key string) (a []string, err error) { 200 s, ok := r[key] 201 if !ok { 202 return nil, errKeyDoesNotExist 203 } 204 err = json.Unmarshal([]byte(s), &a) 205 return a, err 206 } 207 208 //IntSlice gets the specified key as a []Int, if possible 209 func (r Record) IntSlice(key string) (a []int, err error) { 210 s, ok := r[key] 211 if !ok { 212 return nil, errKeyDoesNotExist 213 } 214 err = json.Unmarshal([]byte(s), &a) 215 return a, err 216 } 217 218 //FloatSlice gets the specified key as a []Float64, if possible 219 func (r Record) FloatSlice(key string) (a []float64, err error) { 220 s, ok := r[key] 221 if !ok { 222 return nil, errKeyDoesNotExist 223 } 224 err = json.Unmarshal([]byte(s), &a) 225 return a, err 226 }