github.com/vertgenlab/gonomics@v1.0.0/cmd/vcfFormat/table.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "io" 6 "log" 7 "strconv" 8 "strings" 9 10 "github.com/vertgenlab/gonomics/exception" 11 "github.com/vertgenlab/gonomics/vcf" 12 ) 13 14 func writeTableHeader(outfile io.Writer, header vcf.Header, maxAlts int) (infoOrder []vcf.InfoHeader, formatOrder []vcf.FormatHeader) { 15 s := new(strings.Builder) 16 if len(header.Text) == 0 { 17 log.Fatal("ERROR: no vcf header found. Must have well-formed header to use -tsv") 18 } 19 20 // write common header components 21 s.WriteString("Chromosome,Position,ID,Reference") 22 if maxAlts == 1 { 23 s.WriteString(",Alternate") 24 } else { 25 for i := 0; i < maxAlts; i++ { 26 s.WriteString(",Alternate_" + fmt.Sprintf("%d", i)) 27 } 28 } 29 s.WriteString(",Quality,Filter") 30 31 // write INFO field 32 var numFields int 33 for key, val := range header.Info { 34 numFields = numberOfFields(maxAlts, val.Key) 35 if numFields == 1 { 36 s.WriteString("," + key) 37 } else { 38 for i := 0; i < numFields; i++ { 39 s.WriteString("," + key + fmt.Sprintf("_%d", i)) 40 } 41 } 42 infoOrder = append(infoOrder, val) 43 } 44 45 // get formatOrder 46 for _, val := range header.Format { 47 formatOrder = append(formatOrder, val) 48 } 49 50 // get sample order 51 sampleOrder := make([]string, len(header.Samples)) 52 for key, val := range header.Samples { 53 sampleOrder[val] = key 54 } 55 56 // write format per sample 57 for _, fmtHeader := range formatOrder { 58 for _, sample := range sampleOrder { 59 numFields = numberOfFields(maxAlts, fmtHeader.Key) 60 if numFields == 1 { 61 s.WriteString("," + fmtHeader.Id + "_" + sample) 62 } else { 63 for i := 0; i < numFields; i++ { 64 s.WriteString("," + fmtHeader.Id + "_" + sample + fmt.Sprintf("_%d", i)) 65 } 66 } 67 } 68 } 69 70 _, err := fmt.Fprintln(outfile, s.String()) 71 exception.PanicOnErr(err) 72 return 73 } 74 75 func writeAsTable(s *strings.Builder, outfile io.Writer, v vcf.Vcf, header vcf.Header, infoOrder []vcf.InfoHeader, formatOrder []vcf.FormatHeader, maxAlts int) { 76 s.Reset() 77 78 // write basic data 79 s.WriteString(fmt.Sprintf("%s,%d,%s,%s,%s", 80 v.Chr, v.Pos, v.Id, v.Ref, strings.Join(v.Alt, ","))) 81 for i := len(v.Alt); i < maxAlts; i++ { 82 s.WriteString(",") 83 } 84 s.WriteString(fmt.Sprintf(",%g,%s", v.Qual, v.Filter)) 85 86 // write info data 87 v = vcf.ParseInfo(v, header) 88 for i := range infoOrder { 89 writeData(s, v, infoOrder[i].Key, numberOfFields(maxAlts, infoOrder[i].Key), 1) 90 } 91 92 // write format data 93 v = vcf.ParseFormat(v, header) 94 for i := range formatOrder { 95 writeData(s, v, formatOrder[i].Key, numberOfFields(maxAlts, formatOrder[i].Key), len(v.Samples)) 96 } 97 98 _, err := fmt.Fprintln(outfile, s.String()) 99 exception.PanicOnErr(err) 100 } 101 102 // getMaxAltCount reads through the input vcf file to determine the maximum number of alternate alleles present. 103 func getMaxAltCount(infile string) int { 104 var maxAlts int 105 records, _ := vcf.GoReadToChan(infile) 106 for v := range records { 107 if len(v.Alt) > maxAlts { 108 maxAlts = len(v.Alt) 109 } 110 } 111 return maxAlts 112 } 113 114 func numberOfFields(maxAlts int, k vcf.Key) int { 115 switch k.Number { 116 case "A": // == num alt alleles 117 return maxAlts 118 119 case "R": // == num ref + alt alleles 120 return maxAlts + 1 121 122 case "G": // one value for each possible genotype 123 return 1 //TODO once parser is updated 124 125 case ".": // wildcard. they never make it easy do they... 126 return 1 127 128 default: 129 num, err := strconv.Atoi(k.Number) 130 if err != nil { 131 log.Panicf("'%s' is not a valid Number for header info", k.Number) 132 } 133 return num 134 } 135 } 136 137 func writeData(s *strings.Builder, v vcf.Vcf, key vcf.Key, numberOfFieldsPerSample int, repeats int) { 138 var innerFieldsWritten, fieldsWritten, i, j int 139 140 switch key.DataType { 141 case vcf.Integer: 142 ints, found := vcf.QueryInt(v, key) 143 if !found { 144 break 145 } 146 for i = range ints { // per sample data 147 for j = range ints[i] { // per field data 148 s.WriteString(fmt.Sprintf(",%d", ints[i][j])) 149 fieldsWritten++ 150 innerFieldsWritten++ 151 } 152 for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ { 153 s.WriteString(",") 154 fieldsWritten++ 155 } 156 innerFieldsWritten = 0 157 } 158 159 case vcf.Float: 160 flts, found := vcf.QueryFloat(v, key) 161 if !found { 162 break 163 } 164 for i = range flts { // per sample data 165 for j = range flts[i] { // per field data 166 s.WriteString(fmt.Sprintf(",%g", flts[i][j])) 167 fieldsWritten++ 168 innerFieldsWritten++ 169 } 170 for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ { 171 s.WriteString(",") 172 fieldsWritten++ 173 } 174 innerFieldsWritten = 0 175 } 176 177 case vcf.String: 178 strs, found := vcf.QueryString(v, key) 179 if !found { 180 break 181 } 182 for i = range strs { // per sample data 183 for j = range strs[i] { // per field data 184 s.WriteString(fmt.Sprintf(",%s", strs[i][j])) 185 fieldsWritten++ 186 innerFieldsWritten++ 187 } 188 for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ { 189 s.WriteString(",") 190 fieldsWritten++ 191 } 192 innerFieldsWritten = 0 193 } 194 195 case vcf.Character: 196 chars, found := vcf.QueryRune(v, key) 197 if !found { 198 break 199 } 200 for i = range chars { // per sample data 201 for j = range chars[i] { // per field data 202 s.WriteString(fmt.Sprintf(",%c", chars[i][j])) 203 fieldsWritten++ 204 innerFieldsWritten++ 205 } 206 for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ { 207 s.WriteString(",") 208 fieldsWritten++ 209 } 210 innerFieldsWritten = 0 211 } 212 213 case vcf.Flag: 214 found := vcf.QueryFlag(v, key) 215 if found { 216 s.WriteString(",TRUE") 217 } else { 218 s.WriteString(",FALSE") 219 } 220 fieldsWritten++ 221 innerFieldsWritten++ 222 for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ { 223 s.WriteString(",") 224 fieldsWritten++ 225 } 226 innerFieldsWritten = 0 227 } 228 229 for j = fieldsWritten; j < numberOfFieldsPerSample*repeats; j++ { 230 s.WriteString(",") 231 } 232 }