github.com/vertgenlab/gonomics@v1.0.0/cmd/vcfFormat/table.go (about)

     1  package main
     2  
import (
	"fmt"
	"io"
	"log"
	"sort"
	"strconv"
	"strings"

	"github.com/vertgenlab/gonomics/exception"
	"github.com/vertgenlab/gonomics/vcf"
)
    13  
    14  func writeTableHeader(outfile io.Writer, header vcf.Header, maxAlts int) (infoOrder []vcf.InfoHeader, formatOrder []vcf.FormatHeader) {
    15  	s := new(strings.Builder)
    16  	if len(header.Text) == 0 {
    17  		log.Fatal("ERROR: no vcf header found. Must have well-formed header to use -tsv")
    18  	}
    19  
    20  	// write common header components
    21  	s.WriteString("Chromosome,Position,ID,Reference")
    22  	if maxAlts == 1 {
    23  		s.WriteString(",Alternate")
    24  	} else {
    25  		for i := 0; i < maxAlts; i++ {
    26  			s.WriteString(",Alternate_" + fmt.Sprintf("%d", i))
    27  		}
    28  	}
    29  	s.WriteString(",Quality,Filter")
    30  
    31  	// write INFO field
    32  	var numFields int
    33  	for key, val := range header.Info {
    34  		numFields = numberOfFields(maxAlts, val.Key)
    35  		if numFields == 1 {
    36  			s.WriteString("," + key)
    37  		} else {
    38  			for i := 0; i < numFields; i++ {
    39  				s.WriteString("," + key + fmt.Sprintf("_%d", i))
    40  			}
    41  		}
    42  		infoOrder = append(infoOrder, val)
    43  	}
    44  
    45  	// get formatOrder
    46  	for _, val := range header.Format {
    47  		formatOrder = append(formatOrder, val)
    48  	}
    49  
    50  	// get sample order
    51  	sampleOrder := make([]string, len(header.Samples))
    52  	for key, val := range header.Samples {
    53  		sampleOrder[val] = key
    54  	}
    55  
    56  	// write format per sample
    57  	for _, fmtHeader := range formatOrder {
    58  		for _, sample := range sampleOrder {
    59  			numFields = numberOfFields(maxAlts, fmtHeader.Key)
    60  			if numFields == 1 {
    61  				s.WriteString("," + fmtHeader.Id + "_" + sample)
    62  			} else {
    63  				for i := 0; i < numFields; i++ {
    64  					s.WriteString("," + fmtHeader.Id + "_" + sample + fmt.Sprintf("_%d", i))
    65  				}
    66  			}
    67  		}
    68  	}
    69  
    70  	_, err := fmt.Fprintln(outfile, s.String())
    71  	exception.PanicOnErr(err)
    72  	return
    73  }
    74  
    75  func writeAsTable(s *strings.Builder, outfile io.Writer, v vcf.Vcf, header vcf.Header, infoOrder []vcf.InfoHeader, formatOrder []vcf.FormatHeader, maxAlts int) {
    76  	s.Reset()
    77  
    78  	// write basic data
    79  	s.WriteString(fmt.Sprintf("%s,%d,%s,%s,%s",
    80  		v.Chr, v.Pos, v.Id, v.Ref, strings.Join(v.Alt, ",")))
    81  	for i := len(v.Alt); i < maxAlts; i++ {
    82  		s.WriteString(",")
    83  	}
    84  	s.WriteString(fmt.Sprintf(",%g,%s", v.Qual, v.Filter))
    85  
    86  	// write info data
    87  	v = vcf.ParseInfo(v, header)
    88  	for i := range infoOrder {
    89  		writeData(s, v, infoOrder[i].Key, numberOfFields(maxAlts, infoOrder[i].Key), 1)
    90  	}
    91  
    92  	// write format data
    93  	v = vcf.ParseFormat(v, header)
    94  	for i := range formatOrder {
    95  		writeData(s, v, formatOrder[i].Key, numberOfFields(maxAlts, formatOrder[i].Key), len(v.Samples))
    96  	}
    97  
    98  	_, err := fmt.Fprintln(outfile, s.String())
    99  	exception.PanicOnErr(err)
   100  }
   101  
   102  // getMaxAltCount reads through the input vcf file to determine the maximum number of alternate alleles present.
   103  func getMaxAltCount(infile string) int {
   104  	var maxAlts int
   105  	records, _ := vcf.GoReadToChan(infile)
   106  	for v := range records {
   107  		if len(v.Alt) > maxAlts {
   108  			maxAlts = len(v.Alt)
   109  		}
   110  	}
   111  	return maxAlts
   112  }
   113  
   114  func numberOfFields(maxAlts int, k vcf.Key) int {
   115  	switch k.Number {
   116  	case "A": // == num alt alleles
   117  		return maxAlts
   118  
   119  	case "R": // == num ref + alt alleles
   120  		return maxAlts + 1
   121  
   122  	case "G": // one value for each possible genotype
   123  		return 1 //TODO once parser is updated
   124  
   125  	case ".": // wildcard. they never make it easy do they...
   126  		return 1
   127  
   128  	default:
   129  		num, err := strconv.Atoi(k.Number)
   130  		if err != nil {
   131  			log.Panicf("'%s' is not a valid Number for header info", k.Number)
   132  		}
   133  		return num
   134  	}
   135  }
   136  
   137  func writeData(s *strings.Builder, v vcf.Vcf, key vcf.Key, numberOfFieldsPerSample int, repeats int) {
   138  	var innerFieldsWritten, fieldsWritten, i, j int
   139  
   140  	switch key.DataType {
   141  	case vcf.Integer:
   142  		ints, found := vcf.QueryInt(v, key)
   143  		if !found {
   144  			break
   145  		}
   146  		for i = range ints { // per sample data
   147  			for j = range ints[i] { // per field data
   148  				s.WriteString(fmt.Sprintf(",%d", ints[i][j]))
   149  				fieldsWritten++
   150  				innerFieldsWritten++
   151  			}
   152  			for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ {
   153  				s.WriteString(",")
   154  				fieldsWritten++
   155  			}
   156  			innerFieldsWritten = 0
   157  		}
   158  
   159  	case vcf.Float:
   160  		flts, found := vcf.QueryFloat(v, key)
   161  		if !found {
   162  			break
   163  		}
   164  		for i = range flts { // per sample data
   165  			for j = range flts[i] { // per field data
   166  				s.WriteString(fmt.Sprintf(",%g", flts[i][j]))
   167  				fieldsWritten++
   168  				innerFieldsWritten++
   169  			}
   170  			for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ {
   171  				s.WriteString(",")
   172  				fieldsWritten++
   173  			}
   174  			innerFieldsWritten = 0
   175  		}
   176  
   177  	case vcf.String:
   178  		strs, found := vcf.QueryString(v, key)
   179  		if !found {
   180  			break
   181  		}
   182  		for i = range strs { // per sample data
   183  			for j = range strs[i] { // per field data
   184  				s.WriteString(fmt.Sprintf(",%s", strs[i][j]))
   185  				fieldsWritten++
   186  				innerFieldsWritten++
   187  			}
   188  			for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ {
   189  				s.WriteString(",")
   190  				fieldsWritten++
   191  			}
   192  			innerFieldsWritten = 0
   193  		}
   194  
   195  	case vcf.Character:
   196  		chars, found := vcf.QueryRune(v, key)
   197  		if !found {
   198  			break
   199  		}
   200  		for i = range chars { // per sample data
   201  			for j = range chars[i] { // per field data
   202  				s.WriteString(fmt.Sprintf(",%c", chars[i][j]))
   203  				fieldsWritten++
   204  				innerFieldsWritten++
   205  			}
   206  			for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ {
   207  				s.WriteString(",")
   208  				fieldsWritten++
   209  			}
   210  			innerFieldsWritten = 0
   211  		}
   212  
   213  	case vcf.Flag:
   214  		found := vcf.QueryFlag(v, key)
   215  		if found {
   216  			s.WriteString(",TRUE")
   217  		} else {
   218  			s.WriteString(",FALSE")
   219  		}
   220  		fieldsWritten++
   221  		innerFieldsWritten++
   222  		for j = innerFieldsWritten; j < numberOfFieldsPerSample; j++ {
   223  			s.WriteString(",")
   224  			fieldsWritten++
   225  		}
   226  		innerFieldsWritten = 0
   227  	}
   228  
   229  	for j = fieldsWritten; j < numberOfFieldsPerSample*repeats; j++ {
   230  		s.WriteString(",")
   231  	}
   232  }