github.com/vertgenlab/gonomics@v1.0.0/cmd/vcfFilter/expression.go (about)

     1  package main
     2  
     3  import (
     4  	"log"
     5  	"strconv"
     6  	"strings"
     7  
     8  	"github.com/vertgenlab/gonomics/vcf"
     9  )
    10  
    11  type operator string
    12  
    13  const (
    14  	equal        operator = "="
    15  	notEqual     operator = "!="
    16  	greater      operator = ">"
    17  	less         operator = "<"
    18  	greaterEqual operator = ">="
    19  	lessEqual    operator = "<="
    20  	present      operator = ""
    21  )
    22  
    23  // TODO complex expression parsing
    24  // parseExpression parses a ';' delimited string to a slice of boolean functions to test a vcf record.
    25  func parseExpression(input string, header vcf.Header, isFormatElseInfo bool, includeMissingInfo bool) testingFuncs {
    26  	var answer testingFuncs
    27  	input = strings.Trim(input, "\"") // trim quotations from the ends
    28  	expSlice := strings.Split(input, ";")
    29  	for _, exp := range expSlice {
    30  		var op operator
    31  		var tagValuePair []string
    32  		var tag, value string
    33  		op = searchOp(exp)
    34  		if op != present {
    35  			tagValuePair = strings.Split(exp, string(op))
    36  		} else {
    37  			tagValuePair = []string{exp}
    38  		}
    39  		tag = strings.Trim(tagValuePair[0], " ")
    40  		if len(tagValuePair) == 2 {
    41  			value = strings.Trim(tagValuePair[1], " ")
    42  		}
    43  		answer = append(answer, getRelationshipTest(tag, value, op, header, isFormatElseInfo, includeMissingInfo))
    44  	}
    45  	return answer
    46  }
    47  
    48  func searchOp(exp string) operator {
    49  	if strings.Contains(exp, ">=") {
    50  		return greaterEqual
    51  	}
    52  
    53  	if strings.Contains(exp, "<=") {
    54  		return lessEqual
    55  	}
    56  
    57  	if strings.Contains(exp, "!=") {
    58  		return notEqual
    59  	}
    60  
    61  	if strings.Contains(exp, "=") {
    62  		return equal
    63  	}
    64  
    65  	if strings.Contains(exp, ">") {
    66  		return greater
    67  	}
    68  
    69  	if strings.Contains(exp, "<") {
    70  		return less
    71  	}
    72  
    73  	return present
    74  }
    75  
    76  func getRelationshipTest(tag string, value string, r operator, header vcf.Header, isFormatElseInfo bool, includeMissingInfo bool) func(vcf.Vcf) bool {
    77  	var tagKey vcf.Key
    78  	if isFormatElseInfo {
    79  		tagKey = header.Format[tag].Key
    80  	} else {
    81  		tagKey = header.Info[tag].Key
    82  	}
    83  
    84  	if tagKey.Number != "1" && tagKey.Number != "0" {
    85  		log.Printf("WARNING: expressions for tags with multiple values will be true if any value passes filter. Tag '%s' has '%s' fields.", tag, tagKey.Number)
    86  	}
    87  
    88  	switch tagKey.DataType {
    89  	case vcf.Integer:
    90  		test := r.TestInteger()
    91  		val, err := strconv.Atoi(value)
    92  		if err != nil {
    93  			log.Fatalf("Error: value '%s' is not an integer as expected for tag '%s'.", value, tag)
    94  		}
    95  		return func(v vcf.Vcf) bool {
    96  			var answer bool = true
    97  			queryResult, found := vcf.QueryInt(v, tagKey)
    98  			if !found {
    99  				return includeMissingInfo
   100  			}
   101  			for _, recordVal := range queryResult[0] {
   102  				if !test(recordVal, val) {
   103  					answer = false
   104  				}
   105  			}
   106  			return answer
   107  		}
   108  
   109  	case vcf.Float:
   110  		test := r.TestFloat()
   111  		val, err := strconv.ParseFloat(value, 64)
   112  		if err != nil {
   113  			log.Fatalf("Error: value '%s' is not a float as expected for tag '%s'.", value, tag)
   114  		}
   115  		return func(v vcf.Vcf) bool {
   116  			var answer bool = true
   117  			queryResult, found := vcf.QueryFloat(v, tagKey)
   118  			if !found {
   119  				return includeMissingInfo
   120  			}
   121  			for _, recordVal := range queryResult[0] {
   122  				if !test(recordVal, val) {
   123  					answer = false
   124  				}
   125  			}
   126  			return answer
   127  		}
   128  
   129  	case vcf.Character:
   130  		test := r.TestCharacter()
   131  		if len(value) != 1 {
   132  			log.Fatalf("Error: value '%s' is not a character as expected for tag '%s'.", value, tag)
   133  		}
   134  		return func(v vcf.Vcf) bool {
   135  			var answer bool = true
   136  			queryResult, found := vcf.QueryRune(v, tagKey)
   137  			if !found {
   138  				return includeMissingInfo
   139  			}
   140  			for _, recordVal := range queryResult[0] {
   141  				if !test(recordVal, rune(value[0])) {
   142  					answer = false
   143  				}
   144  			}
   145  			return answer
   146  		}
   147  
   148  	case vcf.String:
   149  		test := r.TestString()
   150  		return func(v vcf.Vcf) bool {
   151  			var answer bool = true
   152  			queryResult, found := vcf.QueryString(v, tagKey)
   153  			if !found {
   154  				return includeMissingInfo
   155  			}
   156  			for _, recordVal := range queryResult[0] {
   157  				if !test(recordVal, value) {
   158  					answer = false
   159  				}
   160  			}
   161  			return answer
   162  		}
   163  
   164  	case vcf.Flag:
   165  		if value != "" {
   166  			log.Fatalf("Error: expression specified a value for '%s' but '%s' is of type flag. Flags must be in the form of '%s' or '!%s'", tag, tag, tag, tag)
   167  		}
   168  		return func(v vcf.Vcf) bool {
   169  			return vcf.QueryFlag(v, tagKey)
   170  		}
   171  
   172  	default:
   173  		log.Panic()
   174  		return nil
   175  	}
   176  }
   177  
   178  func (r operator) TestInteger() func(a, b int) bool {
   179  	switch r {
   180  	case equal:
   181  		return func(a, b int) bool { return a == b }
   182  	case notEqual:
   183  		return func(a, b int) bool { return a != b }
   184  	case greater:
   185  		return func(a, b int) bool { return a > b }
   186  	case less:
   187  		return func(a, b int) bool { return a < b }
   188  	case greaterEqual:
   189  		return func(a, b int) bool { return a >= b }
   190  	case lessEqual:
   191  		return func(a, b int) bool { return a <= b }
   192  	default:
   193  		log.Panicf("unrecognized operator '%v'", r)
   194  		return nil
   195  	}
   196  }
   197  
   198  func (r operator) TestFloat() func(a, b float64) bool {
   199  	switch r {
   200  	case equal:
   201  		return func(a, b float64) bool { return a == b }
   202  	case notEqual:
   203  		return func(a, b float64) bool { return a != b }
   204  	case greater:
   205  		return func(a, b float64) bool { return a > b }
   206  	case less:
   207  		return func(a, b float64) bool { return a < b }
   208  	case greaterEqual:
   209  		return func(a, b float64) bool { return a >= b }
   210  	case lessEqual:
   211  		return func(a, b float64) bool { return a <= b }
   212  	default:
   213  		log.Panicf("unrecognized operator '%v'", r)
   214  		return nil
   215  	}
   216  }
   217  
   218  func (r operator) TestString() func(a, b string) bool {
   219  	switch r {
   220  	case equal:
   221  		return func(a, b string) bool { return a == b }
   222  	case notEqual:
   223  		return func(a, b string) bool { return a != b }
   224  	case greater:
   225  		return func(a, b string) bool { return a > b }
   226  	case less:
   227  		return func(a, b string) bool { return a < b }
   228  	case greaterEqual:
   229  		return func(a, b string) bool { return a >= b }
   230  	case lessEqual:
   231  		return func(a, b string) bool { return a <= b }
   232  	default:
   233  		log.Panicf("unrecognized operator '%v'", r)
   234  		return nil
   235  	}
   236  }
   237  
   238  func (r operator) TestCharacter() func(a, b rune) bool {
   239  	switch r {
   240  	case equal:
   241  		return func(a, b rune) bool { return a == b }
   242  	case notEqual:
   243  		return func(a, b rune) bool { return a != b }
   244  	case greater:
   245  		return func(a, b rune) bool { return a > b }
   246  	case less:
   247  		return func(a, b rune) bool { return a < b }
   248  	case greaterEqual:
   249  		return func(a, b rune) bool { return a >= b }
   250  	case lessEqual:
   251  		return func(a, b rune) bool { return a <= b }
   252  	default:
   253  		log.Panicf("unrecognized operator '%v'", r)
   254  		return nil
   255  	}
   256  }