github.com/vertgenlab/gonomics@v1.0.0/cigar/byteCigar.go (about) 1 package cigar 2 3 import ( 4 "github.com/vertgenlab/gonomics/exception" 5 "github.com/vertgenlab/gonomics/numbers/parse" 6 "log" 7 "strconv" 8 "strings" 9 ) 10 11 // ByteCigar struct encodes sequence comparison operations and includes run length info. 12 type ByteCigar struct { 13 RunLen uint16 14 Op byte 15 } 16 17 // Defined const for byte cigar. 18 const ( 19 Match byte = 'M' 20 Insertion byte = 'I' 21 Deletion byte = 'D' 22 N byte = 'N' 23 SoftClip byte = 'S' 24 HardClip byte = 'H' 25 Padded byte = 'P' 26 Equal byte = '=' 27 Mismatch byte = 'X' 28 Unknown byte = '*' 29 ) 30 31 // LookUpCigByte is a helper function decode uint32 into a byte cigar struct. 32 // In the sam/bam specs: CIGAR: op len<<4|op. Hash is as follows: ‘MIDNSHP=X’→‘012345678’. 33 func lookUpCigByte(op uint32) byte { 34 switch op { 35 case 0: 36 return Match 37 case 1: 38 return Insertion 39 case 2: 40 return Deletion 41 case 3: 42 return N 43 case 4: 44 return SoftClip 45 case 5: 46 return HardClip 47 case 6: 48 return Padded 49 case 7: 50 return Equal 51 case 8: 52 return Mismatch 53 default: 54 log.Fatalf("Error: cound not identify input byte") 55 return 0 56 } 57 } 58 59 // lookUpUint32 will return a uint32 representation of a cigar Op. 60 func lookUpUint32(op byte) uint32 { 61 switch op { 62 case Match: 63 return 0 64 case Insertion: 65 return 1 66 case Deletion: 67 return 2 68 case N: 69 return 3 70 case SoftClip: 71 return 4 72 case HardClip: 73 return 5 74 case Padded: 75 return 6 76 case Equal: 77 return 7 78 case Mismatch: 79 return 8 80 default: 81 log.Fatalf("Error: cound not identify input byte") 82 return 0 83 } 84 } 85 86 // ReadToBytesCigar will process a byte slice and define a small. 87 func ReadToBytesCigar(cigar []byte) []ByteCigar { 88 if cigar[0] == '*' { 89 return nil 90 } 91 var ans []ByteCigar = make([]ByteCigar, 0, 1) 92 var lastNum int = 0 93 for i := 0; i < len(cigar); i++ { 94 if IsValidCigar(cigar[i]) { 95 ans = append(ans, ByteCigar{RunLen: parse.StringToUint16(string(cigar[lastNum:i])), Op: cigar[i]}) 96 lastNum = i + 1 97 } 98 } 99 return ans 100 } 101 102 // IsValidCigar will perform a check to make sure op is a valid byte. 103 func IsValidCigar(op byte) bool { 104 switch op { 105 case 'M': 106 return true 107 case 'I': 108 return true 109 case 'D': 110 return true 111 case 'N': 112 return true 113 case 'S': 114 return true 115 case 'H': 116 return true 117 case 'P': 118 return true 119 case '=': 120 return true 121 case 'X': 122 return true 123 case '*': 124 return true 125 default: 126 return false 127 } 128 return false 129 } 130 131 // ByteCigarToString will process the cigar byte struct and parse and/or convert the data into a string. 132 func ByteCigarToString(cigar []ByteCigar) string { 133 if len(cigar) == 0 || cigar == nil { 134 return "*" 135 } 136 var str strings.Builder 137 var err error 138 for _, c := range cigar { 139 _, err = str.WriteString(strconv.Itoa(int(c.RunLen))) 140 exception.FatalOnErr(err) 141 err = str.WriteByte(c.Op) 142 exception.FatalOnErr(err) 143 } 144 return str.String() 145 } 146 147 // ByteMatrixTrace will trace smith-waterman matrix alignment and return one of 3 cigar Op's. 148 // M: matches or mismatches, I: insertions, D: for deletions. 149 func ByteMatrixTrace(a int64, b int64, c int64) (int64, byte) { 150 if a >= b && a >= c { 151 return a, 'M' 152 } else if b >= c { 153 return b, 'I' 154 } else { 155 return c, 'D' 156 } 157 } 158 159 // ReverseBytesCigar cigar will reverse the order of a cigar slice. Typically performed after matrix traceback 160 // from a local alignment. 161 func ReverseBytesCigar(alpha []ByteCigar) { 162 var i, off int 163 for i = len(alpha)/2 - 1; i >= 0; i-- { 164 off = len(alpha) - 1 - i 165 alpha[i], alpha[off] = alpha[off], alpha[i] 166 } 167 } 168 169 // QueryLength calculates the length of the query read from a slice of Cigar structs. 170 func QueryRunLen(c []ByteCigar) int { 171 if c == nil { 172 return 0 173 } 174 var ans uint16 175 for _, v := range c { 176 if v.Op == Match || v.Op == Insertion || v.Op == SoftClip || v.Op == Equal || v.Op == Mismatch { 177 ans = ans + v.RunLen 178 } 179 } 180 return int(ans) 181 } 182 183 // CatByteCigar will concatenate two cigar slices into one merged. 184 func CatByteCigar(cigs []ByteCigar, newCigs []ByteCigar) []ByteCigar { 185 if len(newCigs) == 0 || newCigs == nil { 186 return cigs 187 } else if len(cigs) == 0 { 188 return newCigs 189 } else { 190 cigs = AddCigarByte(cigs, newCigs[0]) 191 cigs = append(cigs, newCigs[1:]...) 192 return cigs 193 } 194 } 195 196 // AddCigarByte will add append a cigar byte to an existing slice. The function 197 // will perform a check on the tail of the slice and incurment the run length if the 198 // cigar Op values are the same. 199 func AddCigarByte(cigs []ByteCigar, newCig ByteCigar) []ByteCigar { 200 if len(cigs) == 0 { 201 cigs = append(cigs, newCig) 202 } else if cigs[len(cigs)-1].Op == newCig.Op { 203 cigs[len(cigs)-1].RunLen += newCig.RunLen 204 } else { 205 cigs = append(cigs, newCig) 206 } 207 return cigs 208 } 209 210 // MatrixSetup will allocate memory for smith-waterman matrix to be used with byte cigar opertations and trace back. 211 func MatrixSetup(size int) ([][]int64, [][]byte) { 212 m := make([][]int64, size) 213 trace := make([][]byte, size) 214 for idx := range m { 215 m[idx] = make([]int64, size) 216 trace[idx] = make([]byte, size) 217 } 218 return m, trace 219 } 220 221 // Uint32ToByteCigar will process a uint32 slice and decode each number into a byte cigar struct. 222 // CIGAR operation lengths are limited to 2^28-1 in the current sam/bam formats. 223 func Uint32ToByteCigar(cigar []uint32) []ByteCigar { 224 var answer []ByteCigar = make([]ByteCigar, len(cigar)) 225 for i := 0; i < len(cigar); i++ { 226 answer[i] = ByteCigar{RunLen: uint16(cigar[i] >> 4), Op: lookUpCigByte(cigar[i] & 0xf)} 227 } 228 return answer 229 } 230 231 // ByteCigarToUint32 will convert a slice of []ByteCigar to a slice of []uint32. 232 func ByteCigarToUint32(cigar []ByteCigar) []uint32 { 233 var answer []uint32 = make([]uint32, len(cigar)) 234 for i := 0; i < len(cigar); i++ { 235 answer[i] = lookUpUint32(cigar[i].Op) | uint32(cigar[i].RunLen)<<4 236 } 237 return answer 238 }