github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/auxtags.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package sam 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "encoding/hex" 11 "fmt" 12 "math" 13 "reflect" 14 "strconv" 15 "unsafe" 16 ) 17 18 // ASCII is a printable ASCII character included in an Aux tag. 19 type ASCII byte 20 21 // Hex is a byte slice represented as a hex string in an Aux tag. 22 type Hex []byte 23 24 // Text is a byte slice represented as a string in an Aux tag. 25 type Text []byte 26 27 // An Aux represents an auxiliary data field from a SAM alignment record. 28 type Aux []byte 29 30 // NewAux returns a new Aux with the given tag, type and value. Acceptable value 31 // types and their corresponding SAM type are: 32 // 33 // A - ASCII 34 // c - int8 35 // C - uint8 36 // s - int16 37 // S - uint16 38 // i - int, uint or int32 39 // I - int, uint or uint32 40 // f - float32 41 // Z - Text or string 42 // H - Hex 43 // B - []int8, []int16, []int32, []uint8, []uint16, []uint32 or []float32 44 // 45 // The handling of int and uint types is provided as a convenience - values must 46 // fit within either int32 or uint32 and are converted to the smallest possible 47 // representation. 48 // 49 func NewAux(t Tag, value interface{}) (Aux, error) { 50 var a Aux 51 switch v := value.(type) { 52 case ASCII: 53 a = Aux{t[0], t[1], 'A', byte(v)} 54 case int: 55 switch { 56 case math.MinInt8 <= v && v <= math.MaxInt8: 57 a = Aux{t[0], t[1], 'c', byte(v)} 58 case math.MinInt16 <= v && v <= math.MaxInt16: 59 a = Aux{t[0], t[1], 's', 0, 0} 60 binary.LittleEndian.PutUint16(a[3:5], uint16(v)) 61 case math.MinInt32 <= v && v <= math.MaxInt32: 62 a = Aux{t[0], t[1], 'i', 0, 0, 0, 0} 63 binary.LittleEndian.PutUint32(a[3:7], uint32(v)) 64 default: 65 return nil, fmt.Errorf("sam: integer value out of range %d > %d", v, math.MaxInt32) 66 } 67 case uint: 68 switch { 69 case v <= math.MaxUint8: 70 a = Aux{t[0], t[1], 'C', byte(v)} 71 case v <= math.MaxUint16: 72 a = Aux{t[0], t[1], 'S', 0, 0} 73 binary.LittleEndian.PutUint16(a[3:5], uint16(v)) 74 case v <= math.MaxUint32: 75 a = Aux{t[0], t[1], 'I', 0, 0, 0, 0} 76 binary.LittleEndian.PutUint32(a[3:7], uint32(v)) 77 default: 78 return nil, fmt.Errorf("sam: unsigned integer value out of range %d > %d", v, uint(math.MaxUint32)) 79 } 80 case int8: 81 a = Aux{t[0], t[1], 'c', byte(v)} 82 case uint8: 83 a = Aux{t[0], t[1], 'C', v} 84 case int16: 85 a = Aux{t[0], t[1], 's', 0, 0} 86 binary.LittleEndian.PutUint16(a[3:5], uint16(v)) 87 case uint16: 88 a = Aux{t[0], t[1], 'S', 0, 0} 89 binary.LittleEndian.PutUint16(a[3:5], v) 90 case int32: 91 a = Aux{t[0], t[1], 'i', 0, 0, 0, 0} 92 binary.LittleEndian.PutUint32(a[3:7], uint32(v)) 93 case uint32: 94 a = Aux{t[0], t[1], 'I', 0, 0, 0, 0} 95 binary.LittleEndian.PutUint32(a[3:7], v) 96 case float32: 97 a = Aux{t[0], t[1], 'f', 0, 0, 0, 0} 98 binary.LittleEndian.PutUint32(a[3:7], math.Float32bits(v)) 99 case Text: 100 a = make(Aux, len(v)+3) 101 a[0], a[1], a[2] = t[0], t[1], 'Z' 102 copy(a[3:], v) 103 case string: 104 a = make(Aux, len(v)+3) 105 a[0], a[1], a[2] = t[0], t[1], 'Z' 106 copy(a[3:], v) 107 case Hex: 108 a = make(Aux, 3, len(v)+3) 109 copy(a, Aux{t[0], t[1], 'H'}) 110 a = append(a, v...) 111 default: 112 rv := reflect.ValueOf(value) 113 rt := rv.Type() 114 if k := rt.Kind(); k != reflect.Array && k != reflect.Slice { 115 return nil, fmt.Errorf("sam: unknown type %T", value) 116 } 117 l := rv.Len() 118 if uint(l) > math.MaxUint32 { 119 return nil, fmt.Errorf("sam: array too long") 120 } 121 a = Aux{t[0], t[1], 'B', 0xff, 0, 0, 0, 0} 122 binary.LittleEndian.PutUint32([]byte(a[4:8]), uint32(l)) 123 124 switch rt.Elem().Kind() { 125 case reflect.Int8: 126 a[3] = 'c' 127 value := value.([]int8) 128 b := *(*[]byte)(unsafe.Pointer(&value)) 129 return append(a, b...), nil 130 case reflect.Uint8: 131 a[3] = 'C' 132 return append(a, value.([]uint8)...), nil 133 case reflect.Int16: 134 a[3] = 's' 135 case reflect.Uint16: 136 a[3] = 'S' 137 case reflect.Int32: 138 a[3] = 'i' 139 case reflect.Uint32: 140 a[3] = 'I' 141 case reflect.Float32: 142 a[3] = 'f' 143 default: 144 return nil, fmt.Errorf("sam: unsupported array type: %T", value) 145 } 146 buf := bytes.NewBuffer(a) 147 err := binary.Write(buf, binary.LittleEndian, value) 148 a = buf.Bytes() 149 if err != nil { 150 return nil, fmt.Errorf("sam: failed to encode array: %v", err) 151 } 152 } 153 return a, nil 154 } 155 156 // ParseAux returns an AUX parsed from the given text. 157 func ParseAux(text []byte) (Aux, error) { 158 // TG:T:v... 159 // 012345... 160 if len(text) < 6 || text[2] != ':' || text[4] != ':' { 161 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 162 } 163 txt := text[5:] 164 var value interface{} 165 switch typ := text[3]; typ { 166 case 'A': 167 if len(txt) != 1 { 168 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 169 } 170 value = ASCII(txt[0]) 171 case 'i': 172 i, err := strconv.Atoi(string(txt)) 173 if err != nil { 174 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 175 } 176 if i < 0 { 177 value = i 178 } else { 179 value = uint(i) 180 } 181 case 'f': 182 f, err := strconv.ParseFloat(string(txt), 32) 183 if err != nil { 184 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 185 } 186 value = float32(f) 187 case 'Z': 188 value = Text(txt) 189 case 'H': 190 b := make([]byte, hex.DecodedLen(len(txt))) 191 _, err := hex.Decode(b, txt) 192 if err != nil { 193 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 194 } 195 value = Hex(b) 196 case 'B': 197 if txt[1] != ',' { 198 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 199 } 200 nf := bytes.Split(txt[2:], []byte{','}) 201 if len(nf) == 0 { 202 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 203 } 204 switch txt[0] { 205 case 'c': 206 a := make([]int8, len(nf)) 207 for i, n := range nf { 208 v, err := strconv.ParseInt(string(n), 0, 8) 209 if err != nil { 210 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 211 } 212 a[i] = int8(v) 213 } 214 value = a 215 case 'C': 216 a := make([]uint8, len(nf)) 217 for i, n := range nf { 218 v, err := strconv.ParseUint(string(n), 0, 8) 219 if err != nil { 220 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 221 } 222 a[i] = uint8(v) 223 } 224 value = a 225 case 's': 226 a := make([]int16, len(nf)) 227 for i, n := range nf { 228 v, err := strconv.ParseInt(string(n), 0, 16) 229 if err != nil { 230 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 231 } 232 a[i] = int16(v) 233 } 234 value = a 235 case 'S': 236 a := make([]uint16, len(nf)) 237 for i, n := range nf { 238 v, err := strconv.ParseUint(string(n), 0, 16) 239 if err != nil { 240 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 241 } 242 a[i] = uint16(v) 243 } 244 value = a 245 case 'i': 246 a := make([]int32, len(nf)) 247 for i, n := range nf { 248 v, err := strconv.ParseInt(string(n), 0, 32) 249 if err != nil { 250 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 251 } 252 a[i] = int32(v) 253 } 254 value = a 255 case 'I': 256 a := make([]uint32, len(nf)) 257 for i, n := range nf { 258 v, err := strconv.ParseUint(string(n), 0, 32) 259 if err != nil { 260 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 261 } 262 a[i] = uint32(v) 263 } 264 value = a 265 case 'f': 266 a := make([]float32, len(nf)) 267 for i, n := range nf { 268 f, err := strconv.ParseFloat(string(n), 32) 269 if err != nil { 270 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 271 } 272 a[i] = float32(f) 273 } 274 value = a 275 default: 276 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 277 } 278 default: 279 return nil, fmt.Errorf("sam: invalid aux tag field: %q", text) 280 } 281 aux, err := NewAux(Tag{text[0], text[1]}, value) 282 if err != nil { 283 return nil, fmt.Errorf("sam: invalid aux tag field: %v", err) 284 } 285 return aux, nil 286 } 287 288 var auxKind = [256]byte{ 289 'A': 'A', 290 'c': 'i', 'C': 'i', 291 's': 'i', 'S': 'i', 292 'i': 'i', 'I': 'i', 293 'f': 'f', 294 'Z': 'Z', 295 'H': 'H', 296 'B': 'B', 297 } 298 299 // String returns the string representation of an Aux type. 300 func (a Aux) String() string { 301 switch a.Type() { 302 case 'A': 303 return fmt.Sprintf("%s:%c:%c", []byte(a[:2]), a.Kind(), a.Value()) 304 case 'H': 305 return fmt.Sprintf("%s:%c:%02x", []byte(a[:2]), a.Kind(), a.Value()) 306 case 'B': 307 return fmt.Sprintf("%s:%c:%c:%v", []byte(a[:2]), a.Kind(), a[3], a.Value()) 308 } 309 return fmt.Sprintf("%s:%c:%v", []byte(a[:2]), a.Kind(), a.Value()) 310 } 311 312 // samAux implements SAM aux field formatting. 313 type samAux Aux 314 315 // String returns the string representation of an Aux type. 316 func (sa samAux) String() string { 317 a := Aux(sa) 318 switch a.Type() { 319 case 'A': 320 return fmt.Sprintf("%s:%c:%c", []byte(a[:2]), a.Kind(), a.Value()) 321 case 'H': 322 return fmt.Sprintf("%s:%c:%02x", []byte(a[:2]), a.Kind(), a.Value()) 323 case 'B': 324 var buf bytes.Buffer 325 fmt.Fprintf(&buf, "%s:%c:%c", []byte(a[:2]), a.Kind(), a[3]) 326 rv := reflect.ValueOf(a.Value()) 327 for i := 0; i < rv.Len(); i++ { 328 fmt.Fprintf(&buf, ",%v", rv.Index(i).Interface()) 329 } 330 return buf.String() 331 } 332 return fmt.Sprintf("%s:%c:%v", []byte(a[:2]), a.Kind(), a.Value()) 333 } 334 335 // A Tag represents an auxiliary or header tag label. 336 type Tag [2]byte 337 338 var ( 339 headerTag = Tag{'H', 'D'} 340 versionTag = Tag{'V', 'N'} 341 sortOrderTag = Tag{'S', 'O'} 342 groupOrderTag = Tag{'G', 'O'} 343 344 refDictTag = Tag{'S', 'Q'} 345 refNameTag = Tag{'S', 'N'} 346 refLengthTag = Tag{'L', 'N'} 347 alternativeLocus = Tag{'A', 'H'} // nolint 348 assemblyIDTag = Tag{'A', 'S'} 349 md5Tag = Tag{'M', '5'} 350 speciesTag = Tag{'S', 'P'} 351 uriTag = Tag{'U', 'R'} 352 353 readGroupTag = Tag{'R', 'G'} 354 centerTag = Tag{'C', 'N'} 355 descriptionTag = Tag{'D', 'S'} 356 dateTag = Tag{'D', 'T'} 357 flowOrderTag = Tag{'F', 'O'} 358 keySequenceTag = Tag{'K', 'S'} 359 libraryTag = Tag{'L', 'B'} 360 insertSizeTag = Tag{'P', 'I'} 361 platformTag = Tag{'P', 'L'} 362 platformUnitTag = Tag{'P', 'U'} 363 sampleTag = Tag{'S', 'M'} 364 365 programTag = Tag{'P', 'G'} 366 idTag = Tag{'I', 'D'} 367 programNameTag = Tag{'P', 'N'} 368 commandLineTag = Tag{'C', 'L'} 369 previousProgTag = Tag{'P', 'P'} 370 progDesc = Tag{'D', 'S'} // nolint 371 372 bagIDTag = Tag{'D', 'I'} 373 bagSizeTag = Tag{'D', 'S'} 374 dupTypeTag = Tag{'D', 'T'} 375 libraryBagSizeTag = Tag{'D', 'L'} 376 linearDupTag = Tag{'L', 'D'} 377 linearBagIDTag = Tag{'L', 'I'} 378 linearBagSizeTag = Tag{'L', 'S'} 379 380 commentTag = Tag{'C', 'O'} 381 ) 382 383 // NewTag returns a Tag from the tag string. It panics is len(tag) != 2. 384 func NewTag(tag string) Tag { 385 var t Tag 386 if copy(t[:], tag) != 2 { 387 panic("sam: illegal tag length") 388 } 389 return t 390 } 391 392 // String returns a string representation of a Tag. 393 func (t Tag) String() string { return string(t[:]) } 394 395 // Tag returns the Tag representation of the Aux tag ID. 396 func (a Aux) Tag() Tag { var t Tag; copy(t[:], a[:2]); return t } 397 398 // Type returns a byte corresponding to the type of the auxiliary tag. 399 // Returned values are in {'A', 'c', 'C', 's', 'S', 'i', 'I', 'f', 'Z', 'H', 'B'}. 400 func (a Aux) Type() byte { return a[2] } 401 402 // Kind returns a byte corresponding to the kind of the auxiliary tag. 403 // Returned values are in {'A', 'i', 'f', 'Z', 'H', 'B'}. 404 func (a Aux) Kind() byte { return auxKind[a[2]] } 405 406 // Value returns v containing the value of the auxiliary tag. 407 func (a Aux) Value() interface{} { 408 switch t := a.Type(); t { 409 case 'A': 410 return a[3] 411 case 'c': 412 return int8(a[3]) 413 case 'C': 414 return uint8(a[3]) 415 case 's': 416 return int16(binary.LittleEndian.Uint16(a[3:5])) 417 case 'S': 418 return binary.LittleEndian.Uint16(a[3:5]) 419 case 'i': 420 return int32(binary.LittleEndian.Uint32(a[3:7])) 421 case 'I': 422 return binary.LittleEndian.Uint32(a[3:7]) 423 case 'f': 424 return math.Float32frombits(binary.LittleEndian.Uint32(a[3:7])) 425 case 'Z': // Z and H Require that parsing stops before the terminating zero. 426 return string(a[3:]) 427 case 'H': 428 return []byte(a[3:]) 429 case 'B': 430 length := int32(binary.LittleEndian.Uint32(a[4:8])) 431 switch t := a[3]; t { 432 case 'c': 433 c := a[8:] 434 return *(*[]int8)(unsafe.Pointer(&c)) 435 case 'C': 436 return []uint8(a[8:]) 437 case 's': 438 Bs := make([]int16, length) 439 err := binary.Read(bytes.NewBuffer(a[8:]), binary.LittleEndian, &Bs) 440 if err != nil { 441 panic(fmt.Sprintf("sam: binary.Read of s field failed: %v", err)) 442 } 443 return Bs 444 case 'S': 445 BS := make([]uint16, length) 446 err := binary.Read(bytes.NewBuffer(a[8:]), binary.LittleEndian, &BS) 447 if err != nil { 448 panic(fmt.Sprintf("sam: binary.Read of S field failed: %v", err)) 449 } 450 return BS 451 case 'i': 452 Bi := make([]int32, length) 453 err := binary.Read(bytes.NewBuffer(a[8:]), binary.LittleEndian, &Bi) 454 if err != nil { 455 panic(fmt.Sprintf("sam: binary.Read of i field failed: %v", err)) 456 } 457 return Bi 458 case 'I': 459 BI := make([]uint32, length) 460 err := binary.Read(bytes.NewBuffer(a[8:]), binary.LittleEndian, &BI) 461 if err != nil { 462 panic(fmt.Sprintf("sam: binary.Read of I field failed: %v", err)) 463 } 464 return BI 465 case 'f': 466 Bf := make([]float32, length) 467 err := binary.Read(bytes.NewBuffer(a[8:]), binary.LittleEndian, &Bf) 468 if err != nil { 469 panic(fmt.Sprintf("sam: binary.Read of f field failed: %v", err)) 470 } 471 return Bf 472 default: 473 return fmt.Errorf("%%B!(UNKNOWN ARRAY type=%c)", t) 474 } 475 default: 476 return fmt.Errorf("%%?!(UNKNOWN type=%c)", t) 477 } 478 } 479 480 func (a Aux) matches(tag []byte) bool { 481 return a[1] == tag[1] && a[0] == tag[0] 482 } 483 484 // AuxFields is a set of auxiliary fields. 485 type AuxFields []Aux 486 487 // Get returns the auxiliary field identified by the given tag, or nil 488 // if no field matches. 489 func (a AuxFields) Get(tag Tag) Aux { 490 for _, f := range a { 491 if f.Tag() == tag { 492 return f 493 } 494 } 495 return nil 496 } 497 498 // GetUnique returns an error if the tag appears more than once, and is 499 // otherwise identical to Get. 500 func (a AuxFields) GetUnique(tag Tag) (Aux, error) { 501 for i, f := range a { 502 if f.Tag() == tag { 503 for _, f2 := range a[i+1:] { 504 if f2.Tag() == tag { 505 return nil, fmt.Errorf("sam.GetUnique: tag %v appears multiple times", tag) 506 } 507 } 508 return f, nil 509 } 510 } 511 return nil, nil 512 }