github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/reference.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package sam 6 7 import ( 8 "bytes" 9 "encoding/hex" 10 "errors" 11 "fmt" 12 "net/url" 13 "sort" 14 "strconv" 15 ) 16 17 // Reference is a mapping reference. 18 type Reference struct { 19 owner *Header 20 id int32 21 name string 22 lRef int32 23 md5 string 24 assemID string 25 species string 26 uri *url.URL 27 otherTags []tagPair 28 } 29 30 // NewReference returns a new Reference based on the given parameters. 31 // Only name and length are mandatory and length must be a valid reference 32 // length according to the SAM specification, [1, 1<<31). 33 func NewReference(name, assemID, species string, length int, md5 []byte, uri *url.URL) (*Reference, error) { 34 if !validLen(length) { 35 return nil, errors.New("sam: length out of range") 36 } 37 if name == "" { 38 return nil, errors.New("sam: no name provided") 39 } 40 var h string 41 if md5 != nil { 42 if len(md5) != 16 { 43 return nil, errors.New("sam: invalid md5 sum length") 44 } 45 h = string(md5[:]) 46 } 47 return &Reference{ 48 id: -1, // This is altered by a Header when added. 49 name: name, 50 lRef: int32(length), 51 md5: h, 52 assemID: assemID, 53 species: species, 54 uri: uri, 55 }, nil 56 } 57 58 // ID returns the header ID of the Reference. 59 func (r *Reference) ID() int { 60 if r == nil { 61 return -1 62 } 63 return int(r.id) 64 } 65 66 // Name returns the reference name. 67 func (r *Reference) Name() string { 68 if r == nil { 69 return "*" 70 } 71 return r.name 72 } 73 74 // SetName sets the reference name to n. 75 func (r *Reference) SetName(n string) error { 76 if r.owner != nil { 77 id, exists := r.owner.seenRefs[n] 78 if exists { 79 if id != r.id { 80 return errors.New("sam: name exists") 81 } 82 return nil 83 } 84 delete(r.owner.seenRefs, r.name) 85 r.owner.seenRefs[n] = r.id 86 } 87 r.name = n 88 return nil 89 } 90 91 // AssemblyID returns the assembly ID of the reference. 92 func (r *Reference) AssemblyID() string { 93 if r == nil { 94 return "" 95 } 96 return r.assemID 97 } 98 99 // Species returns the reference species. 100 func (r *Reference) Species() string { 101 if r == nil { 102 return "" 103 } 104 return r.species 105 } 106 107 // MD5 returns a 16 byte slice holding the MD5 sum of the reference sequence. 108 func (r *Reference) MD5() []byte { 109 if r == nil || r.md5 == "" { 110 return nil 111 } 112 return []byte(r.md5) 113 } 114 115 // URI returns the URI of the reference. 116 func (r *Reference) URI() string { 117 if r == nil { 118 return "" 119 } 120 return fmt.Sprintf("%s", r.uri) 121 } 122 123 // Len returns the length of the reference sequence. 124 func (r *Reference) Len() int { 125 if r == nil { 126 return -1 127 } 128 return int(r.lRef) 129 } 130 131 // SetLen sets the length of the reference sequence to l. The given length 132 // must be a valid SAM reference length. 133 func (r *Reference) SetLen(l int) error { 134 if !validLen(l) { 135 return errors.New("sam: length out of range") 136 } 137 r.lRef = int32(l) 138 return nil 139 } 140 141 // Tags applies the function fn to each of the tag-value pairs of the Reference. 142 // The function fn must not add or delete tags held by the receiver during 143 // iteration. 144 func (r *Reference) Tags(fn func(t Tag, value string)) { 145 if fn == nil { 146 return 147 } 148 fn(refNameTag, r.Name()) 149 fn(refLengthTag, fmt.Sprint(r.lRef)) 150 if r.assemID != "" { 151 fn(assemblyIDTag, r.assemID) 152 } 153 if r.md5 != "" { 154 fn(md5Tag, fmt.Sprintf("%x", []byte(r.md5))) 155 } 156 if r.species != "" { 157 fn(speciesTag, r.species) 158 } 159 if r.uri != nil { 160 fn(uriTag, r.uri.String()) 161 } 162 for _, tp := range r.otherTags { 163 fn(tp.tag, tp.value) 164 } 165 } 166 167 // Get returns the string representation of the value associated with the 168 // given reference line tag. If the tag is not present the empty string is returned. 169 func (r *Reference) Get(t Tag) string { 170 switch t { 171 case refNameTag: 172 return r.Name() 173 case refLengthTag: 174 return fmt.Sprint(r.lRef) 175 case assemblyIDTag: 176 return r.assemID 177 case md5Tag: 178 if r.md5 == "" { 179 return "" 180 } 181 return fmt.Sprintf("%x", []byte(r.md5)) 182 case speciesTag: 183 return r.species 184 case uriTag: 185 if r.uri == nil { 186 return "" 187 } 188 return r.uri.String() 189 } 190 for _, tp := range r.otherTags { 191 if t == tp.tag { 192 return tp.value 193 } 194 } 195 return "" 196 } 197 198 // Set sets the value associated with the given reference line tag to the specified 199 // value. If value is the empty string and the tag may be absent, it is deleted. 200 func (r *Reference) Set(t Tag, value string) error { 201 switch t { 202 case refNameTag: 203 if value == "*" { 204 r.name = "" 205 return nil 206 } 207 r.name = value 208 case refLengthTag: 209 l, err := strconv.Atoi(value) 210 if err != nil { 211 return errBadHeader 212 } 213 if !validLen(l) { 214 return errBadLen 215 } 216 r.lRef = int32(l) 217 case assemblyIDTag: 218 r.assemID = value 219 case md5Tag: 220 if value == "" { 221 r.md5 = "" 222 return nil 223 } 224 hb := [16]byte{} 225 n, err := hex.Decode(hb[:], []byte(value)) 226 if err != nil { 227 return err 228 } 229 if n != 16 { 230 return errBadHeader 231 } 232 r.md5 = string(hb[:]) 233 case speciesTag: 234 r.species = value 235 case uriTag: 236 if value == "" { 237 r.uri = nil 238 return nil 239 } 240 uri, err := url.Parse(value) 241 if err != nil { 242 return err 243 } 244 r.uri = uri 245 if r.uri.Scheme != "http" && r.uri.Scheme != "ftp" { 246 r.uri.Scheme = "file" 247 } 248 default: 249 if value == "" { 250 for i, tp := range r.otherTags { 251 if t == tp.tag { 252 copy(r.otherTags[i:], r.otherTags[i+1:]) 253 r.otherTags = r.otherTags[:len(r.otherTags)-1] 254 return nil 255 } 256 } 257 } else { 258 for i, tp := range r.otherTags { 259 if t == tp.tag { 260 r.otherTags[i].value = value 261 return nil 262 } 263 } 264 r.otherTags = append(r.otherTags, tagPair{tag: t, value: value}) 265 } 266 } 267 return nil 268 } 269 270 // String returns a string representation of the Reference according to the 271 // SAM specification section 1.3, 272 func (r *Reference) String() string { 273 var buf bytes.Buffer 274 fmt.Fprintf(&buf, "@SQ\tSN:%s\tLN:%d", r.name, r.lRef) 275 if r.md5 != "" { 276 fmt.Fprintf(&buf, "\tM5:%x", []byte(r.md5)) 277 } 278 if r.assemID != "" { 279 fmt.Fprintf(&buf, "\tAS:%s", r.assemID) 280 } 281 if r.species != "" { 282 fmt.Fprintf(&buf, "\tSP:%s", r.species) 283 } 284 if r.uri != nil { 285 fmt.Fprintf(&buf, "\tUR:%s", r.uri) 286 } 287 for _, tp := range r.otherTags { 288 fmt.Fprintf(&buf, "\t%s:%s", tp.tag, tp.value) 289 } 290 return buf.String() 291 } 292 293 // Clone returns a deep copy of the Reference. 294 func (r *Reference) Clone() *Reference { 295 if r == nil { 296 return nil 297 } 298 cr := *r 299 if len(cr.otherTags) != 0 { 300 cr.otherTags = make([]tagPair, len(cr.otherTags)) 301 } 302 copy(cr.otherTags, r.otherTags) 303 cr.owner = nil 304 cr.id = -1 305 if r.uri != nil { 306 cr.uri = &url.URL{} 307 *cr.uri = *r.uri 308 if r.uri.User != nil { 309 cr.uri.User = &url.Userinfo{} 310 *cr.uri.User = *r.uri.User 311 } 312 } 313 return &cr 314 } 315 316 func equalRefs(a, b *Reference) bool { 317 if a == b { 318 return true 319 } 320 if (a.id != -1 && b.id != -1 && a.id != b.id) || 321 a.name != b.name || 322 a.lRef != b.lRef || 323 (a.md5 != "" && b.md5 != "" && a.md5 != b.md5) || 324 (a.assemID != "" && b.assemID != "" && a.assemID != b.assemID) || 325 (a.species != "" && b.species != "" && a.species != b.species) || 326 (a.uri != nil && b.uri != nil && a.uri != b.uri) { 327 return false 328 } 329 if a.uri != nil && b.uri != nil && a.uri.String() != b.uri.String() { 330 return false 331 } 332 if len(a.otherTags) != len(b.otherTags) { 333 return false 334 } 335 aOther := make(tagPairs, len(a.otherTags)) 336 copy(aOther, a.otherTags) 337 sort.Sort(aOther) 338 bOther := make(tagPairs, len(b.otherTags)) 339 copy(bOther, b.otherTags) 340 sort.Sort(bOther) 341 for i, ap := range aOther { 342 bp := bOther[i] 343 if ap.tag != bp.tag || ap.value != bp.value { 344 return false 345 } 346 } 347 return true 348 } 349 350 type tagPairs []tagPair 351 352 func (p tagPairs) Len() int { return len(p) } 353 func (p tagPairs) Less(i, j int) bool { 354 return p[i].tag[0] < p[j].tag[0] || (p[i].tag[0] == p[j].tag[0] && p[i].tag[1] < p[j].tag[1]) 355 } 356 func (p tagPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }