github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/reference.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package sam
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"net/url"
    13  	"sort"
    14  	"strconv"
    15  )
    16  
// Reference is a mapping reference. It holds the fields that String renders
// as a SAM header @SQ line.
type Reference struct {
	owner     *Header   // Header whose seenRefs map SetName consults; nil for an unowned Reference.
	id        int32     // Header ID; -1 until altered by a Header when added.
	name      string    // Reference name, rendered as the SN field.
	lRef      int32     // Reference sequence length, rendered as the LN field.
	md5       string    // Raw 16 byte MD5 sum held in a string; empty when unset.
	assemID   string    // Assembly ID, rendered as the AS field; empty when unset.
	species   string    // Species, rendered as the SP field; empty when unset.
	uri       *url.URL  // Reference URI, rendered as the UR field; nil when unset.
	otherTags []tagPair // Tag-value pairs for tags not held in the fields above.
}
    29  
    30  // NewReference returns a new Reference based on the given parameters.
    31  // Only name and length are mandatory and length must be a valid reference
    32  // length according to the SAM specification, [1, 1<<31).
    33  func NewReference(name, assemID, species string, length int, md5 []byte, uri *url.URL) (*Reference, error) {
    34  	if !validLen(length) {
    35  		return nil, errors.New("sam: length out of range")
    36  	}
    37  	if name == "" {
    38  		return nil, errors.New("sam: no name provided")
    39  	}
    40  	var h string
    41  	if md5 != nil {
    42  		if len(md5) != 16 {
    43  			return nil, errors.New("sam: invalid md5 sum length")
    44  		}
    45  		h = string(md5[:])
    46  	}
    47  	return &Reference{
    48  		id:      -1, // This is altered by a Header when added.
    49  		name:    name,
    50  		lRef:    int32(length),
    51  		md5:     h,
    52  		assemID: assemID,
    53  		species: species,
    54  		uri:     uri,
    55  	}, nil
    56  }
    57  
    58  // ID returns the header ID of the Reference.
    59  func (r *Reference) ID() int {
    60  	if r == nil {
    61  		return -1
    62  	}
    63  	return int(r.id)
    64  }
    65  
    66  // Name returns the reference name.
    67  func (r *Reference) Name() string {
    68  	if r == nil {
    69  		return "*"
    70  	}
    71  	return r.name
    72  }
    73  
    74  // SetName sets the reference name to n.
    75  func (r *Reference) SetName(n string) error {
    76  	if r.owner != nil {
    77  		id, exists := r.owner.seenRefs[n]
    78  		if exists {
    79  			if id != r.id {
    80  				return errors.New("sam: name exists")
    81  			}
    82  			return nil
    83  		}
    84  		delete(r.owner.seenRefs, r.name)
    85  		r.owner.seenRefs[n] = r.id
    86  	}
    87  	r.name = n
    88  	return nil
    89  }
    90  
    91  // AssemblyID returns the assembly ID of the reference.
    92  func (r *Reference) AssemblyID() string {
    93  	if r == nil {
    94  		return ""
    95  	}
    96  	return r.assemID
    97  }
    98  
    99  // Species returns the reference species.
   100  func (r *Reference) Species() string {
   101  	if r == nil {
   102  		return ""
   103  	}
   104  	return r.species
   105  }
   106  
   107  // MD5 returns a 16 byte slice holding the MD5 sum of the reference sequence.
   108  func (r *Reference) MD5() []byte {
   109  	if r == nil || r.md5 == "" {
   110  		return nil
   111  	}
   112  	return []byte(r.md5)
   113  }
   114  
   115  // URI returns the URI of the reference.
   116  func (r *Reference) URI() string {
   117  	if r == nil {
   118  		return ""
   119  	}
   120  	return fmt.Sprintf("%s", r.uri)
   121  }
   122  
   123  // Len returns the length of the reference sequence.
   124  func (r *Reference) Len() int {
   125  	if r == nil {
   126  		return -1
   127  	}
   128  	return int(r.lRef)
   129  }
   130  
   131  // SetLen sets the length of the reference sequence to l. The given length
   132  // must be a valid SAM reference length.
   133  func (r *Reference) SetLen(l int) error {
   134  	if !validLen(l) {
   135  		return errors.New("sam: length out of range")
   136  	}
   137  	r.lRef = int32(l)
   138  	return nil
   139  }
   140  
   141  // Tags applies the function fn to each of the tag-value pairs of the Reference.
   142  // The function fn must not add or delete tags held by the receiver during
   143  // iteration.
   144  func (r *Reference) Tags(fn func(t Tag, value string)) {
   145  	if fn == nil {
   146  		return
   147  	}
   148  	fn(refNameTag, r.Name())
   149  	fn(refLengthTag, fmt.Sprint(r.lRef))
   150  	if r.assemID != "" {
   151  		fn(assemblyIDTag, r.assemID)
   152  	}
   153  	if r.md5 != "" {
   154  		fn(md5Tag, fmt.Sprintf("%x", []byte(r.md5)))
   155  	}
   156  	if r.species != "" {
   157  		fn(speciesTag, r.species)
   158  	}
   159  	if r.uri != nil {
   160  		fn(uriTag, r.uri.String())
   161  	}
   162  	for _, tp := range r.otherTags {
   163  		fn(tp.tag, tp.value)
   164  	}
   165  }
   166  
   167  // Get returns the string representation of the value associated with the
   168  // given reference line tag. If the tag is not present the empty string is returned.
   169  func (r *Reference) Get(t Tag) string {
   170  	switch t {
   171  	case refNameTag:
   172  		return r.Name()
   173  	case refLengthTag:
   174  		return fmt.Sprint(r.lRef)
   175  	case assemblyIDTag:
   176  		return r.assemID
   177  	case md5Tag:
   178  		if r.md5 == "" {
   179  			return ""
   180  		}
   181  		return fmt.Sprintf("%x", []byte(r.md5))
   182  	case speciesTag:
   183  		return r.species
   184  	case uriTag:
   185  		if r.uri == nil {
   186  			return ""
   187  		}
   188  		return r.uri.String()
   189  	}
   190  	for _, tp := range r.otherTags {
   191  		if t == tp.tag {
   192  			return tp.value
   193  		}
   194  	}
   195  	return ""
   196  }
   197  
   198  // Set sets the value associated with the given reference line tag to the specified
   199  // value. If value is the empty string and the tag may be absent, it is deleted.
   200  func (r *Reference) Set(t Tag, value string) error {
   201  	switch t {
   202  	case refNameTag:
   203  		if value == "*" {
   204  			r.name = ""
   205  			return nil
   206  		}
   207  		r.name = value
   208  	case refLengthTag:
   209  		l, err := strconv.Atoi(value)
   210  		if err != nil {
   211  			return errBadHeader
   212  		}
   213  		if !validLen(l) {
   214  			return errBadLen
   215  		}
   216  		r.lRef = int32(l)
   217  	case assemblyIDTag:
   218  		r.assemID = value
   219  	case md5Tag:
   220  		if value == "" {
   221  			r.md5 = ""
   222  			return nil
   223  		}
   224  		hb := [16]byte{}
   225  		n, err := hex.Decode(hb[:], []byte(value))
   226  		if err != nil {
   227  			return err
   228  		}
   229  		if n != 16 {
   230  			return errBadHeader
   231  		}
   232  		r.md5 = string(hb[:])
   233  	case speciesTag:
   234  		r.species = value
   235  	case uriTag:
   236  		if value == "" {
   237  			r.uri = nil
   238  			return nil
   239  		}
   240  		uri, err := url.Parse(value)
   241  		if err != nil {
   242  			return err
   243  		}
   244  		r.uri = uri
   245  		if r.uri.Scheme != "http" && r.uri.Scheme != "ftp" {
   246  			r.uri.Scheme = "file"
   247  		}
   248  	default:
   249  		if value == "" {
   250  			for i, tp := range r.otherTags {
   251  				if t == tp.tag {
   252  					copy(r.otherTags[i:], r.otherTags[i+1:])
   253  					r.otherTags = r.otherTags[:len(r.otherTags)-1]
   254  					return nil
   255  				}
   256  			}
   257  		} else {
   258  			for i, tp := range r.otherTags {
   259  				if t == tp.tag {
   260  					r.otherTags[i].value = value
   261  					return nil
   262  				}
   263  			}
   264  			r.otherTags = append(r.otherTags, tagPair{tag: t, value: value})
   265  		}
   266  	}
   267  	return nil
   268  }
   269  
   270  // String returns a string representation of the Reference according to the
   271  // SAM specification section 1.3,
   272  func (r *Reference) String() string {
   273  	var buf bytes.Buffer
   274  	fmt.Fprintf(&buf, "@SQ\tSN:%s\tLN:%d", r.name, r.lRef)
   275  	if r.md5 != "" {
   276  		fmt.Fprintf(&buf, "\tM5:%x", []byte(r.md5))
   277  	}
   278  	if r.assemID != "" {
   279  		fmt.Fprintf(&buf, "\tAS:%s", r.assemID)
   280  	}
   281  	if r.species != "" {
   282  		fmt.Fprintf(&buf, "\tSP:%s", r.species)
   283  	}
   284  	if r.uri != nil {
   285  		fmt.Fprintf(&buf, "\tUR:%s", r.uri)
   286  	}
   287  	for _, tp := range r.otherTags {
   288  		fmt.Fprintf(&buf, "\t%s:%s", tp.tag, tp.value)
   289  	}
   290  	return buf.String()
   291  }
   292  
   293  // Clone returns a deep copy of the Reference.
   294  func (r *Reference) Clone() *Reference {
   295  	if r == nil {
   296  		return nil
   297  	}
   298  	cr := *r
   299  	if len(cr.otherTags) != 0 {
   300  		cr.otherTags = make([]tagPair, len(cr.otherTags))
   301  	}
   302  	copy(cr.otherTags, r.otherTags)
   303  	cr.owner = nil
   304  	cr.id = -1
   305  	if r.uri != nil {
   306  		cr.uri = &url.URL{}
   307  		*cr.uri = *r.uri
   308  		if r.uri.User != nil {
   309  			cr.uri.User = &url.Userinfo{}
   310  			*cr.uri.User = *r.uri.User
   311  		}
   312  	}
   313  	return &cr
   314  }
   315  
   316  func equalRefs(a, b *Reference) bool {
   317  	if a == b {
   318  		return true
   319  	}
   320  	if (a.id != -1 && b.id != -1 && a.id != b.id) ||
   321  		a.name != b.name ||
   322  		a.lRef != b.lRef ||
   323  		(a.md5 != "" && b.md5 != "" && a.md5 != b.md5) ||
   324  		(a.assemID != "" && b.assemID != "" && a.assemID != b.assemID) ||
   325  		(a.species != "" && b.species != "" && a.species != b.species) ||
   326  		(a.uri != nil && b.uri != nil && a.uri != b.uri) {
   327  		return false
   328  	}
   329  	if a.uri != nil && b.uri != nil && a.uri.String() != b.uri.String() {
   330  		return false
   331  	}
   332  	if len(a.otherTags) != len(b.otherTags) {
   333  		return false
   334  	}
   335  	aOther := make(tagPairs, len(a.otherTags))
   336  	copy(aOther, a.otherTags)
   337  	sort.Sort(aOther)
   338  	bOther := make(tagPairs, len(b.otherTags))
   339  	copy(bOther, b.otherTags)
   340  	sort.Sort(bOther)
   341  	for i, ap := range aOther {
   342  		bp := bOther[i]
   343  		if ap.tag != bp.tag || ap.value != bp.value {
   344  			return false
   345  		}
   346  	}
   347  	return true
   348  }
   349  
   350  type tagPairs []tagPair
   351  
   352  func (p tagPairs) Len() int { return len(p) }
   353  func (p tagPairs) Less(i, j int) bool {
   354  	return p[i].tag[0] < p[j].tag[0] || (p[i].tag[0] == p[j].tag[0] && p[i].tag[1] < p[j].tag[1])
   355  }
   356  func (p tagPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }