github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/read_group.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package sam
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"strconv"
    12  	"time"
    13  )
    14  
    15  // ReadGroup represents a sequencing read group.
    16  type ReadGroup struct {
    17  	owner        *Header
    18  	id           int32
    19  	name         string
    20  	center       string
    21  	description  string
    22  	date         time.Time
    23  	flowOrder    string
    24  	keySeq       string
    25  	library      string
    26  	program      string
    27  	insertSize   int
    28  	platform     string
    29  	platformUnit string
    30  	sample       string
    31  	otherTags    []tagPair
    32  }
    33  
    34  // NewReadGroup returns a ReadGroup with the given name, center, description,
    35  // library, program, platform, unique platform unit, sample name, flow order,
    36  // key, date of read group production, and predicted median insert size sequence.
    37  func NewReadGroup(name, center, desc, lib, prog, plat, unit, sample, flow, key string, date time.Time, size int) (*ReadGroup, error) {
    38  	if !validInt32(size) {
    39  		return nil, errors.New("sam: length overflow")
    40  	}
    41  	return &ReadGroup{
    42  		id:           -1, // This is altered by a Header when added.
    43  		name:         name,
    44  		center:       center,
    45  		description:  desc,
    46  		date:         date,
    47  		flowOrder:    flow,
    48  		keySeq:       key,
    49  		library:      lib,
    50  		program:      prog,
    51  		insertSize:   size,
    52  		platform:     plat,
    53  		platformUnit: unit,
    54  		sample:       sample,
    55  	}, nil
    56  }
    57  
    58  // ID returns the header ID for the ReadGroup.
    59  func (r *ReadGroup) ID() int {
    60  	if r == nil {
    61  		return -1
    62  	}
    63  	return int(r.id)
    64  }
    65  
    66  // Name returns the read group's name.
    67  func (r *ReadGroup) Name() string {
    68  	if r == nil {
    69  		return "*"
    70  	}
    71  	return r.name
    72  }
    73  
    74  // SetName sets the read group's name to n.
    75  func (r *ReadGroup) SetName(n string) error {
    76  	if r.owner != nil {
    77  		id, exists := r.owner.seenGroups[n]
    78  		if exists {
    79  			if id != r.id {
    80  				return errors.New("sam: name exists")
    81  			}
    82  			return nil
    83  		}
    84  		delete(r.owner.seenGroups, r.name)
    85  		r.owner.seenGroups[n] = r.id
    86  	}
    87  	r.name = n
    88  	return nil
    89  }
    90  
    91  // Clone returns a deep copy of the ReadGroup.
    92  func (r *ReadGroup) Clone() *ReadGroup {
    93  	if r == nil {
    94  		return nil
    95  	}
    96  	cr := *r
    97  	if len(cr.otherTags) != 0 {
    98  		cr.otherTags = make([]tagPair, len(cr.otherTags))
    99  	}
   100  	copy(cr.otherTags, r.otherTags)
   101  	cr.id = -1
   102  	cr.owner = nil
   103  	return &cr
   104  }
   105  
   106  // Library returns the library name for the read group.
   107  func (r *ReadGroup) Library() string { return r.library }
   108  
   109  // PlatformUnit returns the unique platform unit for the read group.
   110  func (r *ReadGroup) PlatformUnit() string { return r.platformUnit }
   111  
   112  // Time returns the time the read group was produced.
   113  func (r *ReadGroup) Time() time.Time { return r.date }
   114  
   115  // Tags applies the function fn to each of the tag-value pairs of the read group.
   116  // The function fn must not add or delete tags held by the receiver during
   117  // iteration.
   118  func (r *ReadGroup) Tags(fn func(t Tag, value string)) {
   119  	if fn == nil {
   120  		return
   121  	}
   122  	fn(idTag, r.name)
   123  	if r.center != "" {
   124  		fn(centerTag, r.center)
   125  	}
   126  	if r.description != "" {
   127  		fn(descriptionTag, r.description)
   128  	}
   129  	if !r.date.IsZero() {
   130  		fn(dateTag, r.date.Format(iso8601TimeDateN))
   131  	}
   132  	if r.flowOrder != "" {
   133  		fn(flowOrderTag, r.flowOrder)
   134  	}
   135  	if r.keySeq != "" {
   136  		fn(keySequenceTag, r.keySeq)
   137  	}
   138  	if r.library != "" {
   139  		fn(libraryTag, r.library)
   140  	}
   141  	if r.program != "" {
   142  		fn(programTag, r.program)
   143  	}
   144  	if r.insertSize != 0 {
   145  		fn(insertSizeTag, fmt.Sprint(r.insertSize))
   146  	}
   147  	if r.platform != "" {
   148  		fn(platformTag, r.platform)
   149  	}
   150  	if r.platformUnit != "" {
   151  		fn(platformUnitTag, r.platformUnit)
   152  	}
   153  	if r.sample != "" {
   154  		fn(sampleTag, r.sample)
   155  	}
   156  	for _, tp := range r.otherTags {
   157  		fn(tp.tag, tp.value)
   158  	}
   159  }
   160  
   161  // Get returns the string representation of the value associated with the
   162  // given read group line tag. If the tag is not present the empty string is returned.
   163  func (r *ReadGroup) Get(t Tag) string {
   164  	switch t {
   165  	case idTag:
   166  		return r.Name()
   167  	case centerTag:
   168  		return r.center
   169  	case descriptionTag:
   170  		return r.description
   171  	case dateTag:
   172  		return r.date.Format(iso8601TimeDateN)
   173  	case flowOrderTag:
   174  		if r.flowOrder == "" {
   175  			return "*"
   176  		}
   177  		return r.flowOrder
   178  	case keySequenceTag:
   179  		return r.keySeq
   180  	case libraryTag:
   181  		return r.library
   182  	case programTag:
   183  		return r.program
   184  	case insertSizeTag:
   185  		return fmt.Sprint(r.insertSize)
   186  	case platformTag:
   187  		return r.platform
   188  	case platformUnitTag:
   189  		return r.platformUnit
   190  	case sampleTag:
   191  		return r.sample
   192  	}
   193  	for _, tp := range r.otherTags {
   194  		if t == tp.tag {
   195  			return tp.value
   196  		}
   197  	}
   198  	return ""
   199  }
   200  
   201  // Set sets the value associated with the given read group line tag to the specified
   202  // value. If value is the empty string and the tag may be absent, it is deleted.
   203  func (r *ReadGroup) Set(t Tag, value string) error {
   204  	switch t {
   205  	case idTag:
   206  		r.name = value
   207  	case centerTag:
   208  		r.center = value
   209  	case descriptionTag:
   210  		r.description = value
   211  	case dateTag:
   212  		if value == "" {
   213  			r.date = time.Time{}
   214  			return nil
   215  		}
   216  		date, err := parseISO8601(value)
   217  		if err != nil {
   218  			return err
   219  		}
   220  		r.date = date
   221  	case flowOrderTag:
   222  		if value == "" || value == "*" {
   223  			r.flowOrder = ""
   224  			return nil
   225  		}
   226  		r.flowOrder = value
   227  	case keySequenceTag:
   228  		r.keySeq = value
   229  	case libraryTag:
   230  		r.library = value
   231  	case programTag:
   232  		r.program = value
   233  	case insertSizeTag:
   234  		if value == "" {
   235  			r.insertSize = 0
   236  			return nil
   237  		}
   238  		i, err := strconv.Atoi(value)
   239  		if err != nil {
   240  			return err
   241  		}
   242  		if !validInt32(i) {
   243  			return errBadLen
   244  		}
   245  		r.insertSize = i
   246  	case platformTag:
   247  		r.platform = value
   248  	case platformUnitTag:
   249  		r.platformUnit = value
   250  	case sampleTag:
   251  		r.sample = value
   252  	default:
   253  		if value == "" {
   254  			for i, tp := range r.otherTags {
   255  				if t == tp.tag {
   256  					copy(r.otherTags[i:], r.otherTags[i+1:])
   257  					r.otherTags = r.otherTags[:len(r.otherTags)-1]
   258  					return nil
   259  				}
   260  			}
   261  		} else {
   262  			for i, tp := range r.otherTags {
   263  				if t == tp.tag {
   264  					r.otherTags[i].value = value
   265  					return nil
   266  				}
   267  			}
   268  			r.otherTags = append(r.otherTags, tagPair{tag: t, value: value})
   269  		}
   270  	}
   271  	return nil
   272  }
   273  
   274  // String returns a string representation of the read group according to the
   275  // SAM specification section 1.3,
   276  func (r *ReadGroup) String() string {
   277  	var buf bytes.Buffer
   278  	fmt.Fprintf(&buf, "@RG\tID:%s", r.name)
   279  	if r.center != "" {
   280  		fmt.Fprintf(&buf, "\tCN:%s", r.center)
   281  	}
   282  	if r.description != "" {
   283  		fmt.Fprintf(&buf, "\tDS:%s", r.description)
   284  	}
   285  	if (r.date != time.Time{}) {
   286  		fmt.Fprintf(&buf, "\tDT:%s", r.date.Format(iso8601TimeDateN))
   287  	}
   288  	if r.flowOrder != "" {
   289  		fmt.Fprintf(&buf, "\tFO:%s", r.flowOrder)
   290  	}
   291  	if r.keySeq != "" {
   292  		fmt.Fprintf(&buf, "\tKS:%s", r.keySeq)
   293  	}
   294  	if r.library != "" {
   295  		fmt.Fprintf(&buf, "\tLB:%s", r.library)
   296  	}
   297  	if r.program != "" {
   298  		fmt.Fprintf(&buf, "\tPG:%s", r.program)
   299  	}
   300  	if r.insertSize != 0 {
   301  		fmt.Fprintf(&buf, "\tPI:%d", r.insertSize)
   302  	}
   303  	if r.platform != "" {
   304  		fmt.Fprintf(&buf, "\tPL:%s", r.platform)
   305  	}
   306  	if r.platformUnit != "" {
   307  		fmt.Fprintf(&buf, "\tPU:%s", r.platformUnit)
   308  	}
   309  	if r.sample != "" {
   310  		fmt.Fprintf(&buf, "\tSM:%s", r.sample)
   311  	}
   312  	for _, tp := range r.otherTags {
   313  		fmt.Fprintf(&buf, "\t%s:%s", tp.tag, tp.value)
   314  	}
   315  	return buf.String()
   316  }