github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/sam.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sam implements SAM file format reading and writing. The SAM format 6 // is described in the SAM specification. 7 // 8 // http://samtools.github.io/hts-specs/SAMv1.pdf 9 package sam 10 11 import ( 12 "bufio" 13 "errors" 14 "io" 15 ) 16 17 // Reader implements SAM format reading. 18 type Reader struct { 19 r *bufio.Reader 20 h *Header 21 22 seenRefs map[string]*Reference 23 } 24 25 // NewReader returns a new Reader, reading from the given io.Reader. 26 func NewReader(r io.Reader) (*Reader, error) { 27 h, _ := NewHeader(nil, nil) 28 sr := &Reader{ 29 r: bufio.NewReader(r), 30 h: h, 31 } 32 33 var b []byte 34 p, err := sr.r.Peek(1) 35 if err != nil { 36 return nil, err 37 } 38 if p[0] != '@' { 39 sr.seenRefs = make(map[string]*Reference) 40 return sr, nil 41 } 42 43 for { 44 l, err := sr.r.ReadBytes('\n') 45 if err != nil { 46 return nil, io.ErrUnexpectedEOF 47 } 48 b = append(b, l...) 49 p, err := sr.r.Peek(1) 50 if err == io.EOF { 51 break 52 } 53 if err != nil { 54 return nil, err 55 } 56 if p[0] != '@' { 57 break 58 } 59 } 60 61 err = sr.h.UnmarshalText(b) 62 if err != nil { 63 return nil, err 64 } 65 66 return sr, nil 67 } 68 69 // Header returns the SAM Header held by the Reader. 70 func (r *Reader) Header() *Header { 71 return r.h 72 } 73 74 // Read returns the next sam.Record in the SAM stream. 75 func (r *Reader) Read() (*Record, error) { 76 b, err := r.r.ReadBytes('\n') 77 if err != nil { 78 return nil, err 79 } 80 b = b[:len(b)-1] 81 if b[len(b)-1] == '\r' { 82 b = b[:len(b)-1] 83 } 84 var rec Record 85 86 // Handle cases where a header was present. 87 if r.seenRefs == nil { 88 err = rec.UnmarshalSAM(r.h, b) 89 if err != nil { 90 return nil, err 91 } 92 return &rec, nil 93 } 94 95 // Handle cases where no SAM header is present. 96 err = rec.UnmarshalSAM(nil, b) 97 if err != nil { 98 return nil, err 99 } 100 101 if ref, ok := r.seenRefs[rec.Ref.Name()]; ok { 102 rec.Ref = ref 103 } else if rec.Ref != nil { 104 err = r.h.AddReference(rec.Ref) 105 if err != nil { 106 return nil, err 107 } 108 r.seenRefs[rec.Ref.Name()] = rec.Ref 109 } else { 110 r.seenRefs["*"] = nil 111 } 112 if ref, ok := r.seenRefs[rec.MateRef.Name()]; ok { 113 rec.MateRef = ref 114 } else if rec.MateRef != nil { 115 err = r.h.AddReference(rec.MateRef) 116 if err != nil { 117 return nil, err 118 } 119 r.seenRefs[rec.MateRef.Name()] = rec.MateRef 120 } else { 121 r.seenRefs["*"] = nil 122 } 123 124 return &rec, nil 125 } 126 127 // RecordReader wraps types that can read SAM Records. 128 type RecordReader interface { 129 Read() (*Record, error) 130 } 131 132 // Iterator wraps a Reader to provide a convenient loop interface for reading SAM/BAM data. 133 // Successive calls to the Next method will step through the features of the provided 134 // Reader. Iteration stops unrecoverably at EOF or the first error. 135 type Iterator struct { 136 r RecordReader 137 rec *Record 138 err error 139 } 140 141 // NewIterator returns a Iterator to read from r. 142 // 143 // i, err := NewIterator(r) 144 // if err != nil { 145 // return err 146 // } 147 // for i.Next() { 148 // fn(i.Record()) 149 // } 150 // return i.Error() 151 // 152 func NewIterator(r RecordReader) *Iterator { return &Iterator{r: r} } 153 154 // Next advances the Iterator past the next record, which will then be available through 155 // the Record method. It returns false when the iteration stops, either by reaching the end of the 156 // input or an error. After Next returns false, the Error method will return any error that 157 // occurred during iteration, except that if it was io.EOF, Error will return nil. 158 func (i *Iterator) Next() bool { 159 if i.err != nil { 160 return false 161 } 162 i.rec, i.err = i.r.Read() 163 return i.err == nil 164 } 165 166 // Error returns the first non-EOF error that was encountered by the Iterator. 167 func (i *Iterator) Error() error { 168 if i.err == io.EOF { 169 return nil 170 } 171 return i.err 172 } 173 174 // Record returns the most recent record read by a call to Next. 175 func (i *Iterator) Record() *Record { return i.rec } 176 177 // Writer implements SAM format writing. 178 type Writer struct { 179 w io.Writer 180 flags int 181 } 182 183 // NewWriter returns a Writer to the given io.Writer using h for the SAM 184 // header. The format of flags for SAM lines can be FlagDecimal, FlagHex 185 // or FlagString. 186 func NewWriter(w io.Writer, h *Header, flags int) (*Writer, error) { 187 if flags < FlagDecimal || flags > FlagString { 188 return nil, errors.New("bam: flag format option out of range") 189 } 190 sw := &Writer{w: w, flags: flags} 191 text, _ := h.MarshalText() 192 _, err := w.Write(text) 193 if err != nil { 194 return nil, err 195 } 196 return sw, nil 197 } 198 199 // Write writes r to the SAM stream. 200 func (w *Writer) Write(r *Record) error { 201 b, err := r.MarshalSAM(w.flags) 202 if err != nil { 203 return err 204 } 205 b = append(b, '\n') 206 _, err = w.w.Write(b) 207 return err 208 } 209 210 const ( 211 wordBits = 31 212 213 maxInt32 = int(int32(^uint32(0) >> 1)) 214 minInt32 = -int(maxInt32) - 1 215 ) 216 217 func validInt32(i int) bool { return minInt32 <= i && i <= maxInt32 } 218 219 func validLen(i int) bool { return 1 <= i && i <= 1<<wordBits-1 } 220 func validPos(i int) bool { return -1 <= i && i <= (1<<wordBits-1)-1 } // 0-based. 221 func validTmpltLen(i int) bool { return -(1<<wordBits) <= i && i <= 1<<wordBits-1 }