github.com/biogo/biogo@v1.0.4/io/featio/bed/bed.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bed provides types to read and write BED format files according to 6 // the UCSC specification. 7 // 8 // The specification can be found at http://genome.ucsc.edu/FAQ/FAQformat.html#format1. 9 package bed 10 11 import ( 12 "github.com/biogo/biogo/feat" 13 "github.com/biogo/biogo/io/featio" 14 "github.com/biogo/biogo/seq" 15 16 "bufio" 17 "bytes" 18 "encoding/csv" 19 "errors" 20 "fmt" 21 "image/color" 22 "io" 23 "reflect" 24 "runtime" 25 "strconv" 26 "unsafe" 27 ) 28 29 var ( 30 ErrBadBedType = errors.New("bed: bad bed type") 31 ErrBadStrandField = errors.New("bad strand field") 32 ErrBadStrand = errors.New("invalid strand") 33 ErrBadColorField = errors.New("bad color field") 34 ErrMissingBlockValues = errors.New("missing block values") 35 ErrNoChromField = errors.New("no chrom field available") 36 ) 37 38 const ( 39 chromField = iota 40 startField 41 endField 42 nameField 43 scoreField 44 strandField 45 thickStartField 46 thickEndField 47 rgbField 48 blockCountField 49 blockSizesField 50 blockStartsField 51 ) 52 53 var ( 54 _ featio.Reader = (*Reader)(nil) 55 _ featio.Writer = (*Writer)(nil) 56 57 _ feat.Feature = (*Bed3)(nil) 58 _ feat.Feature = (*Bed4)(nil) 59 _ feat.Feature = (*Bed5)(nil) 60 _ feat.Feature = (*Bed6)(nil) 61 _ feat.Feature = (*Bed12)(nil) 62 63 _ Bed = (*Bed3)(nil) 64 _ Bed = (*Bed4)(nil) 65 _ Bed = (*Bed5)(nil) 66 _ Bed = (*Bed6)(nil) 67 _ Bed = (*Bed12)(nil) 68 69 _ feat.Orienter = (*Bed6)(nil) 70 _ feat.Orienter = (*Bed12)(nil) 71 ) 72 73 type Bed interface { 74 feat.Feature 75 canBed(int) bool 76 } 77 78 func handlePanic(f feat.Feature, err *error) { 79 r := recover() 80 if r != nil { 81 e, ok := r.(error) 82 if !ok { 83 panic(r) 84 } 85 if _, ok = r.(runtime.Error); ok { 86 panic(r) 87 } 88 *err = e 89 } 90 } 91 92 // This function cannot be used to create strings that are expected to persist. 93 func unsafeString(b []byte) string { 94 return *(*string)(unsafe.Pointer(&b)) 95 } 96 97 func mustAtoi(f []byte, column int) int { 98 i, err := strconv.ParseInt(unsafeString(f), 0, 0) 99 if err != nil { 100 panic(&csv.ParseError{Column: column, Err: err}) 101 } 102 return int(i) 103 } 104 105 func mustAtob(f []byte, column int) byte { 106 b, err := strconv.ParseUint(unsafeString(f), 0, 8) 107 if err != nil { 108 panic(&csv.ParseError{Column: column, Err: err}) 109 } 110 return byte(b) 111 } 112 113 var charToStrand = func() [256]seq.Strand { 114 var t [256]seq.Strand 115 for i := range t { 116 t[i] = 0x7f 117 } 118 t['+'] = seq.Plus 119 t['.'] = seq.None 120 t['-'] = seq.Minus 121 return t 122 }() 123 124 func mustAtos(f []byte, index int) seq.Strand { 125 if len(f) != 1 { 126 panic(&csv.ParseError{Column: index, Err: ErrBadStrandField}) 127 } 128 s := charToStrand[f[0]] 129 if s == 0x7f { 130 panic(&csv.ParseError{Column: index, Err: ErrBadStrand}) 131 } 132 return s 133 } 134 135 func mustAtoRgb(f []byte, index int) color.RGBA { 136 c := bytes.SplitN(f, []byte{','}, 4) 137 l := len(c) 138 if l == 0 || (l == 1 && mustAtoi(c[0], index) == 0) { 139 return color.RGBA{} 140 } 141 if l < 3 { 142 panic(&csv.ParseError{Column: index, Err: ErrBadColorField}) 143 } 144 return color.RGBA{ 145 R: mustAtob(c[0], index), 146 G: mustAtob(c[1], index), 147 B: mustAtob(c[2], index), 148 A: 0xff, 149 } 150 } 151 152 func mustAtoa(f []byte, index int) []int { 153 c := bytes.Split(f, []byte{','}) 154 a := make([]int, len(c)) 155 for i, f := range c { 156 if len(f) == 0 { 157 return a[:i] 158 } 159 a[i] = mustAtoi(f, index) 160 } 161 return a 162 } 163 164 type Chrom string 165 166 func (c Chrom) Start() int { return 0 } 167 func (c Chrom) End() int { return 0 } 168 func (c Chrom) Len() int { return 0 } 169 func (c Chrom) Name() string { return string(c) } 170 func (c Chrom) Description() string { return "bed chrom" } 171 func (c Chrom) Location() feat.Feature { return nil } 172 173 type Bed3 struct { 174 Chrom string 175 ChromStart int 176 ChromEnd int 177 } 178 179 func parseBed3(line []byte) (b *Bed3, err error) { 180 const n = 3 181 defer handlePanic(b, &err) 182 f := bytes.SplitN(line, []byte{'\t'}, n+1) 183 if len(f) < n { 184 return nil, ErrBadBedType 185 } 186 b = &Bed3{ 187 Chrom: string(f[chromField]), 188 ChromStart: mustAtoi(f[startField], startField), 189 ChromEnd: mustAtoi(f[endField], endField), 190 } 191 return 192 } 193 194 func (b *Bed3) Start() int { return b.ChromStart } 195 func (b *Bed3) End() int { return b.ChromEnd } 196 func (b *Bed3) Len() int { return b.ChromEnd - b.ChromStart } 197 func (b *Bed3) Name() string { return fmt.Sprintf("%s:[%d,%d)", b.Chrom, b.ChromStart, b.ChromEnd) } 198 func (b *Bed3) Description() string { return "bed3 feature" } 199 func (b *Bed3) Location() feat.Feature { return Chrom(b.Chrom) } 200 func (b *Bed3) canBed(i int) bool { return i <= 3 } 201 func (b *Bed3) Format(fs fmt.State, c rune) { format(b, fs, c) } 202 203 type Bed4 struct { 204 Chrom string 205 ChromStart int 206 ChromEnd int 207 FeatName string 208 } 209 210 func parseBed4(line []byte) (b *Bed4, err error) { 211 const n = 4 212 defer handlePanic(b, &err) 213 f := bytes.SplitN(line, []byte{'\t'}, n+1) 214 if len(f) < n { 215 return nil, ErrBadBedType 216 } 217 b = &Bed4{ 218 Chrom: string(f[chromField]), 219 ChromStart: mustAtoi(f[startField], startField), 220 ChromEnd: mustAtoi(f[endField], endField), 221 FeatName: string(f[nameField]), 222 } 223 return 224 } 225 226 func (b *Bed4) Start() int { return b.ChromStart } 227 func (b *Bed4) End() int { return b.ChromEnd } 228 func (b *Bed4) Len() int { return b.ChromEnd - b.ChromStart } 229 func (b *Bed4) Name() string { return b.FeatName } 230 func (b *Bed4) Description() string { return "bed4 feature" } 231 func (b *Bed4) Location() feat.Feature { return Chrom(b.Chrom) } 232 func (b *Bed4) canBed(i int) bool { return i <= 4 } 233 func (b *Bed4) Format(fs fmt.State, c rune) { format(b, fs, c) } 234 235 type Bed5 struct { 236 Chrom string 237 ChromStart int 238 ChromEnd int 239 FeatName string 240 FeatScore int 241 } 242 243 func parseBed5(line []byte) (b *Bed5, err error) { 244 const n = 5 245 defer handlePanic(b, &err) 246 f := bytes.SplitN(line, []byte{'\t'}, n+1) 247 if len(f) < n { 248 return nil, ErrBadBedType 249 } 250 b = &Bed5{ 251 Chrom: string(f[chromField]), 252 ChromStart: mustAtoi(f[startField], startField), 253 ChromEnd: mustAtoi(f[endField], endField), 254 FeatName: string(f[nameField]), 255 FeatScore: mustAtoi(f[scoreField], scoreField), 256 } 257 return 258 } 259 260 func (b *Bed5) Start() int { return b.ChromStart } 261 func (b *Bed5) End() int { return b.ChromEnd } 262 func (b *Bed5) Len() int { return b.ChromEnd - b.ChromStart } 263 func (b *Bed5) Name() string { return b.FeatName } 264 func (b *Bed5) Description() string { return "bed5 feature" } 265 func (b *Bed5) Location() feat.Feature { return Chrom(b.Chrom) } 266 func (b *Bed5) canBed(i int) bool { return i <= 5 } 267 func (b *Bed5) Format(fs fmt.State, c rune) { format(b, fs, c) } 268 269 type Bed6 struct { 270 Chrom string 271 ChromStart int 272 ChromEnd int 273 FeatName string 274 FeatScore int 275 FeatStrand seq.Strand 276 } 277 278 func parseBed6(line []byte) (b *Bed6, err error) { 279 const n = 6 280 defer handlePanic(b, &err) 281 f := bytes.SplitN(line, []byte{'\t'}, n+1) 282 if len(f) < n { 283 return nil, ErrBadBedType 284 } 285 b = &Bed6{ 286 Chrom: string(f[chromField]), 287 ChromStart: mustAtoi(f[startField], startField), 288 ChromEnd: mustAtoi(f[endField], endField), 289 FeatName: string(f[nameField]), 290 FeatScore: mustAtoi(f[scoreField], scoreField), 291 FeatStrand: mustAtos(f[strandField], strandField), 292 } 293 return 294 } 295 296 func (b *Bed6) Start() int { return b.ChromStart } 297 func (b *Bed6) End() int { return b.ChromEnd } 298 func (b *Bed6) Len() int { return b.ChromEnd - b.ChromStart } 299 func (b *Bed6) Name() string { return b.FeatName } 300 func (b *Bed6) Description() string { return "bed6 feature" } 301 func (b *Bed6) Location() feat.Feature { return Chrom(b.Chrom) } 302 func (b *Bed6) Orientation() feat.Orientation { return feat.Orientation(b.FeatStrand) } 303 func (b *Bed6) canBed(i int) bool { return i <= 6 } 304 func (b *Bed6) Format(fs fmt.State, c rune) { format(b, fs, c) } 305 306 type Bed12 struct { 307 Chrom string 308 ChromStart int 309 ChromEnd int 310 FeatName string 311 FeatScore int 312 FeatStrand seq.Strand 313 ThickStart int 314 ThickEnd int 315 Rgb color.RGBA 316 BlockCount int 317 BlockSizes []int 318 BlockStarts []int 319 } 320 321 func parseBed12(line []byte) (b *Bed12, err error) { 322 const n = 12 323 defer handlePanic(b, &err) 324 f := bytes.SplitN(line, []byte{'\t'}, n+1) 325 if len(f) < n { 326 return nil, ErrBadBedType 327 } 328 b = &Bed12{ 329 Chrom: string(f[chromField]), 330 ChromStart: mustAtoi(f[startField], startField), 331 ChromEnd: mustAtoi(f[endField], endField), 332 FeatName: string(f[nameField]), 333 FeatScore: mustAtoi(f[scoreField], scoreField), 334 FeatStrand: mustAtos(f[strandField], strandField), 335 ThickStart: mustAtoi(f[thickStartField], thickStartField), 336 ThickEnd: mustAtoi(f[thickEndField], thickEndField), 337 Rgb: mustAtoRgb(f[rgbField], rgbField), 338 BlockCount: mustAtoi(f[blockCountField], blockCountField), 339 BlockSizes: mustAtoa(f[blockSizesField], blockSizesField), 340 BlockStarts: mustAtoa(f[blockStartsField], blockStartsField), 341 } 342 if b.BlockCount != len(b.BlockSizes) || b.BlockCount != len(b.BlockStarts) { 343 return nil, ErrMissingBlockValues 344 } 345 return 346 } 347 348 func (b *Bed12) Start() int { return b.ChromStart } 349 func (b *Bed12) End() int { return b.ChromEnd } 350 func (b *Bed12) Len() int { return b.ChromEnd - b.ChromStart } 351 func (b *Bed12) Name() string { return b.FeatName } 352 func (b *Bed12) Description() string { return "bed12 feature" } 353 func (b *Bed12) Location() feat.Feature { return Chrom(b.Chrom) } 354 func (b *Bed12) Orientation() feat.Orientation { return feat.Orientation(b.FeatStrand) } 355 func (b *Bed12) canBed(i int) bool { return i <= 12 } 356 func (b *Bed12) Format(fs fmt.State, c rune) { format(b, fs, c) } 357 358 // BED format reader type. 359 type Reader struct { 360 r *bufio.Reader 361 BedType int 362 line int 363 } 364 365 // Returns a new BED format reader using r. 366 func NewReader(r io.Reader, b int) (*Reader, error) { 367 switch b { 368 case 3, 4, 5, 6, 12: 369 default: 370 return nil, ErrBadBedType 371 } 372 return &Reader{ 373 r: bufio.NewReader(r), 374 BedType: b, 375 }, nil 376 } 377 378 // Read a single feature and return it or an error. 379 func (r *Reader) Read() (f feat.Feature, err error) { 380 line, err := r.r.ReadBytes('\n') 381 if err != nil { 382 return 383 } 384 r.line++ 385 line = bytes.TrimSpace(line) 386 387 switch r.BedType { 388 case 3: 389 f, err = parseBed3(line) 390 case 4: 391 f, err = parseBed4(line) 392 case 5: 393 f, err = parseBed5(line) 394 case 6: 395 f, err = parseBed6(line) 396 case 12: 397 f, err = parseBed12(line) 398 default: 399 return nil, ErrBadBedType 400 } 401 if err != nil { 402 if err, ok := err.(*csv.ParseError); ok { 403 err.Line = r.line 404 return nil, err 405 } 406 return nil, fmt.Errorf("%v at line %d", err, r.line) 407 } 408 409 return 410 } 411 412 // Return the current line number 413 func (r *Reader) Line() int { return r.line } 414 415 func format(b Bed, fs fmt.State, c rune) { 416 bv := reflect.ValueOf(b) 417 if bv.IsNil() { 418 fmt.Fprint(fs, "<nil>") 419 return 420 } 421 bv = bv.Elem() 422 switch c { 423 case 'v': 424 if fs.Flag('#') { 425 fmt.Fprintf(fs, "&%#v", bv.Interface()) 426 return 427 } 428 fallthrough 429 case 's': 430 width, _ := fs.Width() 431 if !b.canBed(width) { 432 fmt.Fprintf(fs, "%%!(BADWIDTH)%T", b) 433 return 434 } 435 if width == 0 { 436 width = bv.NumField() 437 } 438 for i := 0; i < width; i++ { 439 f := bv.Field(i).Interface() 440 if i >= rgbField { 441 switch i { 442 case rgbField: 443 rv := reflect.ValueOf(f) 444 if reflect.DeepEqual(rv.Interface(), color.RGBA{}) { 445 fs.Write([]byte{'0'}) 446 } else { 447 fmt.Fprintf(fs, "%d,%d,%d", 448 rv.Field(0).Interface(), rv.Field(1).Interface(), rv.Field(2).Interface()) 449 } 450 case blockCountField: 451 fmt.Fprint(fs, f) 452 case blockSizesField, blockStartsField: 453 av := reflect.ValueOf(f) 454 l := av.Len() 455 for j := 0; j < l; j++ { 456 fmt.Fprint(fs, av.Index(j).Interface()) 457 if j < l-1 { 458 fs.Write([]byte{','}) 459 } 460 } 461 } 462 } else { 463 fmt.Fprint(fs, f) 464 } 465 if i < width-1 { 466 fs.Write([]byte{'\t'}) 467 } 468 } 469 default: 470 fmt.Fprintf(fs, "%%!%c(%T=%3s)", c, b, b) 471 } 472 } 473 474 // BED format writer type. 475 type Writer struct { 476 w io.Writer 477 BedType int 478 } 479 480 // Returns a new BED format writer using w. 481 func NewWriter(w io.Writer, b int) (*Writer, error) { 482 switch b { 483 case 3, 4, 5, 6, 12: 484 default: 485 return nil, ErrBadBedType 486 } 487 return &Writer{ 488 w: w, 489 BedType: b, 490 }, nil 491 } 492 493 type Scorer interface { 494 Score() int 495 } 496 497 // Write a single feature and return the number of bytes written and any error. 498 func (w *Writer) Write(f feat.Feature) (n int, err error) { 499 defer func() { 500 if err != nil { 501 return 502 } 503 _, err = w.w.Write([]byte{'\n'}) 504 if err != nil { 505 return 506 } 507 n++ 508 }() 509 510 // Handle Bed types. 511 if f, ok := f.(Bed); ok { 512 if !f.canBed(w.BedType) { 513 return 0, ErrBadBedType 514 } 515 return fmt.Fprintf(w.w, "%*s", w.BedType, f) 516 } 517 518 // Handle other feature types. 519 if f.Location() == nil { 520 return 0, ErrNoChromField 521 } 522 523 // Bed3 524 n, err = fmt.Fprintf(w.w, "%s\t%d\t%d", f.Location(), f.Start(), f.End()) 525 if w.BedType == 3 { 526 return n, err 527 } 528 529 // Bed4 530 _n, err := fmt.Fprintf(w.w, "\t%s", f.Name()) 531 n += _n 532 if w.BedType == 4 || err != nil { 533 return n, err 534 } 535 536 // Bed5 537 if f, ok := f.(Scorer); ok { 538 _n, err := fmt.Fprintf(w.w, "\t%d", f.Score()) 539 n += _n 540 if err != nil { 541 return n, err 542 } 543 } else { 544 _n, err := fmt.Fprintf(w.w, "\t0") 545 n += _n 546 if err != nil { 547 return n, err 548 } 549 } 550 if w.BedType == 5 { 551 return 552 } 553 554 // Bed6 555 if f, ok := f.(feat.Orienter); ok { 556 _n, err := fmt.Fprintf(w.w, "\t%s", seq.Strand(f.Orientation())) 557 n += _n 558 if err != nil { 559 return n, err 560 } 561 } else { 562 _n, err := fmt.Fprintf(w.w, "\t.") 563 n += _n 564 if err != nil { 565 return n, err 566 } 567 } 568 if w.BedType == 6 || w.BedType == 0 { 569 return 570 } 571 572 // Don't handle Bed12. 573 _n, err = w.w.Write([]byte{'\n'}) 574 n += _n 575 return n, ErrBadBedType 576 }