github.com/grailbio/base@v0.0.11/recordio/legacyscanner.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package recordio 6 7 import ( 8 "fmt" 9 "io" 10 11 "github.com/grailbio/base/errors" 12 "github.com/grailbio/base/recordio/deprecated" 13 "github.com/grailbio/base/recordio/internal" 14 ) 15 16 // legacyScanner is a ScannerV2 implementation that reads legacy recordio files, 17 // either packed or unpacked. 18 type legacyScannerAdapter struct { 19 err errors.Once 20 in io.ReadSeeker 21 sc *deprecated.LegacyScannerImpl 22 opts ScannerOpts 23 24 pbr *deprecated.Unpacker 25 buffered [][]byte 26 item interface{} 27 nextItem int 28 } 29 30 func newLegacyScannerAdapter(in io.ReadSeeker, opts ScannerOpts) Scanner { 31 var legacyTransform func(scratch, in []byte) ([]byte, error) 32 if opts.LegacyTransform != nil { 33 legacyTransform = func(scratch, in []byte) ([]byte, error) { 34 return opts.LegacyTransform(scratch, [][]byte{in}) 35 } 36 } 37 return &legacyScannerAdapter{ 38 in: in, 39 sc: deprecated.NewLegacyScanner(in, deprecated.LegacyScannerOpts{}).(*deprecated.LegacyScannerImpl), 40 opts: opts, 41 pbr: deprecated.NewUnpacker(deprecated.UnpackerOpts{Transform: legacyTransform}), 42 } 43 } 44 45 func (s *legacyScannerAdapter) Version() FormatVersion { 46 return V1 47 } 48 49 func (s *legacyScannerAdapter) Header() ParsedHeader { 50 return ParsedHeader{} 51 } 52 53 func (s *legacyScannerAdapter) Trailer() []byte { 54 return nil 55 } 56 57 func (s *legacyScannerAdapter) seekRaw(off int64) bool { 58 err := internal.Seek(s.in, off) 59 if err != nil { 60 s.err.Set(err) 61 return false 62 } 63 s.sc.Reset(s.in) 64 return true 65 } 66 67 func (s *legacyScannerAdapter) Seek(loc ItemLocation) { 68 // TODO(saito) Avoid seeking the file if loc.Block points to the current block. 69 if s.err.Err() == io.EOF { 70 s.err = errors.Once{} 71 } 72 if !s.seekRaw(int64(loc.Block)) { 73 return 74 } 75 if !s.scanNextBlock() { 76 return 77 } 78 if loc.Item >= len(s.buffered) { 79 s.err.Set(fmt.Errorf("Invalid location %+v, block has only %d items", loc, len(s.buffered))) 80 } 81 s.nextItem = loc.Item 82 } 83 84 func (s *legacyScannerAdapter) scanNextBlock() bool { 85 s.buffered = s.buffered[:0] 86 s.nextItem = 0 87 if s.Err() != nil { 88 return false 89 } 90 // Need to read the next record. 91 magic, ok := s.sc.InternalScan() 92 if !ok { 93 return false 94 } 95 if magic == internal.MagicPacked { 96 tmp, err := s.pbr.Unpack(s.sc.Bytes()) 97 if err != nil { 98 s.err.Set(err) 99 return false 100 } 101 s.buffered = tmp 102 s.nextItem = 0 103 return true 104 } 105 if magic == internal.MagicLegacyUnpacked { 106 if cap(s.buffered) >= 1 { 107 s.buffered = s.buffered[:1] 108 } else { 109 s.buffered = make([][]byte, 1) 110 } 111 s.buffered[0] = s.sc.Bytes() 112 s.nextItem = 0 113 return true 114 } 115 s.err.Set(fmt.Errorf("recordio: invalid magic number: %v", magic)) 116 return false 117 } 118 119 func (s *legacyScannerAdapter) Scan() bool { 120 for s.nextItem >= len(s.buffered) { 121 if !s.scanNextBlock() { 122 return false 123 } 124 } 125 item, err := s.opts.Unmarshal(s.buffered[s.nextItem]) 126 if err != nil { 127 s.err.Set(err) 128 return false 129 } 130 s.item = item 131 s.nextItem++ 132 return true 133 } 134 135 func (s *legacyScannerAdapter) Err() error { 136 err := s.err.Err() 137 if err == nil { 138 err = s.sc.Err() 139 } 140 if err == io.EOF { 141 err = nil 142 } 143 return err 144 } 145 146 func (s *legacyScannerAdapter) Get() interface{} { 147 return s.item 148 } 149 150 func (s *legacyScannerAdapter) Finish() error { 151 return s.Err() 152 }