github.com/grailbio/base@v0.0.11/recordio/legacyscanner.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package recordio
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  
    11  	"github.com/grailbio/base/errors"
    12  	"github.com/grailbio/base/recordio/deprecated"
    13  	"github.com/grailbio/base/recordio/internal"
    14  )
    15  
    16  // legacyScanner is a ScannerV2 implementation that reads legacy recordio files,
    17  // either packed or unpacked.
    18  type legacyScannerAdapter struct {
    19  	err  errors.Once
    20  	in   io.ReadSeeker
    21  	sc   *deprecated.LegacyScannerImpl
    22  	opts ScannerOpts
    23  
    24  	pbr      *deprecated.Unpacker
    25  	buffered [][]byte
    26  	item     interface{}
    27  	nextItem int
    28  }
    29  
    30  func newLegacyScannerAdapter(in io.ReadSeeker, opts ScannerOpts) Scanner {
    31  	var legacyTransform func(scratch, in []byte) ([]byte, error)
    32  	if opts.LegacyTransform != nil {
    33  		legacyTransform = func(scratch, in []byte) ([]byte, error) {
    34  			return opts.LegacyTransform(scratch, [][]byte{in})
    35  		}
    36  	}
    37  	return &legacyScannerAdapter{
    38  		in:   in,
    39  		sc:   deprecated.NewLegacyScanner(in, deprecated.LegacyScannerOpts{}).(*deprecated.LegacyScannerImpl),
    40  		opts: opts,
    41  		pbr:  deprecated.NewUnpacker(deprecated.UnpackerOpts{Transform: legacyTransform}),
    42  	}
    43  }
    44  
    45  func (s *legacyScannerAdapter) Version() FormatVersion {
    46  	return V1
    47  }
    48  
    49  func (s *legacyScannerAdapter) Header() ParsedHeader {
    50  	return ParsedHeader{}
    51  }
    52  
    53  func (s *legacyScannerAdapter) Trailer() []byte {
    54  	return nil
    55  }
    56  
    57  func (s *legacyScannerAdapter) seekRaw(off int64) bool {
    58  	err := internal.Seek(s.in, off)
    59  	if err != nil {
    60  		s.err.Set(err)
    61  		return false
    62  	}
    63  	s.sc.Reset(s.in)
    64  	return true
    65  }
    66  
    67  func (s *legacyScannerAdapter) Seek(loc ItemLocation) {
    68  	// TODO(saito) Avoid seeking the file if loc.Block points to the current block.
    69  	if s.err.Err() == io.EOF {
    70  		s.err = errors.Once{}
    71  	}
    72  	if !s.seekRaw(int64(loc.Block)) {
    73  		return
    74  	}
    75  	if !s.scanNextBlock() {
    76  		return
    77  	}
    78  	if loc.Item >= len(s.buffered) {
    79  		s.err.Set(fmt.Errorf("Invalid location %+v, block has only %d items", loc, len(s.buffered)))
    80  	}
    81  	s.nextItem = loc.Item
    82  }
    83  
    84  func (s *legacyScannerAdapter) scanNextBlock() bool {
    85  	s.buffered = s.buffered[:0]
    86  	s.nextItem = 0
    87  	if s.Err() != nil {
    88  		return false
    89  	}
    90  	// Need to read the next record.
    91  	magic, ok := s.sc.InternalScan()
    92  	if !ok {
    93  		return false
    94  	}
    95  	if magic == internal.MagicPacked {
    96  		tmp, err := s.pbr.Unpack(s.sc.Bytes())
    97  		if err != nil {
    98  			s.err.Set(err)
    99  			return false
   100  		}
   101  		s.buffered = tmp
   102  		s.nextItem = 0
   103  		return true
   104  	}
   105  	if magic == internal.MagicLegacyUnpacked {
   106  		if cap(s.buffered) >= 1 {
   107  			s.buffered = s.buffered[:1]
   108  		} else {
   109  			s.buffered = make([][]byte, 1)
   110  		}
   111  		s.buffered[0] = s.sc.Bytes()
   112  		s.nextItem = 0
   113  		return true
   114  	}
   115  	s.err.Set(fmt.Errorf("recordio: invalid magic number: %v", magic))
   116  	return false
   117  }
   118  
   119  func (s *legacyScannerAdapter) Scan() bool {
   120  	for s.nextItem >= len(s.buffered) {
   121  		if !s.scanNextBlock() {
   122  			return false
   123  		}
   124  	}
   125  	item, err := s.opts.Unmarshal(s.buffered[s.nextItem])
   126  	if err != nil {
   127  		s.err.Set(err)
   128  		return false
   129  	}
   130  	s.item = item
   131  	s.nextItem++
   132  	return true
   133  }
   134  
   135  func (s *legacyScannerAdapter) Err() error {
   136  	err := s.err.Err()
   137  	if err == nil {
   138  		err = s.sc.Err()
   139  	}
   140  	if err == io.EOF {
   141  		err = nil
   142  	}
   143  	return err
   144  }
   145  
   146  func (s *legacyScannerAdapter) Get() interface{} {
   147  	return s.item
   148  }
   149  
   150  func (s *legacyScannerAdapter) Finish() error {
   151  	return s.Err()
   152  }