github.com/kjk/siser@v0.0.0-20220410204903-1b1e84ea1397/reader.go (about)

     1  package siser
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"strconv"
     9  	"time"
    10  )
    11  
// Reader reads (deserializes) records from a bufio.Reader.
type Reader struct {
	// underlying buffered source the records are read from
	r *bufio.Reader

	// NoTimestamp hints that the data was written without a timestamp
	// (see Writer.NoTimestamp). Reading is permissive: a header that
	// carries both a timestamp and a name is still parsed as such
	// even if NoTimestamp is true.
	NoTimestamp bool

	// Record is available after ReadNextRecord().
	// It is overwritten by the next ReadNextRecord().
	Record *Record

	// Data / Name / Timestamp are available after ReadNextData.
	// They are only valid until the next ReadNextData. The backing
	// array of Data may be re-used by the next read, so copy it if
	// it has to outlive that call.
	Data      []byte
	Name      string
	Timestamp time.Time

	// CurrRecordPos is the position of the current record within the
	// reader, counted in bytes consumed by this Reader (starts at 0).
	// We keep track of it so that callers can index records
	// by offset and seek to it.
	CurrRecordPos int64

	// NextRecordPos is the position of the next record within the reader.
	// It only advances after a record has been read successfully.
	NextRecordPos int64

	// error that stopped reading; once set, Done() reports true
	err error

	// true if reached end of file with io.EOF
	done bool
}
    44  
    45  // NewReader creates a new reader
    46  func NewReader(r *bufio.Reader) *Reader {
    47  	return &Reader{
    48  		r:      r,
    49  		Record: &Record{},
    50  	}
    51  }
    52  
    53  // Done returns true if we're finished reading from the reader
    54  func (r *Reader) Done() bool {
    55  	return r.err != nil || r.done
    56  }
    57  
    58  // ReadNextData reads next block from the reader, returns false
    59  // when no more record. If returns false, check Err() to see
    60  // if there were errors.
    61  // After reading Data containst data, and Timestamp and (optional) Name
    62  // contain meta-data
    63  func (r *Reader) ReadNextData() bool {
    64  	if r.Done() {
    65  		return false
    66  	}
    67  	r.Name = ""
    68  	r.CurrRecordPos = r.NextRecordPos
    69  
    70  	// read header in the format:
    71  	// "${size} ${timestamp_in_unix_epoch_ms} ${name}\n"
    72  	// or (if NoTimestamp):
    73  	// "${size} ${name}\n"
    74  	// ${name} is optional
    75  	hdr, err := r.r.ReadBytes('\n')
    76  	if err != nil {
    77  		if err == io.EOF {
    78  			r.done = true
    79  		} else {
    80  			r.err = err
    81  		}
    82  		return false
    83  	}
    84  	recSize := len(hdr)
    85  	rest := hdr[:len(hdr)-1] // remove '\n' from end
    86  	idx := bytes.IndexByte(rest, ' ')
    87  	var dataSize []byte
    88  	if idx == -1 {
    89  		if !r.NoTimestamp {
    90  			// with timestamp, we need at least 2 values separated by space
    91  			r.err = fmt.Errorf("unexpected header '%s'", string(hdr))
    92  			return false
    93  		}
    94  		dataSize = rest
    95  		rest = nil
    96  	} else {
    97  		dataSize = rest[:idx]
    98  		rest = rest[idx+1:]
    99  	}
   100  	var name []byte
   101  	var timestamp []byte
   102  	idx = bytes.IndexByte(rest, ' ')
   103  	if idx == -1 {
   104  		if r.NoTimestamp {
   105  			// no timestamp, just name
   106  			name = rest
   107  		} else {
   108  			// no name, just timestamp
   109  			timestamp = rest
   110  		}
   111  	} else {
   112  		// timestamp and name
   113  		timestamp = rest[:idx]
   114  		name = rest[idx+1:]
   115  	}
   116  
   117  	size, err := strconv.ParseInt(string(dataSize), 10, 64)
   118  	if err != nil {
   119  		r.err = fmt.Errorf("unexpected header '%s'", string(hdr))
   120  		return false
   121  	}
   122  
   123  	if len(timestamp) > 0 {
   124  		timeMs, err := strconv.ParseInt(string(timestamp), 10, 64)
   125  		if err != nil {
   126  			r.err = fmt.Errorf("unexpected header '%s'", string(hdr))
   127  			return false
   128  		}
   129  		r.Timestamp = TimeFromUnixMillisecond(timeMs)
   130  	}
   131  	r.Name = string(name)
   132  
   133  	// we try to re-use r.Data as long as it doesn't grow too much
   134  	// (limit to 1 MB)
   135  	if cap(r.Data) > 1024*1024 {
   136  		r.Data = nil
   137  	}
   138  	if size > int64(cap(r.Data)) {
   139  		r.Data = make([]byte, size)
   140  	} else {
   141  		// re-use existing buffer
   142  		r.Data = r.Data[:size]
   143  	}
   144  	n, err := io.ReadFull(r.r, r.Data)
   145  	if err != nil {
   146  		r.err = err
   147  		return false
   148  	}
   149  	panicIf(n != len(r.Data))
   150  	recSize += n
   151  
   152  	// account for the fact that for readability we might
   153  	// have padded data with '\n'
   154  	// same as needsNewline logic in Writer.Write
   155  	n = len(r.Data)
   156  	needsNewline := (n > 0) && (r.Data[n-1] != '\n')
   157  	if needsNewline {
   158  		_, err = r.r.Discard(1)
   159  		if err != nil {
   160  			r.err = err
   161  			return false
   162  		}
   163  		recSize++
   164  	}
   165  	r.NextRecordPos += int64(recSize)
   166  	return true
   167  }
   168  
   169  // ReadNextRecord reads a key / value record.
   170  // Returns false if there are no more record.
   171  // Check Err() for errors.
   172  // After reading information is in Record (valid until
   173  // next read).
   174  func (r *Reader) ReadNextRecord() bool {
   175  	ok := r.ReadNextData()
   176  	if !ok {
   177  		return false
   178  	}
   179  
   180  	_, r.err = UnmarshalRecord(r.Data, r.Record)
   181  	if r.err != nil {
   182  		return false
   183  	}
   184  	r.Record.Name = r.Name
   185  	r.Record.Timestamp = r.Timestamp
   186  	return true
   187  }
   188  
// Err returns the error that stopped reading, or nil if there was none.
// Reaching io.EOF while reading a record header is not recorded as an
// error (it only marks the Reader as done), which makes read loops
// easier to write.
func (r *Reader) Err() error {
	return r.err
}