github.com/kjk/siser@v0.0.0-20220410204903-1b1e84ea1397/reader.go (about) 1 package siser 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "strconv" 9 "time" 10 ) 11 12 // Reader is for reading (deserializing) records from a bufio.Reader 13 type Reader struct { 14 r *bufio.Reader 15 16 // hints that the data was written without a timestamp 17 // (see Writer.NoTimestamp). We're permissive i.e. we'll 18 // read timestamp if it's written even if NoTimestamp is true 19 NoTimestamp bool 20 21 // Record is available after ReadNextRecord(). 22 // It's over-written in next ReadNextRecord(). 23 Record *Record 24 25 // Data / Name / Timestampe are available after ReadNextData. 26 // They are over-written in next ReadNextData. 27 Data []byte 28 Name string 29 Timestamp time.Time 30 31 // position of the current record within the reader. 32 // We keep track of it so that callers can index records 33 // by offset and seek to it 34 CurrRecordPos int64 35 36 // position of the next record within the reader. 37 NextRecordPos int64 38 39 err error 40 41 // true if reached end of file with io.EOF 42 done bool 43 } 44 45 // NewReader creates a new reader 46 func NewReader(r *bufio.Reader) *Reader { 47 return &Reader{ 48 r: r, 49 Record: &Record{}, 50 } 51 } 52 53 // Done returns true if we're finished reading from the reader 54 func (r *Reader) Done() bool { 55 return r.err != nil || r.done 56 } 57 58 // ReadNextData reads next block from the reader, returns false 59 // when no more record. If returns false, check Err() to see 60 // if there were errors. 61 // After reading Data containst data, and Timestamp and (optional) Name 62 // contain meta-data 63 func (r *Reader) ReadNextData() bool { 64 if r.Done() { 65 return false 66 } 67 r.Name = "" 68 r.CurrRecordPos = r.NextRecordPos 69 70 // read header in the format: 71 // "${size} ${timestamp_in_unix_epoch_ms} ${name}\n" 72 // or (if NoTimestamp): 73 // "${size} ${name}\n" 74 // ${name} is optional 75 hdr, err := r.r.ReadBytes('\n') 76 if err != nil { 77 if err == io.EOF { 78 r.done = true 79 } else { 80 r.err = err 81 } 82 return false 83 } 84 recSize := len(hdr) 85 rest := hdr[:len(hdr)-1] // remove '\n' from end 86 idx := bytes.IndexByte(rest, ' ') 87 var dataSize []byte 88 if idx == -1 { 89 if !r.NoTimestamp { 90 // with timestamp, we need at least 2 values separated by space 91 r.err = fmt.Errorf("unexpected header '%s'", string(hdr)) 92 return false 93 } 94 dataSize = rest 95 rest = nil 96 } else { 97 dataSize = rest[:idx] 98 rest = rest[idx+1:] 99 } 100 var name []byte 101 var timestamp []byte 102 idx = bytes.IndexByte(rest, ' ') 103 if idx == -1 { 104 if r.NoTimestamp { 105 // no timestamp, just name 106 name = rest 107 } else { 108 // no name, just timestamp 109 timestamp = rest 110 } 111 } else { 112 // timestamp and name 113 timestamp = rest[:idx] 114 name = rest[idx+1:] 115 } 116 117 size, err := strconv.ParseInt(string(dataSize), 10, 64) 118 if err != nil { 119 r.err = fmt.Errorf("unexpected header '%s'", string(hdr)) 120 return false 121 } 122 123 if len(timestamp) > 0 { 124 timeMs, err := strconv.ParseInt(string(timestamp), 10, 64) 125 if err != nil { 126 r.err = fmt.Errorf("unexpected header '%s'", string(hdr)) 127 return false 128 } 129 r.Timestamp = TimeFromUnixMillisecond(timeMs) 130 } 131 r.Name = string(name) 132 133 // we try to re-use r.Data as long as it doesn't grow too much 134 // (limit to 1 MB) 135 if cap(r.Data) > 1024*1024 { 136 r.Data = nil 137 } 138 if size > int64(cap(r.Data)) { 139 r.Data = make([]byte, size) 140 } else { 141 // re-use existing buffer 142 r.Data = r.Data[:size] 143 } 144 n, err := io.ReadFull(r.r, r.Data) 145 if err != nil { 146 r.err = err 147 return false 148 } 149 panicIf(n != len(r.Data)) 150 recSize += n 151 152 // account for the fact that for readability we might 153 // have padded data with '\n' 154 // same as needsNewline logic in Writer.Write 155 n = len(r.Data) 156 needsNewline := (n > 0) && (r.Data[n-1] != '\n') 157 if needsNewline { 158 _, err = r.r.Discard(1) 159 if err != nil { 160 r.err = err 161 return false 162 } 163 recSize++ 164 } 165 r.NextRecordPos += int64(recSize) 166 return true 167 } 168 169 // ReadNextRecord reads a key / value record. 170 // Returns false if there are no more record. 171 // Check Err() for errors. 172 // After reading information is in Record (valid until 173 // next read). 174 func (r *Reader) ReadNextRecord() bool { 175 ok := r.ReadNextData() 176 if !ok { 177 return false 178 } 179 180 _, r.err = UnmarshalRecord(r.Data, r.Record) 181 if r.err != nil { 182 return false 183 } 184 r.Record.Name = r.Name 185 r.Record.Timestamp = r.Timestamp 186 return true 187 } 188 189 // Err returns error from last Read. We swallow io.EOF to make it easier 190 // to use 191 func (r *Reader) Err() error { 192 return r.err 193 }