github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/pkg/schema/filereader.go

/*
Copyright 2011 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package schema

import (
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"strings"
	"sync"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/singleflight"
	"camlistore.org/pkg/types"
)

const closedIndex = -1

var errClosed = errors.New("filereader is closed")

// A FileReader reads the bytes of "file" and "bytes" schema blobrefs.
type FileReader struct {
	// Immutable stuff:
	*io.SectionReader // provides Read, etc.
	parent  *FileReader // or nil. For sub-region readers, to find the ssm map in getSuperset
	rootOff int64       // this FileReader's offset from the root
	fetcher blob.SeekFetcher
	ss      *superset
	size    int64 // total number of bytes

	sfg singleflight.Group // for loading blobrefs for ssm

	ssmmu sync.Mutex           // guards ssm
	ssm   map[string]*superset // blobref -> superset
}

// NewFileReader returns a new FileReader reading the contents of fileBlobRef,
// fetching blobs from fetcher. The fileBlobRef must be of a "bytes" or "file"
// schema blob.
//
// The caller should call Close on the FileReader when done reading.
func NewFileReader(fetcher blob.SeekFetcher, fileBlobRef blob.Ref) (*FileReader, error) {
	// TODO(bradfitz): make this take a blobref.FetcherAt instead?
	// TODO(bradfitz): rename this into bytes reader? but for now it's still
	// named FileReader, but can also read a "bytes" schema.
	if !fileBlobRef.Valid() {
		return nil, errors.New("schema/filereader: NewFileReader blobref invalid")
	}
	rsc, _, err := fetcher.Fetch(fileBlobRef)
	if err != nil {
		return nil, fmt.Errorf("schema/filereader: fetching file schema blob: %v", err)
	}
	defer rsc.Close()
	ss, err := parseSuperset(rsc)
	if err != nil {
		return nil, fmt.Errorf("schema/filereader: decoding file schema blob: %v", err)
	}
	if ss.Type != "file" && ss.Type != "bytes" {
		return nil, fmt.Errorf("schema/filereader: expected \"file\" or \"bytes\" schema blob, got %q", ss.Type)
	}
	fr, err := ss.NewFileReader(fetcher)
	if err != nil {
		return nil, fmt.Errorf("schema/filereader: creating FileReader for %s: %v", fileBlobRef, err)
	}
	return fr, nil
}

func (b *Blob) NewFileReader(fetcher blob.SeekFetcher) (*FileReader, error) {
	return b.ss.NewFileReader(fetcher)
}
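// exampleReadWholeFile is not part of the original file; it is a minimal
// sketch of how a caller might use NewFileReader, assuming it already has a
// blob.SeekFetcher (ideally a caching one) and the blobref of a "file" or
// "bytes" schema blob. The function name and parameters are illustrative only.
func exampleReadWholeFile(w io.Writer, fetcher blob.SeekFetcher, fileRef blob.Ref) error {
	fr, err := NewFileReader(fetcher, fileRef)
	if err != nil {
		return err
	}
	defer fr.Close() // the doc comment above asks callers to Close when done

	// FileReader embeds *io.SectionReader, so it can be used as a plain io.Reader.
	_, err = io.Copy(w, fr)
	return err
}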
// NewFileReader returns a new FileReader, reading bytes and blobs
// from the provided fetcher.
//
// NewFileReader does no fetch operation on the fetcher itself. The
// fetcher is only used in subsequent read operations.
//
// An error is only returned if the type of the superset is not either
// "file" or "bytes".
func (ss *superset) NewFileReader(fetcher blob.SeekFetcher) (*FileReader, error) {
	if ss.Type != "file" && ss.Type != "bytes" {
		return nil, fmt.Errorf("schema/filereader: Superset not of type \"file\" or \"bytes\"")
	}
	size := int64(ss.SumPartsSize())
	fr := &FileReader{
		fetcher: fetcher,
		ss:      ss,
		size:    size,
		ssm:     make(map[string]*superset),
	}
	fr.SectionReader = io.NewSectionReader(fr, 0, size)
	return fr, nil
}

// LoadAllChunks causes all chunks of the file to be loaded as quickly
// as possible. The contents are immediately discarded, so it is
// assumed that the fetcher is a caching fetcher.
func (fr *FileReader) LoadAllChunks() {
	offsetc := make(chan int64, 16)
	go func() {
		for off := range offsetc {
			go func(off int64) {
				rc, err := fr.readerForOffset(off)
				if err == nil {
					defer rc.Close()
					var b [1]byte
					rc.Read(b[:]) // fault in the blob
				}
			}(off)
		}
	}()
	go fr.GetChunkOffsets(offsetc)
}

// FileSchema returns the reader's schema superset. Don't mutate it.
func (fr *FileReader) FileSchema() *superset {
	return fr.ss
}

func (fr *FileReader) Close() error {
	// TODO: close cached blobs?
	return nil
}

var _ interface {
	io.ReaderAt
	io.Reader
	io.Closer
	Size() int64
} = (*FileReader)(nil)

func (fr *FileReader) ReadAt(p []byte, offset int64) (n int, err error) {
	if offset < 0 {
		return 0, errors.New("schema/filereader: negative offset")
	}
	if offset >= fr.Size() {
		return 0, io.EOF
	}
	want := len(p)
	for len(p) > 0 && err == nil {
		var rc io.ReadCloser
		rc, err = fr.readerForOffset(offset)
		if err != nil {
			return
		}
		var n1 int64 // never bigger than an int
		n1, err = io.CopyN(&sliceWriter{p}, rc, int64(len(p)))
		rc.Close()
		if err == io.EOF {
			err = nil
		}
		if n1 == 0 {
			break
		}
		p = p[n1:]
		offset += int64(n1)
		n += int(n1)
	}
	if n < want && err == nil {
		err = io.ErrUnexpectedEOF
	}
	return n, err
}

// GetChunkOffsets sends c each of the file's chunk offsets.
// The offsets are not necessarily sent in order, and not all ranges of the
// file are necessarily represented if the file contains holes of zero bytes.
// The channel c is closed before the function returns, regardless of error.
func (fr *FileReader) GetChunkOffsets(c chan<- int64) error {
	defer close(c)
	firstErrc := make(chan error, 1)
	return fr.sendPartsChunks(c, firstErrc, 0, fr.ss.Parts)
}
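// exampleCollectChunkOffsets is not part of the original file; it sketches one
// way a caller could drain GetChunkOffsets, assuming fr came from
// NewFileReader. Because GetChunkOffsets closes c before returning, the range
// loop terminates on its own and the error can be read afterwards.
func exampleCollectChunkOffsets(fr *FileReader) ([]int64, error) {
	c := make(chan int64, 16)
	errc := make(chan error, 1)
	go func() {
		errc <- fr.GetChunkOffsets(c) // closes c before returning
	}()
	var offs []int64
	for off := range c {
		offs = append(offs, off)
	}
	return offs, <-errc
}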
// firstErrc is a communication mechanism amongst all outstanding
// superset-fetching goroutines to see if anybody else has failed. If
// so (a non-blocking read returns something), then the recursive call
// to sendPartsChunks is skipped, hopefully preventing unnecessary
// work. Whenever a caller receives on firstErrc, it should also send
// back to it. It's buffered.
func (fr *FileReader) sendPartsChunks(c chan<- int64, firstErrc chan error, off int64, parts []*BytesPart) error {
	var errcs []chan error
	for _, p := range parts {
		switch {
		case p.BlobRef.Valid() && p.BytesRef.Valid():
			return fmt.Errorf("part illegally contained both a blobRef and bytesRef")
		case !p.BlobRef.Valid() && !p.BytesRef.Valid():
			// Don't send
		case p.BlobRef.Valid():
			c <- off
		case p.BytesRef.Valid():
			errc := make(chan error, 1)
			errcs = append(errcs, errc)
			br := p.BytesRef
			go func(off int64) (err error) {
				defer func() {
					errc <- err
					if err != nil {
						select {
						case firstErrc <- err: // pump
						default:
						}
					}
				}()
				select {
				case err = <-firstErrc:
					// There was already an error elsewhere in the file.
					// Avoid doing more work.
					return
				default:
					ss, err := fr.getSuperset(br)
					if err != nil {
						return err
					}
					return fr.sendPartsChunks(c, firstErrc, off, ss.Parts)
				}
			}(off)
		}
		off += int64(p.Size)
	}

	var retErr error
	for _, errc := range errcs {
		if err := <-errc; err != nil && retErr == nil {
			retErr = err
		}
	}
	return retErr
}

type sliceWriter struct {
	dst []byte
}

func (sw *sliceWriter) Write(p []byte) (n int, err error) {
	n = copy(sw.dst, p)
	sw.dst = sw.dst[n:]
	return n, nil
}

var eofReader io.ReadCloser = ioutil.NopCloser(strings.NewReader(""))

func (fr *FileReader) rootReader() *FileReader {
	if fr.parent != nil {
		return fr.parent.rootReader()
	}
	return fr
}

func (fr *FileReader) getSuperset(br blob.Ref) (*superset, error) {
	if root := fr.rootReader(); root != fr {
		return root.getSuperset(br)
	}
	brStr := br.String()
	ssi, err := fr.sfg.Do(brStr, func() (interface{}, error) {
		fr.ssmmu.Lock()
		ss, ok := fr.ssm[brStr]
		fr.ssmmu.Unlock()
		if ok {
			return ss, nil
		}
		rsc, _, err := fr.fetcher.Fetch(br)
		if err != nil {
			return nil, fmt.Errorf("schema/filereader: fetching file schema blob: %v", err)
		}
		defer rsc.Close()
		ss, err = parseSuperset(rsc)
		if err != nil {
			return nil, err
		}
		fr.ssmmu.Lock()
		defer fr.ssmmu.Unlock()
		fr.ssm[brStr] = ss
		return ss, nil
	})
	if err != nil {
		return nil, err
	}
	return ssi.(*superset), nil
}

var debug = os.Getenv("CAMLI_DEBUG") != ""
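// exampleReadRange is not part of the original file; it is a sketch of using
// the io.ReaderAt side of FileReader (ReadAt, defined above) to read an
// arbitrary byte range. A range extending past the end of the file comes back
// short with io.EOF or io.ErrUnexpectedEOF, per ReadAt's behavior; this sketch
// treats that as a non-error and returns the truncated data.
func exampleReadRange(fr *FileReader, off, length int64) ([]byte, error) {
	buf := make([]byte, length)
	n, err := fr.ReadAt(buf, off)
	if err == io.EOF || err == io.ErrUnexpectedEOF {
		err = nil
	}
	return buf[:n], err
}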
// readerForOffset returns a ReadCloser that reads some number of bytes and then EOF
// from the provided offset. Seeing EOF doesn't mean the end of the whole file; just the
// chunk at that offset. The caller must close the ReadCloser when done reading.
func (fr *FileReader) readerForOffset(off int64) (io.ReadCloser, error) {
	if debug {
		log.Printf("(%p) readerForOffset %d + %d = %d", fr, fr.rootOff, off, fr.rootOff+off)
	}
	if off < 0 {
		panic("negative offset")
	}
	if off >= fr.size {
		return eofReader, nil
	}
	offRemain := off
	var skipped int64
	parts := fr.ss.Parts
	for len(parts) > 0 && parts[0].Size <= uint64(offRemain) {
		offRemain -= int64(parts[0].Size)
		skipped += int64(parts[0].Size)
		parts = parts[1:]
	}
	if len(parts) == 0 {
		return eofReader, nil
	}
	p0 := parts[0]
	var rsc types.ReadSeekCloser
	var err error
	switch {
	case p0.BlobRef.Valid() && p0.BytesRef.Valid():
		return nil, fmt.Errorf("part illegally contained both a blobRef and bytesRef")
	case !p0.BlobRef.Valid() && !p0.BytesRef.Valid():
		return &nZeros{int(p0.Size - uint64(offRemain))}, nil
	case p0.BlobRef.Valid():
		rsc, _, err = fr.fetcher.Fetch(p0.BlobRef)
	case p0.BytesRef.Valid():
		var ss *superset
		ss, err = fr.getSuperset(p0.BytesRef)
		if err != nil {
			return nil, err
		}
		rsc, err = ss.NewFileReader(fr.fetcher)
		if err == nil {
			subFR := rsc.(*FileReader)
			subFR.parent = fr.rootReader()
			subFR.rootOff = fr.rootOff + skipped
		}
	}
	if err != nil {
		return nil, err
	}
	offRemain += int64(p0.Offset)
	if offRemain > 0 {
		newPos, err := rsc.Seek(offRemain, os.SEEK_SET)
		if err != nil {
			return nil, err
		}
		if newPos != offRemain {
			panic("Seek didn't work")
		}
	}
	return struct {
		io.Reader
		io.Closer
	}{
		io.LimitReader(rsc, int64(p0.Size)),
		rsc,
	}, nil
}

// nZeros is a ReadCloser that reads remain zeros before EOF.
type nZeros struct {
	remain int
}

func (z *nZeros) Read(p []byte) (n int, err error) {
	for len(p) > 0 && z.remain > 0 {
		p[0] = 0
		p = p[1:] // advance the destination; otherwise only p[0] is written and n can exceed len(p)
		n++
		z.remain--
	}
	if n == 0 && z.remain == 0 {
		err = io.EOF
	}
	return
}

func (*nZeros) Close() error { return nil }
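// exampleReadChunkAt is not part of the original file; it is a package-internal
// sketch of how readerForOffset is meant to be consumed: the returned
// ReadCloser yields the bytes of the chunk containing off and then EOF, not
// the remainder of the file, and must be closed by the caller.
func exampleReadChunkAt(fr *FileReader, off int64) ([]byte, error) {
	rc, err := fr.readerForOffset(off)
	if err != nil {
		return nil, err
	}
	defer rc.Close()
	return ioutil.ReadAll(rc)
}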