github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/unixfs/io/dagreader.go (about)

     1  package io
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  
    10  	proto "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
    11  	"github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
    12  
    13  	mdag "github.com/ipfs/go-ipfs/merkledag"
    14  	ft "github.com/ipfs/go-ipfs/unixfs"
    15  	ftpb "github.com/ipfs/go-ipfs/unixfs/pb"
    16  )
    17  
    18  var ErrIsDir = errors.New("this dag node is a directory")
    19  
    20  var ErrCantReadSymlinks = errors.New("cannot currently read symlinks")
    21  
// DagReader provides a way to easily read the data contained in a dag.
// It presents the root node's inline data followed by the data of each child
// link, in order, as one stream that can be read, seeked, and closed.
type DagReader struct {
	// service used to build readers for child nodes
	serv mdag.DAGService

	// the node being read
	node *mdag.Node

	// cached protobuf structure decoded from node.Data; consulted for the
	// file size and the per-link block sizes
	pbdata *ftpb.Data

	// the current data buffer to be read from
	// will either be a bytes.Reader or a child DagReader
	buf ReadSeekCloser

	// NodeGetters for each of the node's child links
	promises []mdag.NodeGetter

	// index into promises of the next child to load; it is advanced each
	// time a child is fetched, so it is one past the child backing buf
	linkPosition int

	// current offset for the read head within the 'file'
	offset int64

	// Our context; child readers derive their contexts from it
	ctx context.Context

	// cancels ctx, and with it the contexts of any child readers
	cancel func()
}
    51  
    52  type ReadSeekCloser interface {
    53  	io.Reader
    54  	io.Seeker
    55  	io.Closer
    56  	io.WriterTo
    57  }
    58  
    59  // NewDagReader creates a new reader object that reads the data represented by the given
    60  // node, using the passed in DAGService for data retreival
    61  func NewDagReader(ctx context.Context, n *mdag.Node, serv mdag.DAGService) (*DagReader, error) {
    62  	pb := new(ftpb.Data)
    63  	if err := proto.Unmarshal(n.Data, pb); err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	switch pb.GetType() {
    68  	case ftpb.Data_Directory:
    69  		// Dont allow reading directories
    70  		return nil, ErrIsDir
    71  	case ftpb.Data_Raw:
    72  		fallthrough
    73  	case ftpb.Data_File:
    74  		return NewDataFileReader(ctx, n, pb, serv), nil
    75  	case ftpb.Data_Metadata:
    76  		if len(n.Links) == 0 {
    77  			return nil, errors.New("incorrectly formatted metadata object")
    78  		}
    79  		child, err := n.Links[0].GetNode(ctx, serv)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		return NewDagReader(ctx, child, serv)
    84  	case ftpb.Data_Symlink:
    85  		return nil, ErrCantReadSymlinks
    86  	default:
    87  		return nil, ft.ErrUnrecognizedType
    88  	}
    89  }
    90  
    91  func NewDataFileReader(ctx context.Context, n *mdag.Node, pb *ftpb.Data, serv mdag.DAGService) *DagReader {
    92  	fctx, cancel := context.WithCancel(ctx)
    93  	promises := serv.GetDAG(fctx, n)
    94  	return &DagReader{
    95  		node:     n,
    96  		serv:     serv,
    97  		buf:      NewRSNCFromBytes(pb.GetData()),
    98  		promises: promises,
    99  		ctx:      fctx,
   100  		cancel:   cancel,
   101  		pbdata:   pb,
   102  	}
   103  }
   104  
   105  // precalcNextBuf follows the next link in line and loads it from the DAGService,
   106  // setting the next buffer to read from
   107  func (dr *DagReader) precalcNextBuf(ctx context.Context) error {
   108  	dr.buf.Close() // Just to make sure
   109  	if dr.linkPosition >= len(dr.promises) {
   110  		return io.EOF
   111  	}
   112  
   113  	nxt, err := dr.promises[dr.linkPosition].Get(ctx)
   114  	if err != nil {
   115  		return err
   116  	}
   117  	dr.linkPosition++
   118  
   119  	pb := new(ftpb.Data)
   120  	err = proto.Unmarshal(nxt.Data, pb)
   121  	if err != nil {
   122  		return fmt.Errorf("incorrectly formatted protobuf: %s", err)
   123  	}
   124  
   125  	switch pb.GetType() {
   126  	case ftpb.Data_Directory:
   127  		// A directory should not exist within a file
   128  		return ft.ErrInvalidDirLocation
   129  	case ftpb.Data_File:
   130  		dr.buf = NewDataFileReader(dr.ctx, nxt, pb, dr.serv)
   131  		return nil
   132  	case ftpb.Data_Raw:
   133  		dr.buf = NewRSNCFromBytes(pb.GetData())
   134  		return nil
   135  	case ftpb.Data_Metadata:
   136  		return errors.New("Shouldnt have had metadata object inside file")
   137  	case ftpb.Data_Symlink:
   138  		return errors.New("shouldnt have had symlink inside file")
   139  	default:
   140  		return ft.ErrUnrecognizedType
   141  	}
   142  }
   143  
   144  // Size return the total length of the data from the DAG structured file.
   145  func (dr *DagReader) Size() uint64 {
   146  	return dr.pbdata.GetFilesize()
   147  }
   148  
   149  // Read reads data from the DAG structured file
   150  func (dr *DagReader) Read(b []byte) (int, error) {
   151  	return dr.CtxReadFull(dr.ctx, b)
   152  }
   153  
   154  // CtxReadFull reads data from the DAG structured file
   155  func (dr *DagReader) CtxReadFull(ctx context.Context, b []byte) (int, error) {
   156  	// If no cached buffer, load one
   157  	total := 0
   158  	for {
   159  		// Attempt to fill bytes from cached buffer
   160  		n, err := dr.buf.Read(b[total:])
   161  		total += n
   162  		dr.offset += int64(n)
   163  		if err != nil {
   164  			// EOF is expected
   165  			if err != io.EOF {
   166  				return total, err
   167  			}
   168  		}
   169  
   170  		// If weve read enough bytes, return
   171  		if total == len(b) {
   172  			return total, nil
   173  		}
   174  
   175  		// Otherwise, load up the next block
   176  		err = dr.precalcNextBuf(ctx)
   177  		if err != nil {
   178  			return total, err
   179  		}
   180  	}
   181  }
   182  
   183  func (dr *DagReader) WriteTo(w io.Writer) (int64, error) {
   184  	// If no cached buffer, load one
   185  	total := int64(0)
   186  	for {
   187  		// Attempt to write bytes from cached buffer
   188  		n, err := dr.buf.WriteTo(w)
   189  		total += n
   190  		dr.offset += n
   191  		if err != nil {
   192  			if err != io.EOF {
   193  				return total, err
   194  			}
   195  		}
   196  
   197  		// Otherwise, load up the next block
   198  		err = dr.precalcNextBuf(dr.ctx)
   199  		if err != nil {
   200  			if err == io.EOF {
   201  				return total, nil
   202  			}
   203  			return total, err
   204  		}
   205  	}
   206  }
   207  
   208  func (dr *DagReader) Close() error {
   209  	dr.cancel()
   210  	return nil
   211  }
   212  
   213  // Seek implements io.Seeker, and will seek to a given offset in the file
   214  // interface matches standard unix seek
   215  // TODO: check if we can do relative seeks, to reduce the amount of dagreader
   216  // recreations that need to happen.
   217  func (dr *DagReader) Seek(offset int64, whence int) (int64, error) {
   218  	switch whence {
   219  	case os.SEEK_SET:
   220  		if offset < 0 {
   221  			return -1, errors.New("Invalid offset")
   222  		}
   223  
   224  		// Grab cached protobuf object (solely to make code look cleaner)
   225  		pb := dr.pbdata
   226  
   227  		// left represents the number of bytes remaining to seek to (from beginning)
   228  		left := offset
   229  		if int64(len(pb.Data)) >= offset {
   230  			// Close current buf to close potential child dagreader
   231  			dr.buf.Close()
   232  			dr.buf = NewRSNCFromBytes(pb.GetData()[offset:])
   233  
   234  			// start reading links from the beginning
   235  			dr.linkPosition = 0
   236  			dr.offset = offset
   237  			return offset, nil
   238  		} else {
   239  			// skip past root block data
   240  			left -= int64(len(pb.Data))
   241  		}
   242  
   243  		// iterate through links and find where we need to be
   244  		for i := 0; i < len(pb.Blocksizes); i++ {
   245  			if pb.Blocksizes[i] > uint64(left) {
   246  				dr.linkPosition = i
   247  				break
   248  			} else {
   249  				left -= int64(pb.Blocksizes[i])
   250  			}
   251  		}
   252  
   253  		// start sub-block request
   254  		err := dr.precalcNextBuf(dr.ctx)
   255  		if err != nil {
   256  			return 0, err
   257  		}
   258  
   259  		// set proper offset within child readseeker
   260  		n, err := dr.buf.Seek(left, os.SEEK_SET)
   261  		if err != nil {
   262  			return -1, err
   263  		}
   264  
   265  		// sanity
   266  		left -= n
   267  		if left != 0 {
   268  			return -1, errors.New("failed to seek properly")
   269  		}
   270  		dr.offset = offset
   271  		return offset, nil
   272  	case os.SEEK_CUR:
   273  		// TODO: be smarter here
   274  		noffset := dr.offset + offset
   275  		return dr.Seek(noffset, os.SEEK_SET)
   276  	case os.SEEK_END:
   277  		noffset := int64(dr.pbdata.GetFilesize()) - offset
   278  		return dr.Seek(noffset, os.SEEK_SET)
   279  	default:
   280  		return 0, errors.New("invalid whence")
   281  	}
   282  	return 0, nil
   283  }
   284  
   285  // readSeekNopCloser wraps a bytes.Reader to implement ReadSeekCloser
   286  type readSeekNopCloser struct {
   287  	*bytes.Reader
   288  }
   289  
   290  func NewRSNCFromBytes(b []byte) ReadSeekCloser {
   291  	return &readSeekNopCloser{bytes.NewReader(b)}
   292  }
   293  
   294  func (r *readSeekNopCloser) Close() error { return nil }