github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/unixfs/mod/dagmodifier.go (about)

     1  package mod
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"io"
     7  	"os"
     8  
     9  	proto "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
    10  	mh "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-multihash"
    11  	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
    12  
    13  	key "github.com/ipfs/go-ipfs/blocks/key"
    14  	imp "github.com/ipfs/go-ipfs/importer"
    15  	chunk "github.com/ipfs/go-ipfs/importer/chunk"
    16  	help "github.com/ipfs/go-ipfs/importer/helpers"
    17  	trickle "github.com/ipfs/go-ipfs/importer/trickle"
    18  	mdag "github.com/ipfs/go-ipfs/merkledag"
    19  	pin "github.com/ipfs/go-ipfs/pin"
    20  	ft "github.com/ipfs/go-ipfs/unixfs"
    21  	uio "github.com/ipfs/go-ipfs/unixfs/io"
    22  	u "github.com/ipfs/go-ipfs/util"
    23  )
    24  
// ErrSeekFail is returned when the underlying DAG reader ends up at a
// different position than the one it was asked to seek to.
var ErrSeekFail = errors.New("failed to seek properly")
// ErrSeekEndNotImpl is returned by Seek when called with os.SEEK_END.
var ErrSeekEndNotImpl = errors.New("SEEK_END currently not implemented")
// ErrUnrecognizedWhence is returned by Seek for any whence value other than
// os.SEEK_SET, os.SEEK_CUR or os.SEEK_END.
var ErrUnrecognizedWhence = errors.New("unrecognized whence")

// writebufferSize is the amount of buffered data (1 << 21 bytes, i.e. 2MB)
// above which Write flushes the pending buffer by calling Sync.
var writebufferSize = 1 << 21

// log is the package logger, registered under the name "dagio".
var log = u.Logger("dagio")
    33  
// DagModifier is the only struct licensed and able to correctly
// perform surgery on a DAG 'file'. Writes are collected in an in-memory
// buffer and applied to the DAG when Sync runs.
// Dear god, please rename this to something more pleasant
type DagModifier struct {
	// dagserv is used to fetch existing nodes and store rewritten ones
	dagserv mdag.DAGService
	// curNode is the current root node of the file being modified
	curNode *mdag.Node
	// mp is the pinner that is updated as nodes get replaced
	mp      pin.ManualPinner

	// splitter creates the chunker for buffered data that has to be
	// appended past the end of the existing dag
	splitter   chunk.SplitterGen
	// ctx is passed to dag service calls and is the parent of reader contexts
	ctx        context.Context
	// readCancel cancels the context of the reader stored in 'read'
	readCancel func()

	// writeStart is the file offset at which the contents of wrBuf begin
	writeStart uint64
	// curWrOff is the current offset; Write, Read and CtxReadFull advance it
	// and Seek sets it
	curWrOff   uint64
	// wrBuf holds written bytes that Sync has not flushed yet; nil when
	// there is nothing pending
	wrBuf      *bytes.Buffer

	// read is the reader created on demand by readPrep for Read and CtxReadFull
	read *uio.DagReader
}
    52  
    53  func NewDagModifier(ctx context.Context, from *mdag.Node, serv mdag.DAGService, mp pin.ManualPinner, spl chunk.SplitterGen) (*DagModifier, error) {
    54  	return &DagModifier{
    55  		curNode:  from.Copy(),
    56  		dagserv:  serv,
    57  		splitter: spl,
    58  		ctx:      ctx,
    59  		mp:       mp,
    60  	}, nil
    61  }
    62  
    63  // WriteAt will modify a dag file in place
    64  func (dm *DagModifier) WriteAt(b []byte, offset int64) (int, error) {
    65  	// TODO: this is currently VERY inneficient
    66  	// each write that happens at an offset other than the current one causes a
    67  	// flush to disk, and dag rewrite
    68  	if offset == int64(dm.writeStart) && dm.wrBuf != nil {
    69  		// If we would overwrite the previous write
    70  		if len(b) >= dm.wrBuf.Len() {
    71  			dm.wrBuf.Reset()
    72  		}
    73  	} else if uint64(offset) != dm.curWrOff {
    74  		size, err := dm.Size()
    75  		if err != nil {
    76  			return 0, err
    77  		}
    78  		if offset > size {
    79  			err := dm.expandSparse(offset - size)
    80  			if err != nil {
    81  				return 0, err
    82  			}
    83  		}
    84  
    85  		err = dm.Sync()
    86  		if err != nil {
    87  			return 0, err
    88  		}
    89  		dm.writeStart = uint64(offset)
    90  	}
    91  
    92  	return dm.Write(b)
    93  }
    94  
    95  // A reader that just returns zeros
    96  type zeroReader struct{}
    97  
    98  func (zr zeroReader) Read(b []byte) (int, error) {
    99  	for i := range b {
   100  		b[i] = 0
   101  	}
   102  	return len(b), nil
   103  }
   104  
   105  // expandSparse grows the file with zero blocks of 4096
   106  // A small blocksize is chosen to aid in deduplication
   107  func (dm *DagModifier) expandSparse(size int64) error {
   108  	r := io.LimitReader(zeroReader{}, size)
   109  	spl := chunk.NewSizeSplitter(r, 4096)
   110  	blks, errs := chunk.Chan(spl)
   111  	nnode, err := dm.appendData(dm.curNode, blks, errs)
   112  	if err != nil {
   113  		return err
   114  	}
   115  	_, err = dm.dagserv.Add(nnode)
   116  	if err != nil {
   117  		return err
   118  	}
   119  	dm.curNode = nnode
   120  	return nil
   121  }
   122  
   123  // Write continues writing to the dag at the current offset
   124  func (dm *DagModifier) Write(b []byte) (int, error) {
   125  	if dm.read != nil {
   126  		dm.read = nil
   127  	}
   128  	if dm.wrBuf == nil {
   129  		dm.wrBuf = new(bytes.Buffer)
   130  	}
   131  
   132  	n, err := dm.wrBuf.Write(b)
   133  	if err != nil {
   134  		return n, err
   135  	}
   136  	dm.curWrOff += uint64(n)
   137  	if dm.wrBuf.Len() > writebufferSize {
   138  		err := dm.Sync()
   139  		if err != nil {
   140  			return n, err
   141  		}
   142  	}
   143  	return n, nil
   144  }
   145  
   146  func (dm *DagModifier) Size() (int64, error) {
   147  	pbn, err := ft.FromBytes(dm.curNode.Data)
   148  	if err != nil {
   149  		return 0, err
   150  	}
   151  
   152  	if dm.wrBuf != nil {
   153  		if uint64(dm.wrBuf.Len())+dm.writeStart > pbn.GetFilesize() {
   154  			return int64(dm.wrBuf.Len()) + int64(dm.writeStart), nil
   155  		}
   156  	}
   157  
   158  	return int64(pbn.GetFilesize()), nil
   159  }
   160  
   161  // Sync writes changes to this dag to disk
   162  func (dm *DagModifier) Sync() error {
   163  	// No buffer? Nothing to do
   164  	if dm.wrBuf == nil {
   165  		return nil
   166  	}
   167  
   168  	// If we have an active reader, kill it
   169  	if dm.read != nil {
   170  		dm.read = nil
   171  		dm.readCancel()
   172  	}
   173  
   174  	// Number of bytes we're going to write
   175  	buflen := dm.wrBuf.Len()
   176  
   177  	// Grab key for unpinning after mod operation
   178  	curk, err := dm.curNode.Key()
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	// overwrite existing dag nodes
   184  	thisk, done, err := dm.modifyDag(dm.curNode, dm.writeStart, dm.wrBuf)
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	nd, err := dm.dagserv.Get(dm.ctx, thisk)
   190  	if err != nil {
   191  		return err
   192  	}
   193  
   194  	dm.curNode = nd
   195  
   196  	// need to write past end of current dag
   197  	if !done {
   198  		blks, errs := chunk.Chan(dm.splitter(dm.wrBuf))
   199  		nd, err = dm.appendData(dm.curNode, blks, errs)
   200  		if err != nil {
   201  			return err
   202  		}
   203  
   204  		thisk, err = dm.dagserv.Add(nd)
   205  		if err != nil {
   206  			return err
   207  		}
   208  
   209  		dm.curNode = nd
   210  	}
   211  
   212  	// Finalize correct pinning, and flush pinner
   213  	dm.mp.PinWithMode(thisk, pin.Recursive)
   214  	dm.mp.RemovePinWithMode(curk, pin.Recursive)
   215  	err = dm.mp.Flush()
   216  	if err != nil {
   217  		return err
   218  	}
   219  
   220  	dm.writeStart += uint64(buflen)
   221  
   222  	dm.wrBuf = nil
   223  	return nil
   224  }
   225  
   226  // modifyDag writes the data in 'data' over the data in 'node' starting at 'offset'
   227  // returns the new key of the passed in node and whether or not all the data in the reader
   228  // has been consumed.
   229  func (dm *DagModifier) modifyDag(node *mdag.Node, offset uint64, data io.Reader) (key.Key, bool, error) {
   230  	f, err := ft.FromBytes(node.Data)
   231  	if err != nil {
   232  		return "", false, err
   233  	}
   234  
   235  	// If we've reached a leaf node.
   236  	if len(node.Links) == 0 {
   237  		n, err := data.Read(f.Data[offset:])
   238  		if err != nil && err != io.EOF {
   239  			return "", false, err
   240  		}
   241  
   242  		// Update newly written node..
   243  		b, err := proto.Marshal(f)
   244  		if err != nil {
   245  			return "", false, err
   246  		}
   247  
   248  		nd := &mdag.Node{Data: b}
   249  		k, err := dm.dagserv.Add(nd)
   250  		if err != nil {
   251  			return "", false, err
   252  		}
   253  
   254  		// Hey look! we're done!
   255  		var done bool
   256  		if n < len(f.Data[offset:]) {
   257  			done = true
   258  		}
   259  
   260  		return k, done, nil
   261  	}
   262  
   263  	var cur uint64
   264  	var done bool
   265  	for i, bs := range f.GetBlocksizes() {
   266  		// We found the correct child to write into
   267  		if cur+bs > offset {
   268  			// Unpin block
   269  			ckey := key.Key(node.Links[i].Hash)
   270  			dm.mp.RemovePinWithMode(ckey, pin.Indirect)
   271  
   272  			child, err := node.Links[i].GetNode(dm.ctx, dm.dagserv)
   273  			if err != nil {
   274  				return "", false, err
   275  			}
   276  			k, sdone, err := dm.modifyDag(child, offset-cur, data)
   277  			if err != nil {
   278  				return "", false, err
   279  			}
   280  
   281  			// pin the new node
   282  			dm.mp.PinWithMode(k, pin.Indirect)
   283  
   284  			offset += bs
   285  			node.Links[i].Hash = mh.Multihash(k)
   286  
   287  			// Recache serialized node
   288  			_, err = node.Encoded(true)
   289  			if err != nil {
   290  				return "", false, err
   291  			}
   292  
   293  			if sdone {
   294  				// No more bytes to write!
   295  				done = true
   296  				break
   297  			}
   298  			offset = cur + bs
   299  		}
   300  		cur += bs
   301  	}
   302  
   303  	k, err := dm.dagserv.Add(node)
   304  	return k, done, err
   305  }
   306  
   307  // appendData appends the blocks from the given chan to the end of this dag
   308  func (dm *DagModifier) appendData(node *mdag.Node, blks <-chan []byte, errs <-chan error) (*mdag.Node, error) {
   309  	dbp := &help.DagBuilderParams{
   310  		Dagserv:  dm.dagserv,
   311  		Maxlinks: help.DefaultLinksPerBlock,
   312  		NodeCB:   imp.BasicPinnerCB(dm.mp),
   313  	}
   314  
   315  	return trickle.TrickleAppend(dm.ctx, node, dbp.New(blks, errs))
   316  }
   317  
   318  // Read data from this dag starting at the current offset
   319  func (dm *DagModifier) Read(b []byte) (int, error) {
   320  	err := dm.readPrep()
   321  	if err != nil {
   322  		return 0, err
   323  	}
   324  
   325  	n, err := dm.read.Read(b)
   326  	dm.curWrOff += uint64(n)
   327  	return n, err
   328  }
   329  
   330  func (dm *DagModifier) readPrep() error {
   331  	err := dm.Sync()
   332  	if err != nil {
   333  		return err
   334  	}
   335  
   336  	if dm.read == nil {
   337  		ctx, cancel := context.WithCancel(dm.ctx)
   338  		dr, err := uio.NewDagReader(ctx, dm.curNode, dm.dagserv)
   339  		if err != nil {
   340  			return err
   341  		}
   342  
   343  		i, err := dr.Seek(int64(dm.curWrOff), os.SEEK_SET)
   344  		if err != nil {
   345  			return err
   346  		}
   347  
   348  		if i != int64(dm.curWrOff) {
   349  			return ErrSeekFail
   350  		}
   351  
   352  		dm.readCancel = cancel
   353  		dm.read = dr
   354  	}
   355  
   356  	return nil
   357  }
   358  
   359  // Read data from this dag starting at the current offset
   360  func (dm *DagModifier) CtxReadFull(ctx context.Context, b []byte) (int, error) {
   361  	err := dm.readPrep()
   362  	if err != nil {
   363  		return 0, err
   364  	}
   365  
   366  	n, err := dm.read.CtxReadFull(ctx, b)
   367  	dm.curWrOff += uint64(n)
   368  	return n, err
   369  }
   370  
   371  // GetNode gets the modified DAG Node
   372  func (dm *DagModifier) GetNode() (*mdag.Node, error) {
   373  	err := dm.Sync()
   374  	if err != nil {
   375  		return nil, err
   376  	}
   377  	return dm.curNode.Copy(), nil
   378  }
   379  
   380  // HasChanges returned whether or not there are unflushed changes to this dag
   381  func (dm *DagModifier) HasChanges() bool {
   382  	return dm.wrBuf != nil
   383  }
   384  
   385  func (dm *DagModifier) Seek(offset int64, whence int) (int64, error) {
   386  	err := dm.Sync()
   387  	if err != nil {
   388  		return 0, err
   389  	}
   390  
   391  	switch whence {
   392  	case os.SEEK_CUR:
   393  		dm.curWrOff += uint64(offset)
   394  		dm.writeStart = dm.curWrOff
   395  	case os.SEEK_SET:
   396  		dm.curWrOff = uint64(offset)
   397  		dm.writeStart = uint64(offset)
   398  	case os.SEEK_END:
   399  		return 0, ErrSeekEndNotImpl
   400  	default:
   401  		return 0, ErrUnrecognizedWhence
   402  	}
   403  
   404  	if dm.read != nil {
   405  		_, err = dm.read.Seek(offset, whence)
   406  		if err != nil {
   407  			return 0, err
   408  		}
   409  	}
   410  
   411  	return int64(dm.curWrOff), nil
   412  }
   413  
   414  func (dm *DagModifier) Truncate(size int64) error {
   415  	err := dm.Sync()
   416  	if err != nil {
   417  		return err
   418  	}
   419  
   420  	realSize, err := dm.Size()
   421  	if err != nil {
   422  		return err
   423  	}
   424  
   425  	// Truncate can also be used to expand the file
   426  	if size > int64(realSize) {
   427  		return dm.expandSparse(int64(size) - realSize)
   428  	}
   429  
   430  	nnode, err := dagTruncate(dm.ctx, dm.curNode, uint64(size), dm.dagserv)
   431  	if err != nil {
   432  		return err
   433  	}
   434  
   435  	_, err = dm.dagserv.Add(nnode)
   436  	if err != nil {
   437  		return err
   438  	}
   439  
   440  	dm.curNode = nnode
   441  	return nil
   442  }
   443  
   444  // dagTruncate truncates the given node to 'size' and returns the modified Node
   445  func dagTruncate(ctx context.Context, nd *mdag.Node, size uint64, ds mdag.DAGService) (*mdag.Node, error) {
   446  	if len(nd.Links) == 0 {
   447  		// TODO: this can likely be done without marshaling and remarshaling
   448  		pbn, err := ft.FromBytes(nd.Data)
   449  		if err != nil {
   450  			return nil, err
   451  		}
   452  
   453  		nd.Data = ft.WrapData(pbn.Data[:size])
   454  		return nd, nil
   455  	}
   456  
   457  	var cur uint64
   458  	end := 0
   459  	var modified *mdag.Node
   460  	ndata := new(ft.FSNode)
   461  	for i, lnk := range nd.Links {
   462  		child, err := lnk.GetNode(ctx, ds)
   463  		if err != nil {
   464  			return nil, err
   465  		}
   466  
   467  		childsize, err := ft.DataSize(child.Data)
   468  		if err != nil {
   469  			return nil, err
   470  		}
   471  
   472  		// found the child we want to cut
   473  		if size < cur+childsize {
   474  			nchild, err := dagTruncate(ctx, child, size-cur, ds)
   475  			if err != nil {
   476  				return nil, err
   477  			}
   478  
   479  			ndata.AddBlockSize(size - cur)
   480  
   481  			modified = nchild
   482  			end = i
   483  			break
   484  		}
   485  		cur += childsize
   486  		ndata.AddBlockSize(childsize)
   487  	}
   488  
   489  	_, err := ds.Add(modified)
   490  	if err != nil {
   491  		return nil, err
   492  	}
   493  
   494  	nd.Links = nd.Links[:end]
   495  	err = nd.AddNodeLinkClean("", modified)
   496  	if err != nil {
   497  		return nil, err
   498  	}
   499  
   500  	d, err := ndata.GetBytes()
   501  	if err != nil {
   502  		return nil, err
   503  	}
   504  
   505  	nd.Data = d
   506  
   507  	// invalidate cache and recompute serialized data
   508  	_, err = nd.Encoded(true)
   509  	if err != nil {
   510  		return nil, err
   511  	}
   512  
   513  	return nd, nil
   514  }