github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/unixfs/mod/dagmodifier.go

package mod

import (
	"bytes"
	"errors"
	"io"
	"os"

	proto "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
	mh "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-multihash"
	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"

	key "github.com/ipfs/go-ipfs/blocks/key"
	imp "github.com/ipfs/go-ipfs/importer"
	chunk "github.com/ipfs/go-ipfs/importer/chunk"
	help "github.com/ipfs/go-ipfs/importer/helpers"
	trickle "github.com/ipfs/go-ipfs/importer/trickle"
	mdag "github.com/ipfs/go-ipfs/merkledag"
	pin "github.com/ipfs/go-ipfs/pin"
	ft "github.com/ipfs/go-ipfs/unixfs"
	uio "github.com/ipfs/go-ipfs/unixfs/io"
	u "github.com/ipfs/go-ipfs/util"
)

var ErrSeekFail = errors.New("failed to seek properly")
var ErrSeekEndNotImpl = errors.New("SEEK_END currently not implemented")
var ErrUnrecognizedWhence = errors.New("unrecognized whence")

// 2MB
var writebufferSize = 1 << 21

var log = u.Logger("dagio")

// DagModifier is the only struct licensed and able to correctly
// perform surgery on a DAG 'file'
// Dear god, please rename this to something more pleasant
type DagModifier struct {
	dagserv mdag.DAGService
	curNode *mdag.Node
	mp      pin.ManualPinner

	splitter   chunk.SplitterGen
	ctx        context.Context
	readCancel func()

	writeStart uint64
	curWrOff   uint64
	wrBuf      *bytes.Buffer

	read *uio.DagReader
}

// NewDagModifier returns a DagModifier that operates on a copy of 'from'.
func NewDagModifier(ctx context.Context, from *mdag.Node, serv mdag.DAGService, mp pin.ManualPinner, spl chunk.SplitterGen) (*DagModifier, error) {
	return &DagModifier{
		curNode:  from.Copy(),
		dagserv:  serv,
		splitter: spl,
		ctx:      ctx,
		mp:       mp,
	}, nil
}

// WriteAt will modify a dag file in place
func (dm *DagModifier) WriteAt(b []byte, offset int64) (int, error) {
	// TODO: this is currently VERY inefficient
	// each write that happens at an offset other than the current one causes a
	// flush to disk, and dag rewrite
	if offset == int64(dm.writeStart) && dm.wrBuf != nil {
		// If we would overwrite the previous write
		if len(b) >= dm.wrBuf.Len() {
			dm.wrBuf.Reset()
		}
	} else if uint64(offset) != dm.curWrOff {
		size, err := dm.Size()
		if err != nil {
			return 0, err
		}
		if offset > size {
			err := dm.expandSparse(offset - size)
			if err != nil {
				return 0, err
			}
		}

		err = dm.Sync()
		if err != nil {
			return 0, err
		}
		dm.writeStart = uint64(offset)
	}

	return dm.Write(b)
}

// A reader that just returns zeros
type zeroReader struct{}

func (zr zeroReader) Read(b []byte) (int, error) {
	for i := range b {
		b[i] = 0
	}
	return len(b), nil
}

// expandSparse grows the file with zero blocks of 4096 bytes.
// A small blocksize is chosen to aid in deduplication
func (dm *DagModifier) expandSparse(size int64) error {
	r := io.LimitReader(zeroReader{}, size)
	spl := chunk.NewSizeSplitter(r, 4096)
	blks, errs := chunk.Chan(spl)
	nnode, err := dm.appendData(dm.curNode, blks, errs)
	if err != nil {
		return err
	}
	_, err = dm.dagserv.Add(nnode)
	if err != nil {
		return err
	}
	dm.curNode = nnode
	return nil
}

// Write continues writing to the dag at the current offset
func (dm *DagModifier) Write(b []byte) (int, error) {
	// Drop any active reader; it would see stale data once this write lands
	if dm.read != nil {
		dm.read = nil
	}
	if dm.wrBuf == nil {
		dm.wrBuf = new(bytes.Buffer)
	}

	n, err := dm.wrBuf.Write(b)
	if err != nil {
		return n, err
	}
	dm.curWrOff += uint64(n)
	if dm.wrBuf.Len() > writebufferSize {
		err := dm.Sync()
		if err != nil {
			return n, err
		}
	}
	return n, nil
}

// Size returns the size of the file this dag represents, taking any
// buffered but unsynced writes into account.
func (dm *DagModifier) Size() (int64, error) {
	pbn, err := ft.FromBytes(dm.curNode.Data)
	if err != nil {
		return 0, err
	}

	if dm.wrBuf != nil {
		if uint64(dm.wrBuf.Len())+dm.writeStart > pbn.GetFilesize() {
			return int64(dm.wrBuf.Len()) + int64(dm.writeStart), nil
		}
	}

	return int64(pbn.GetFilesize()), nil
}

// Sync writes changes to this dag to disk
func (dm *DagModifier) Sync() error {
	// No buffer? Nothing to do
	if dm.wrBuf == nil {
		return nil
	}

	// If we have an active reader, kill it
	if dm.read != nil {
		dm.read = nil
		dm.readCancel()
	}

	// Number of bytes we're going to write
	buflen := dm.wrBuf.Len()

	// Grab key for unpinning after mod operation
	curk, err := dm.curNode.Key()
	if err != nil {
		return err
	}

	// overwrite existing dag nodes
	thisk, done, err := dm.modifyDag(dm.curNode, dm.writeStart, dm.wrBuf)
	if err != nil {
		return err
	}

	nd, err := dm.dagserv.Get(dm.ctx, thisk)
	if err != nil {
		return err
	}

	dm.curNode = nd

	// need to write past end of current dag
	if !done {
		blks, errs := chunk.Chan(dm.splitter(dm.wrBuf))
		nd, err = dm.appendData(dm.curNode, blks, errs)
		if err != nil {
			return err
		}

		thisk, err = dm.dagserv.Add(nd)
		if err != nil {
			return err
		}

		dm.curNode = nd
	}

	// Finalize correct pinning, and flush pinner
	dm.mp.PinWithMode(thisk, pin.Recursive)
	dm.mp.RemovePinWithMode(curk, pin.Recursive)
	err = dm.mp.Flush()
	if err != nil {
		return err
	}

	dm.writeStart += uint64(buflen)

	dm.wrBuf = nil
	return nil
}

// modifyDag writes the data in 'data' over the data in 'node' starting at 'offset'
// returns the new key of the passed in node and whether or not all the data in the reader
// has been consumed.
func (dm *DagModifier) modifyDag(node *mdag.Node, offset uint64, data io.Reader) (key.Key, bool, error) {
	f, err := ft.FromBytes(node.Data)
	if err != nil {
		return "", false, err
	}

	// If we've reached a leaf node.
	if len(node.Links) == 0 {
		n, err := data.Read(f.Data[offset:])
		if err != nil && err != io.EOF {
			return "", false, err
		}

		// Update newly written node.
		b, err := proto.Marshal(f)
		if err != nil {
			return "", false, err
		}

		nd := &mdag.Node{Data: b}
		k, err := dm.dagserv.Add(nd)
		if err != nil {
			return "", false, err
		}

		// Hey look! we're done!
		var done bool
		if n < len(f.Data[offset:]) {
			done = true
		}

		return k, done, nil
	}

	var cur uint64
	var done bool
	for i, bs := range f.GetBlocksizes() {
		// We found the correct child to write into
		if cur+bs > offset {
			// Unpin block
			ckey := key.Key(node.Links[i].Hash)
			dm.mp.RemovePinWithMode(ckey, pin.Indirect)

			child, err := node.Links[i].GetNode(dm.ctx, dm.dagserv)
			if err != nil {
				return "", false, err
			}
			k, sdone, err := dm.modifyDag(child, offset-cur, data)
			if err != nil {
				return "", false, err
			}

			// pin the new node
			dm.mp.PinWithMode(k, pin.Indirect)

			offset += bs
			node.Links[i].Hash = mh.Multihash(k)

			// Recache serialized node
			_, err = node.Encoded(true)
			if err != nil {
				return "", false, err
			}

			if sdone {
				// No more bytes to write!
				done = true
				break
			}
			offset = cur + bs
		}
		cur += bs
	}

	k, err := dm.dagserv.Add(node)
	return k, done, err
}

// appendData appends the blocks from the given chan to the end of this dag
func (dm *DagModifier) appendData(node *mdag.Node, blks <-chan []byte, errs <-chan error) (*mdag.Node, error) {
	dbp := &help.DagBuilderParams{
		Dagserv:  dm.dagserv,
		Maxlinks: help.DefaultLinksPerBlock,
		NodeCB:   imp.BasicPinnerCB(dm.mp),
	}

	return trickle.TrickleAppend(dm.ctx, node, dbp.New(blks, errs))
}

// Read reads data from this dag starting at the current offset
func (dm *DagModifier) Read(b []byte) (int, error) {
	err := dm.readPrep()
	if err != nil {
		return 0, err
	}

	n, err := dm.read.Read(b)
	dm.curWrOff += uint64(n)
	return n, err
}

// readPrep flushes any buffered writes and, if no reader is active, sets up
// a DagReader positioned at the current offset.
func (dm *DagModifier) readPrep() error {
	err := dm.Sync()
	if err != nil {
		return err
	}

	if dm.read == nil {
		ctx, cancel := context.WithCancel(dm.ctx)
		dr, err := uio.NewDagReader(ctx, dm.curNode, dm.dagserv)
		if err != nil {
			return err
		}

		i, err := dr.Seek(int64(dm.curWrOff), os.SEEK_SET)
		if err != nil {
			return err
		}

		if i != int64(dm.curWrOff) {
			return ErrSeekFail
		}

		dm.readCancel = cancel
		dm.read = dr
	}

	return nil
}

// CtxReadFull reads data from this dag starting at the current offset
func (dm *DagModifier) CtxReadFull(ctx context.Context, b []byte) (int, error) {
	err := dm.readPrep()
	if err != nil {
		return 0, err
	}

	n, err := dm.read.CtxReadFull(ctx, b)
	dm.curWrOff += uint64(n)
	return n, err
}

// GetNode gets the modified DAG Node
func (dm *DagModifier) GetNode() (*mdag.Node, error) {
	err := dm.Sync()
	if err != nil {
		return nil, err
	}
	return dm.curNode.Copy(), nil
}

// HasChanges returns whether or not there are unflushed changes to this dag
func (dm *DagModifier) HasChanges() bool {
	return dm.wrBuf != nil
}

// Seek flushes any buffered writes and moves the current offset.
// SEEK_END is not currently supported.
func (dm *DagModifier) Seek(offset int64, whence int) (int64, error) {
	err := dm.Sync()
	if err != nil {
		return 0, err
	}

	switch whence {
	case os.SEEK_CUR:
		dm.curWrOff += uint64(offset)
		dm.writeStart = dm.curWrOff
	case os.SEEK_SET:
		dm.curWrOff = uint64(offset)
		dm.writeStart = uint64(offset)
	case os.SEEK_END:
		return 0, ErrSeekEndNotImpl
	default:
		return 0, ErrUnrecognizedWhence
	}

	if dm.read != nil {
		_, err = dm.read.Seek(offset, whence)
		if err != nil {
			return 0, err
		}
	}

	return int64(dm.curWrOff), nil
}

// Truncate resizes the file to 'size' bytes, shrinking it via dagTruncate or
// expanding it with sparse zero blocks when 'size' exceeds the current size.
func (dm *DagModifier) Truncate(size int64) error {
	err := dm.Sync()
	if err != nil {
		return err
	}

	realSize, err := dm.Size()
	if err != nil {
		return err
	}

	// Truncate can also be used to expand the file
	if size > int64(realSize) {
		return dm.expandSparse(int64(size) - realSize)
	}

	nnode, err := dagTruncate(dm.ctx, dm.curNode, uint64(size), dm.dagserv)
	if err != nil {
		return err
	}

	_, err = dm.dagserv.Add(nnode)
	if err != nil {
		return err
	}

	dm.curNode = nnode
	return nil
}

// dagTruncate truncates the given node to 'size' and returns the modified Node
func dagTruncate(ctx context.Context, nd *mdag.Node, size uint64, ds mdag.DAGService) (*mdag.Node, error) {
	if len(nd.Links) == 0 {
		// TODO: this can likely be done without marshaling and remarshaling
		pbn, err := ft.FromBytes(nd.Data)
		if err != nil {
			return nil, err
		}

		nd.Data = ft.WrapData(pbn.Data[:size])
		return nd, nil
	}

	var cur uint64
	end := 0
	var modified *mdag.Node
	ndata := new(ft.FSNode)
	for i, lnk := range nd.Links {
		child, err := lnk.GetNode(ctx, ds)
		if err != nil {
			return nil, err
		}

		childsize, err := ft.DataSize(child.Data)
		if err != nil {
			return nil, err
		}

		// found the child we want to cut
		if size < cur+childsize {
			nchild, err := dagTruncate(ctx, child, size-cur, ds)
			if err != nil {
				return nil, err
			}

			ndata.AddBlockSize(size - cur)

			modified = nchild
			end = i
			break
		}
		cur += childsize
		ndata.AddBlockSize(childsize)
	}

	_, err := ds.Add(modified)
	if err != nil {
		return nil, err
	}

	nd.Links = nd.Links[:end]
	err = nd.AddNodeLinkClean("", modified)
	if err != nil {
		return nil, err
	}

	d, err := ndata.GetBytes()
	if err != nil {
		return nil, err
	}

	nd.Data = d

	// invalidate cache and recompute serialized data
	_, err = nd.Encoded(true)
	if err != nil {
		return nil, err
	}

	return nd, nil
}
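
A minimal usage sketch (not part of the original file): assuming a context.Context ctx, an existing unixfs root *mdag.Node root, an mdag.DAGService ds, a pin.ManualPinner pinner, and a chunk.SplitterGen spl are already in scope, an in-place edit might look roughly like the following; the names and the offset are illustrative only.

	dm, err := NewDagModifier(ctx, root, ds, pinner, spl)
	if err != nil {
		return err
	}
	// Overwrite bytes starting at offset 1024; writes are buffered in memory.
	if _, err := dm.WriteAt([]byte("patched data"), 1024); err != nil {
		return err
	}
	// Flush the buffered write into new dag nodes and update pins.
	if err := dm.Sync(); err != nil {
		return err
	}
	// GetNode syncs and returns a copy of the modified root node.
	nd, err := dm.GetNode()
	if err != nil {
		return err
	}
	_ = nd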