github.com/yasker/longhorn-engine@v0.0.0-20160621014712-6ed6cfca0729/replica/replica.go (about) 1 package replica 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "os" 8 "path" 9 "regexp" 10 "strconv" 11 "strings" 12 "sync" 13 "syscall" 14 15 "github.com/Sirupsen/logrus" 16 "github.com/deckarep/golang-set" 17 "github.com/rancher/longhorn/types" 18 ) 19 20 const ( 21 metadataSuffix = ".meta" 22 imgSuffix = ".img" 23 volumeMetaData = "volume.meta" 24 defaultSectorSize = 4096 25 headName = "volume-head-%03d.img" 26 diskName = "volume-snap-%s.img" 27 ) 28 29 var ( 30 diskPattern = regexp.MustCompile(`volume-head-(\d)+.img`) 31 ) 32 33 type Replica struct { 34 sync.RWMutex 35 volume diffDisk 36 dir string 37 info Info 38 diskData map[string]*disk 39 diskChildMap map[string]mapset.Set 40 activeDiskData []*disk 41 readOnly bool 42 } 43 44 type Info struct { 45 Size int64 46 Head string 47 Dirty bool 48 Rebuilding bool 49 Parent string 50 SectorSize int64 51 BackingFileName string 52 BackingFile *BackingFile `json:"-"` 53 } 54 55 type disk struct { 56 name string 57 Parent string 58 Removed bool 59 } 60 61 type BackingFile struct { 62 Size int64 63 SectorSize int64 64 Name string 65 Disk types.DiffDisk 66 } 67 68 type PrepareRemoveAction struct { 69 Action string `json:"action"` 70 Source string `json:"source"` 71 Target string `json:"target"` 72 } 73 74 const ( 75 OpCoalesce = "coalesce" // Source is parent, target is child 76 OpRemove = "remove" 77 OpMarkAsRemoved = "markasremoved" 78 ) 79 80 func ReadInfo(dir string) (Info, error) { 81 var info Info 82 err := (&Replica{dir: dir}).unmarshalFile(volumeMetaData, &info) 83 return info, err 84 } 85 86 func New(size, sectorSize int64, dir string, backingFile *BackingFile) (*Replica, error) { 87 return construct(false, size, sectorSize, dir, "", backingFile) 88 } 89 90 func NewReadOnly(dir, head string, backingFile *BackingFile) (*Replica, error) { 91 // size and sectorSize don't matter because they will be read from metadata 92 return construct(true, 0, 512, dir, head, backingFile) 93 } 94 95 func construct(readonly bool, size, sectorSize int64, dir, head string, backingFile *BackingFile) (*Replica, error) { 96 if size%sectorSize != 0 { 97 return nil, fmt.Errorf("Size %d not a multiple of sector size %d", size, sectorSize) 98 } 99 100 if err := os.Mkdir(dir, 0700); err != nil && !os.IsExist(err) { 101 return nil, err 102 } 103 104 r := &Replica{ 105 dir: dir, 106 activeDiskData: make([]*disk, 1), 107 diskData: make(map[string]*disk), 108 diskChildMap: map[string]mapset.Set{}, 109 } 110 r.info.Size = size 111 r.info.SectorSize = sectorSize 112 r.info.BackingFile = backingFile 113 if backingFile != nil { 114 r.info.BackingFileName = backingFile.Name 115 } 116 r.volume.sectorSize = defaultSectorSize 117 118 exists, err := r.readMetadata() 119 if err != nil { 120 return nil, err 121 } 122 123 // Reference r.info.Size because it may have changed from reading 124 // metadata 125 locationSize := r.info.Size / r.volume.sectorSize 126 if size%defaultSectorSize != 0 { 127 locationSize++ 128 } 129 r.volume.location = make([]byte, locationSize) 130 r.volume.files = []types.DiffDisk{nil} 131 132 if r.readOnly && !exists { 133 return nil, os.ErrNotExist 134 } 135 136 if head != "" { 137 r.info.Head = head 138 } 139 140 if exists { 141 if err := r.openFiles(); err != nil { 142 return nil, err 143 } 144 } else if size <= 0 { 145 return nil, os.ErrNotExist 146 } else { 147 if err := r.createDisk("000"); err != nil { 148 return nil, err 149 } 150 } 151 152 r.info.Parent = r.diskData[r.info.Head].Parent 153 154 r.insertBackingFile() 155 156 return r, r.writeVolumeMetaData(true, r.info.Rebuilding) 157 } 158 159 func GenerateSnapshotDiskName(name string) string { 160 return fmt.Sprintf(diskName, name) 161 } 162 163 func (r *Replica) diskPath(name string) string { 164 return path.Join(r.dir, name) 165 } 166 167 func (r *Replica) insertBackingFile() { 168 if r.info.BackingFile == nil { 169 return 170 } 171 172 d := disk{name: r.info.BackingFile.Name} 173 r.activeDiskData = append([]*disk{&disk{}, &d}, r.activeDiskData[1:]...) 174 r.volume.files = append([]types.DiffDisk{nil, r.info.BackingFile.Disk}, r.volume.files[1:]...) 175 r.diskData[d.name] = &d 176 } 177 178 func (r *Replica) SetRebuilding(rebuilding bool) error { 179 err := r.writeVolumeMetaData(true, rebuilding) 180 if err != nil { 181 return err 182 } 183 r.info.Rebuilding = rebuilding 184 return nil 185 } 186 187 func (r *Replica) Reload() (*Replica, error) { 188 newReplica, err := New(r.info.Size, r.info.SectorSize, r.dir, r.info.BackingFile) 189 if err != nil { 190 return nil, err 191 } 192 newReplica.info.Dirty = r.info.Dirty 193 return newReplica, nil 194 } 195 196 func (r *Replica) findDisk(name string) int { 197 for i, d := range r.activeDiskData { 198 if i == 0 { 199 continue 200 } 201 if d.name == name { 202 return i 203 } 204 } 205 return 0 206 } 207 208 func (r *Replica) RemoveDiffDisk(name string, markOnly bool) error { 209 r.Lock() 210 defer r.Unlock() 211 212 if name == r.info.Head { 213 return fmt.Errorf("Can not delete the active differencing disk") 214 } 215 216 if markOnly { 217 if err := r.markDiskAsRemoved(name); err != nil { 218 // ignore error deleting files 219 logrus.Errorf("Failed to delete %s: %v", name, err) 220 } 221 return nil 222 } 223 224 if err := r.removeDiskNode(name); err != nil { 225 return err 226 } 227 228 if err := r.rmDisk(name); err != nil { 229 return err 230 } 231 232 return nil 233 } 234 235 func (r *Replica) removeDiskNode(name string) error { 236 // If snapshot has no child, then we can safely delete it 237 // And it's definitely not in the live chain 238 children := r.diskChildMap[name] 239 if children == nil { 240 r.updateChildDisk(name, "") 241 delete(r.diskData, name) 242 return nil 243 } 244 245 // If snapshot has more than one child, we cannot really delete it 246 // Caller should call with markOnly=true instead 247 if children.Cardinality() > 1 { 248 return fmt.Errorf("Cannot remove snapshot %v with %v children", 249 name, children.Cardinality()) 250 } 251 252 // only one child from here 253 childIter := <-children.Iter() 254 child := childIter.(string) 255 r.updateChildDisk(name, child) 256 if err := r.updateParentDisk(child, name); err != nil { 257 return err 258 } 259 delete(r.diskData, name) 260 261 index := r.findDisk(name) 262 if index <= 0 { 263 return nil 264 } 265 if err := r.volume.RemoveIndex(index); err != nil { 266 return err 267 } 268 if len(r.activeDiskData)-2 == index { 269 r.info.Parent = r.diskData[r.info.Head].Parent 270 } 271 r.activeDiskData = append(r.activeDiskData[:index], r.activeDiskData[index+1:]...) 272 273 return nil 274 } 275 276 func (r *Replica) PrepareRemoveDisk(name string) ([]PrepareRemoveAction, error) { 277 r.Lock() 278 defer r.Unlock() 279 280 action := []PrepareRemoveAction{} 281 disk := name 282 283 if _, exists := r.diskData[disk]; !exists { 284 disk = GenerateSnapshotDiskName(name) 285 if _, exists := r.diskData[disk]; !exists { 286 return nil, fmt.Errorf("Can not find snapshot %v", disk) 287 } 288 } 289 290 if disk == r.info.Head { 291 return nil, fmt.Errorf("Can not delete the active differencing disk") 292 } 293 294 // 1) leaf node 295 children := r.diskChildMap[disk] 296 if children == nil { 297 action = append(action, PrepareRemoveAction{ 298 Action: OpRemove, 299 Source: disk, 300 }) 301 return action, nil 302 } 303 304 // 2) has only one child and is not head 305 if children.Cardinality() == 1 { 306 child := (<-children.Iter()).(string) 307 if child != r.info.Head { 308 action = append(action, 309 PrepareRemoveAction{ 310 Action: OpCoalesce, 311 Source: disk, 312 Target: child, 313 }, 314 PrepareRemoveAction{ 315 Action: OpRemove, 316 Source: disk, 317 }) 318 return action, nil 319 } 320 } 321 322 // 3) for other situation, we only mark it as removed 323 action = append(action, PrepareRemoveAction{ 324 Action: OpMarkAsRemoved, 325 Source: disk, 326 }) 327 return action, nil 328 } 329 330 func (r *Replica) Info() Info { 331 return r.info 332 } 333 334 func (r *Replica) DisplayChain() ([]string, error) { 335 result := make([]string, 0, len(r.activeDiskData)) 336 337 cur := r.info.Head 338 for cur != "" { 339 disk, ok := r.diskData[cur] 340 if !ok { 341 return nil, fmt.Errorf("Failed to find metadata for %s", cur) 342 } 343 if !disk.Removed { 344 result = append(result, cur) 345 } 346 cur = r.diskData[cur].Parent 347 } 348 349 return result, nil 350 } 351 352 func (r *Replica) Chain() ([]string, error) { 353 result := make([]string, 0, len(r.activeDiskData)) 354 355 cur := r.info.Head 356 for cur != "" { 357 result = append(result, cur) 358 if _, ok := r.diskData[cur]; !ok { 359 return nil, fmt.Errorf("Failed to find metadata for %s", cur) 360 } 361 cur = r.diskData[cur].Parent 362 } 363 364 return result, nil 365 } 366 367 func (r *Replica) writeVolumeMetaData(dirty, rebuilding bool) error { 368 info := r.info 369 info.Dirty = dirty 370 info.Rebuilding = rebuilding 371 return r.encodeToFile(&info, volumeMetaData) 372 } 373 374 func (r *Replica) isBackingFile(index int) bool { 375 if r.info.BackingFile == nil { 376 return false 377 } 378 return index == 1 379 } 380 381 func (r *Replica) close() error { 382 for i, f := range r.volume.files { 383 if f != nil && !r.isBackingFile(i) { 384 f.Close() 385 } 386 } 387 388 return r.writeVolumeMetaData(false, r.info.Rebuilding) 389 } 390 391 func (r *Replica) encodeToFile(obj interface{}, file string) error { 392 if r.readOnly { 393 return nil 394 } 395 396 f, err := os.Create(r.diskPath(file + ".tmp")) 397 if err != nil { 398 return err 399 } 400 defer f.Close() 401 402 if err := json.NewEncoder(f).Encode(&obj); err != nil { 403 return err 404 } 405 406 if err := f.Close(); err != nil { 407 return err 408 } 409 410 return os.Rename(r.diskPath(file+".tmp"), r.diskPath(file)) 411 } 412 413 func (r *Replica) nextFile(parsePattern *regexp.Regexp, pattern, parent string) (string, error) { 414 if parent == "" { 415 return fmt.Sprintf(pattern, 0), nil 416 } 417 418 matches := parsePattern.FindStringSubmatch(parent) 419 if matches == nil { 420 return "", fmt.Errorf("Invalid name %s does not match pattern: %v", parent, parsePattern) 421 } 422 423 index, _ := strconv.Atoi(matches[1]) 424 return fmt.Sprintf(pattern, index+1), nil 425 } 426 427 func (r *Replica) openFile(name string, flag int) (types.DiffDisk, error) { 428 f, err := os.OpenFile(r.diskPath(name), syscall.O_DIRECT|os.O_RDWR|os.O_CREATE|flag, 0666) 429 if err != nil { 430 return nil, err 431 } 432 return &directFile{ 433 File: f, 434 }, nil 435 } 436 437 func (r *Replica) createNewHead(oldHead, parent string) (types.DiffDisk, disk, error) { 438 newHeadName, err := r.nextFile(diskPattern, headName, oldHead) 439 if err != nil { 440 return nil, disk{}, err 441 } 442 443 if _, err := os.Stat(r.diskPath(newHeadName)); err == nil { 444 return nil, disk{}, fmt.Errorf("%s already exists", newHeadName) 445 } 446 447 f, err := r.openFile(newHeadName, os.O_TRUNC) 448 if err != nil { 449 return nil, disk{}, err 450 } 451 if err := syscall.Truncate(r.diskPath(newHeadName), r.info.Size); err != nil { 452 return nil, disk{}, err 453 } 454 455 newDisk := disk{Parent: parent, name: newHeadName, Removed: false} 456 err = r.encodeToFile(&newDisk, newHeadName+metadataSuffix) 457 return f, newDisk, err 458 } 459 460 func (r *Replica) linkDisk(oldname, newname string) error { 461 if oldname == "" { 462 return nil 463 } 464 465 dest := r.diskPath(newname) 466 if _, err := os.Stat(dest); err == nil { 467 logrus.Infof("Old file %s exists, deleting", dest) 468 if err := os.Remove(dest); err != nil { 469 return err 470 } 471 } 472 473 if err := os.Link(r.diskPath(oldname), dest); err != nil { 474 return err 475 } 476 477 return os.Link(r.diskPath(oldname+metadataSuffix), r.diskPath(newname+metadataSuffix)) 478 } 479 480 func (r *Replica) markDiskAsRemoved(name string) error { 481 disk, ok := r.diskData[name] 482 if !ok { 483 return fmt.Errorf("Cannot find disk %v", name) 484 } 485 if stat, err := os.Stat(r.diskPath(name)); err != nil || stat.IsDir() { 486 return fmt.Errorf("Cannot find disk file %v", name) 487 } 488 if stat, err := os.Stat(r.diskPath(name + metadataSuffix)); err != nil || stat.IsDir() { 489 return fmt.Errorf("Cannot find disk metafile %v", name+metadataSuffix) 490 } 491 disk.Removed = true 492 r.diskData[name] = disk 493 return r.encodeToFile(disk, name+metadataSuffix) 494 } 495 496 func (r *Replica) rmDisk(name string) error { 497 if name == "" { 498 return nil 499 } 500 501 lastErr := os.Remove(r.diskPath(name)) 502 if err := os.Remove(r.diskPath(name + metadataSuffix)); err != nil { 503 lastErr = err 504 } 505 return lastErr 506 } 507 508 func (r *Replica) revertDisk(parent string) (*Replica, error) { 509 if _, err := os.Stat(r.diskPath(parent)); err != nil { 510 return nil, err 511 } 512 513 oldHead := r.info.Head 514 f, newHeadDisk, err := r.createNewHead(oldHead, parent) 515 if err != nil { 516 return nil, err 517 } 518 defer f.Close() 519 520 info := r.info 521 info.Head = newHeadDisk.name 522 info.Dirty = true 523 info.Parent = newHeadDisk.Parent 524 525 if err := r.encodeToFile(&info, volumeMetaData); err != nil { 526 r.encodeToFile(&r.info, volumeMetaData) 527 return nil, err 528 } 529 530 // Need to execute before r.Reload() to update r.diskChildMap 531 r.rmDisk(oldHead) 532 533 rNew, err := r.Reload() 534 if err != nil { 535 return nil, err 536 } 537 return rNew, nil 538 } 539 540 func (r *Replica) createDisk(name string) error { 541 if r.readOnly { 542 return fmt.Errorf("Can not create disk on read-only replica") 543 } 544 545 done := false 546 oldHead := r.info.Head 547 newSnapName := GenerateSnapshotDiskName(name) 548 549 if oldHead == "" { 550 newSnapName = "" 551 } 552 553 f, newHeadDisk, err := r.createNewHead(oldHead, newSnapName) 554 if err != nil { 555 return err 556 } 557 defer func() { 558 if !done { 559 r.rmDisk(newHeadDisk.name) 560 r.rmDisk(newHeadDisk.Parent) 561 f.Close() 562 return 563 } 564 r.rmDisk(oldHead) 565 }() 566 567 if err := r.linkDisk(r.info.Head, newHeadDisk.Parent); err != nil { 568 return err 569 } 570 571 info := r.info 572 info.Head = newHeadDisk.name 573 info.Dirty = true 574 info.Parent = newHeadDisk.Parent 575 576 if err := r.encodeToFile(&info, volumeMetaData); err != nil { 577 return err 578 } 579 580 done = true 581 r.diskData[newHeadDisk.name] = &newHeadDisk 582 if newHeadDisk.Parent != "" { 583 r.addChildDisk(newHeadDisk.Parent, newHeadDisk.name) 584 585 r.diskData[newHeadDisk.Parent] = r.diskData[oldHead] 586 r.updateChildDisk(oldHead, newHeadDisk.Parent) 587 r.activeDiskData[len(r.activeDiskData)-1].name = newHeadDisk.Parent 588 } 589 delete(r.diskData, oldHead) 590 591 r.info = info 592 r.volume.files = append(r.volume.files, f) 593 r.activeDiskData = append(r.activeDiskData, &newHeadDisk) 594 595 return nil 596 } 597 598 func (r *Replica) addChildDisk(parent, child string) { 599 children, exists := r.diskChildMap[parent] 600 if !exists { 601 children = mapset.NewSet() 602 } 603 children.Add(child) 604 r.diskChildMap[parent] = children 605 } 606 607 func (r *Replica) rmChildDisk(parent, child string) { 608 children, exists := r.diskChildMap[parent] 609 if !exists { 610 return 611 } 612 if !children.Contains(child) { 613 return 614 } 615 children.Remove(child) 616 if children.Cardinality() == 0 { 617 delete(r.diskChildMap, parent) 618 return 619 } 620 r.diskChildMap[parent] = children 621 } 622 623 func (r *Replica) updateChildDisk(oldName, newName string) { 624 parent := r.diskData[oldName].Parent 625 r.rmChildDisk(parent, oldName) 626 if newName != "" { 627 r.addChildDisk(parent, newName) 628 } 629 } 630 631 func (r *Replica) updateParentDisk(name, oldParent string) error { 632 child := r.diskData[name] 633 if oldParent != "" { 634 child.Parent = r.diskData[oldParent].Parent 635 } else { 636 child.Parent = "" 637 } 638 r.diskData[name] = child 639 return r.encodeToFile(child, child.name+metadataSuffix) 640 } 641 642 func (r *Replica) openFiles() error { 643 // We have live chain, which will be included here 644 // We also need to scan all other disks, and track them properly 645 chain, err := r.Chain() 646 if err != nil { 647 return err 648 } 649 650 for i := len(chain) - 1; i >= 0; i-- { 651 parent := chain[i] 652 f, err := r.openFile(parent, 0) 653 if err != nil { 654 return err 655 } 656 657 r.volume.files = append(r.volume.files, f) 658 r.activeDiskData = append(r.activeDiskData, r.diskData[parent]) 659 } 660 661 return nil 662 } 663 664 func (r *Replica) readMetadata() (bool, error) { 665 r.diskData = make(map[string]*disk) 666 667 files, err := ioutil.ReadDir(r.dir) 668 if os.IsNotExist(err) { 669 return false, nil 670 } 671 if err != nil { 672 return false, err 673 } 674 675 for _, file := range files { 676 if file.Name() == volumeMetaData { 677 if err := r.unmarshalFile(file.Name(), &r.info); err != nil { 678 return false, err 679 } 680 r.volume.sectorSize = defaultSectorSize 681 } else if strings.HasSuffix(file.Name(), metadataSuffix) { 682 if err := r.readDiskData(file.Name()); err != nil { 683 return false, err 684 } 685 } 686 } 687 688 return len(r.diskData) > 0, nil 689 } 690 691 func (r *Replica) readDiskData(file string) error { 692 var data disk 693 if err := r.unmarshalFile(file, &data); err != nil { 694 return err 695 } 696 697 name := file[:len(file)-len(metadataSuffix)] 698 data.name = name 699 r.diskData[name] = &data 700 if data.Parent != "" { 701 r.addChildDisk(data.Parent, data.name) 702 } 703 return nil 704 } 705 706 func (r *Replica) unmarshalFile(file string, obj interface{}) error { 707 p := r.diskPath(file) 708 f, err := os.Open(p) 709 if err != nil { 710 return err 711 } 712 defer f.Close() 713 714 dec := json.NewDecoder(f) 715 return dec.Decode(obj) 716 } 717 718 func (r *Replica) Close() error { 719 r.Lock() 720 defer r.Unlock() 721 722 return r.close() 723 } 724 725 func (r *Replica) Delete() error { 726 r.Lock() 727 defer r.Unlock() 728 729 for name := range r.diskData { 730 if name != r.info.BackingFileName { 731 r.rmDisk(name) 732 } 733 } 734 735 os.Remove(r.diskPath(volumeMetaData)) 736 return nil 737 } 738 739 func (r *Replica) Snapshot(name string) error { 740 r.Lock() 741 defer r.Unlock() 742 743 return r.createDisk(name) 744 } 745 746 func (r *Replica) Revert(name string) (*Replica, error) { 747 r.Lock() 748 defer r.Unlock() 749 750 return r.revertDisk(name) 751 } 752 753 func (r *Replica) WriteAt(buf []byte, offset int64) (int, error) { 754 if r.readOnly { 755 return 0, fmt.Errorf("Can not write on read-only replica") 756 } 757 758 r.RLock() 759 r.info.Dirty = true 760 c, err := r.volume.WriteAt(buf, offset) 761 r.RUnlock() 762 return c, err 763 } 764 765 func (r *Replica) ReadAt(buf []byte, offset int64) (int, error) { 766 r.RLock() 767 c, err := r.volume.ReadAt(buf, offset) 768 r.RUnlock() 769 return c, err 770 }