github.com/whamcloud/lemur@v0.0.0-20190827193804-4655df8a52af/cmd/lhsm-plugin-posix/posix/mover.go

// Copyright (c) 2018 DDN. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package posix

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/intel-hpdd/lemur/dmplugin"
	"github.com/intel-hpdd/lemur/dmplugin/dmio"
	"github.com/intel-hpdd/lemur/pkg/checksum"
	"github.com/intel-hpdd/lemur/pkg/zipcheck"
	"github.com/intel-hpdd/logging/alert"
	"github.com/intel-hpdd/logging/audit"
	"github.com/intel-hpdd/logging/debug"
	"github.com/pborman/uuid"
	"github.com/pkg/errors"
	"github.com/rcrowley/go-metrics"
)

// rate counts processed HSM actions (archive, restore, remove); the goroutine
// started in init logs the meter rates every 10 seconds when the count has changed.
var rate metrics.Meter

func init() {
	rate = metrics.NewMeter()

	// if debug.Enabled() {
	go func() {
		var lastCount int64
		for {
			if lastCount != rate.Count() {
				audit.Logf("total %s (1 min/5 min/15 min/inst): %s/%s/%s/%s msg/sec\n",
					humanize.Comma(rate.Count()),
					humanize.Comma(int64(rate.Rate1())),
					humanize.Comma(int64(rate.Rate5())),
					humanize.Comma(int64(rate.Rate15())),
					humanize.Comma(int64(rate.RateMean())),
				)
				lastCount = rate.Count()
			}
			time.Sleep(10 * time.Second)
		}
	}()
	// }
}

// Should this be configurable?
const updateInterval = 10 * time.Second

type (

	// ArchiveConfig is configuration for one mover.
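	//
	// An illustrative archive block in the plugin's HCL config might look like
	// the following (a sketch: the enclosing "archive" block name and the values
	// shown are assumptions based on the hcl tags below, not defined in this file):
	//
	//	archive "example" {
	//		id          = 1
	//		root        = "/mnt/archive"
	//		compression = "auto"
	//		checksums {
	//			disabled                   = false
	//			disable_compare_on_restore = false
	//		}
	//	}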
	ArchiveConfig struct {
		Name        string          `hcl:",key"`
		ID          int             `hcl:"id"`
		Root        string          `hcl:"root"`
		Compression string          `hcl:"compression"`
		Checksums   *ChecksumConfig `hcl:"checksums"`
	}

	// ArchiveSet is a list of mover configs.
	ArchiveSet []*ArchiveConfig

	// ChecksumConfig defines the configured behavior for file
	// checksumming in the POSIX data mover.
	ChecksumConfig struct {
		Disabled                bool `hcl:"disabled"`
		DisableCompareOnRestore bool `hcl:"disable_compare_on_restore"`
	}

	// CompressionOption determines whether data compression is enabled.
	CompressionOption int

	// Mover is a POSIX data mover.
	Mover struct {
		Name        string
		ArchiveDir  string
		Compression CompressionOption
		Checksums   ChecksumConfig
	}
)

const (
	// CompressOff disables data compression
	CompressOff CompressionOption = iota
	// CompressOn enables data compression
	CompressOn
	// CompressAuto enables compression when a compressible file is detected
	CompressAuto
)

var (
	// DefaultChecksums is the default checksum configuration (checksums enabled)
	DefaultChecksums ChecksumConfig
)

// String implements fmt.Stringer for ArchiveConfig.
func (a *ArchiveConfig) String() string {
	return fmt.Sprintf("%d:%s", a.ID, a.Root)
}

// CheckValid determines if the archive configuration is a valid one.
func (a *ArchiveConfig) CheckValid() error {
	var errs []string

	if a.Root == "" {
		errs = append(errs, fmt.Sprintf("Archive %s: archive root not set", a.Name))
	}

	if a.ID < 1 {
		errs = append(errs, fmt.Sprintf("Archive %s: archive id not set", a.Name))
	}

	if len(errs) > 0 {
		return errors.Errorf("Errors: %s", strings.Join(errs, ", "))
	}

	return nil
}

// CompressionOption parses the Compression config parameter.
// Unrecognized values fall back to CompressOff.
func (a *ArchiveConfig) CompressionOption() CompressionOption {
	switch a.Compression {
	case "on":
		return CompressOn
	case "off":
		return CompressOff
	case "auto":
		return CompressAuto
	default:
		return CompressOff
	}
}

// Merge the two configs and return a copy.
// Does not return nil, even if both a and other are nil.
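// Non-empty fields in other override the corresponding fields in a; note that
// ID is taken from the receiver and is never overridden. For example
// (illustrative values), merging {Root: "/a", Compression: "off"} with
// {Compression: "auto"} yields {Root: "/a", Compression: "auto"}.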
func (a *ArchiveConfig) Merge(other *ArchiveConfig) *ArchiveConfig {
	var result ArchiveConfig
	if a != nil {
		result = *a
	}
	if other != nil {
		if other.Name != "" {
			result.Name = other.Name
		}
		if other.Root != "" {
			result.Root = other.Root
		}
		if other.Compression != "" {
			result.Compression = other.Compression
		}
		result.Checksums = result.Checksums.Merge(other.Checksums)
	} else {
		// Ensure we have a new copy of Checksums
		result.Checksums = result.Checksums.Merge(nil)
	}
	return &result
}

// Merge the two sets. Currently just returns the other set if it is non-empty;
// otherwise it returns the original set.
// TODO: actually merge the sets here
func (as ArchiveSet) Merge(other ArchiveSet) ArchiveSet {
	if len(other) > 0 {
		return other
	}
	return as
}

// Merge the two configurations. Returns a copy of other if it is not nil,
// otherwise a copy of c; returns nil if both are nil.
func (c *ChecksumConfig) Merge(other *ChecksumConfig) *ChecksumConfig {
	var result ChecksumConfig
	if other != nil {
		result = *other
	} else if c != nil {
		result = *c
	} else {
		return nil
	}

	return &result
}

// NewMover returns a new *Mover
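//
// A minimal usage sketch (illustrative values, error handling elided):
//
//	cfg := &ArchiveConfig{Name: "one", ID: 1, Root: "/mnt/archive", Compression: "auto"}
//	mover, _ := NewMover(cfg)
//	mover.Start()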
func NewMover(config *ArchiveConfig) (*Mover, error) {
	if config.Root == "" {
		return nil, errors.Errorf("Invalid mover config: ArchiveDir is unset")
	}

	return &Mover{
		Name:        config.Name,
		ArchiveDir:  config.Root,
		Compression: config.CompressionOption(),
		Checksums:   *DefaultChecksums.Merge(config.Checksums),
	}, nil
}

func newFileID() string {
	return uuid.New()
}

// CopyWithProgress initiates a movement of data with progress updates
func CopyWithProgress(dst io.Writer, src io.Reader, length int64, action dmplugin.Action) (int64, error) {
	progressFunc := func(offset, n int64) error {
		return action.Update(offset, n, length)
	}
	progressWriter := dmio.NewProgressWriter(dst, updateInterval, progressFunc)
	defer progressWriter.StopUpdates()

	n, err := io.Copy(progressWriter, src)

	return n, err
}

// ChecksumConfig returns the mover's checksum configuration.
// Returns a pointer so the caller can modify the config.
func (m *Mover) ChecksumConfig() *ChecksumConfig {
	return &m.Checksums
}

// ChecksumEnabled returns true if the user has enabled checksum calculation.
func (m *Mover) ChecksumEnabled() bool {
	return !m.Checksums.Disabled
}

// ChecksumWriter returns a checksum.Writer wrapping dst; if checksums are
// disabled, a no-op writer is returned instead.
func (m *Mover) ChecksumWriter(dst io.Writer) (cw checksum.Writer) {
	if m.ChecksumEnabled() {
		cw = checksum.NewSha1HashWriter(dst)
	} else {
		cw = checksum.NewNoopHashWriter(dst)
	}
	return
}

// Destination returns the path to the archived file.
// Exported for testing.
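// For example (illustrative id, not taken from this file): an object id
// beginning "49ab" maps to <ArchiveDir>/objects/49/ab/<id>, a two-level
// fan-out that limits the number of entries in any one directory.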
func (m *Mover) Destination(id string) string {
	dir := path.Join(m.ArchiveDir,
		"objects",
		id[0:2],
		id[2:4])

	err := os.MkdirAll(dir, 0700)
	if err != nil {
		alert.Abort(errors.Wrap(err, "mkdirall failed"))
	}
	return path.Join(dir, id)
}

// Start signals the mover to begin any asynchronous processing (e.g. stats)
func (m *Mover) Start() {
	debug.Printf("%s started", m.Name)
}

// Archive fulfills an HSM Archive request
func (m *Mover) Archive(action dmplugin.Action) error {
	debug.Printf("%s id:%d ARCHIVE %s", m.Name, action.ID(), action.PrimaryPath())
	rate.Mark(1)
	start := time.Now()

	// Initialize Reader for Lustre file
	rdr, total, err := dmio.NewBufferedActionReader(action)
	if err != nil {
		return errors.Wrapf(err, "Could not create archive reader for %s", action)
	}
	defer rdr.Close()

	// If auto-compression is enabled, determine "compressibility"
	enableZip := false
	switch m.Compression {
	case CompressOn:
		enableZip = true
	case CompressAuto:
		reduction, e2 := zipcheck.AnalyzeFile(action.PrimaryPath())
		if e2 != nil {
			return errors.Wrapf(e2, "AnalyzeFile failed")
		}
		// Only compress when the estimated size reduction is worthwhile (>30%).
		if reduction > 30.0 {
			debug.Printf("%s id:%d ZIP %s estimate %0.1f%% reduction", m.Name, action.ID(), action.PrimaryPath(), reduction)
			enableZip = true
		}
	}

	// Initialize Writer for backing file
	fileID := newFileID()
	if enableZip {
		fileID += ".gz"
	}

	dst, err := os.Create(m.Destination(fileID))
	if err != nil {
		return errors.Wrapf(err, "%s: create backing file failed", m.Destination(fileID))
	}
	defer dst.Close()

	var cw checksum.Writer
	if enableZip {
		zip := gzip.NewWriter(dst)
		defer zip.Close()
		cw = m.ChecksumWriter(zip)
	} else {
		cw = m.ChecksumWriter(dst)
	}
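	// In both branches cw hashes the data before any compression, so the stored
	// checksum covers the uncompressed file contents (Restore hashes the
	// decompressed stream, keeping the two sums comparable).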

	// Copy
	n, err := CopyWithProgress(cw, rdr, total, action)
	if err != nil {
		debug.Printf("copy error %v read %d expected %d", err, n, total)
		return errors.Wrap(err, "copy failed")
	}

	debug.Printf("%s id:%d Archived %d bytes in %v from %s to %s %x", m.Name, action.ID(), n,
		time.Since(start),
		action.PrimaryPath(),
		m.Destination(fileID),
		cw.Sum())

	action.SetUUID(fileID)
	action.SetHash(cw.Sum())
	return nil
}

// Restore fulfills an HSM Restore request
func (m *Mover) Restore(action dmplugin.Action) error {
	debug.Printf("%s id:%d RESTORE %s %s %x", m.Name, action.ID(), action.PrimaryPath(), action.UUID(), action.Hash())
	rate.Mark(1)
	start := time.Now()

	// Initialize Reader for backing file
	if action.UUID() == "" {
		return errors.New("Missing UUID")
	}

	enableUnzip := false
	if filepath.Ext(action.UUID()) == ".gz" {
		debug.Printf("%s: id:%d decompressing %s", m.Name, action.ID(), action.UUID())
		enableUnzip = true
	}

	src, err := os.Open(m.Destination(action.UUID()))
	if err != nil {
		return errors.Wrapf(err, "%s: open failed", m.Destination(action.UUID()))
	}
	defer src.Close()

	var rdr io.Reader = bufio.NewReaderSize(src, dmio.BufferSize)

	if enableUnzip {
		unzip, er2 := gzip.NewReader(rdr)
		if er2 != nil {
			return errors.Wrap(er2, "gzip NewReader failed")
		}
		defer unzip.Close()
		rdr = unzip
	}

	// Initialize Writer for restore file on Lustre
	dst, err := dmio.NewActionWriter(action)
	if err != nil {
		return errors.Wrapf(err, "Failed to create ActionWriter for %s", action)
	}
	defer dst.Close()

	length, err := dmio.ActualLength(action, dst)
	if err != nil {
		return errors.Wrap(err, "Unable to determine actual file length")
	}

	cw := m.ChecksumWriter(dst)

	// Copy
	n, err := CopyWithProgress(cw, rdr, length, action)
	if err != nil {
		debug.Printf("copy error %v read %d expected %d", err, n, length)
		return errors.Wrap(err, "copy failed")
	}
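	// Compare the restored data's hash against the hash recorded at archive
	// time; both are computed over the uncompressed data.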
	if action.Hash() != nil && !m.Checksums.DisableCompareOnRestore {
		if !bytes.Equal(action.Hash(), cw.Sum()) {
			alert.Warnf("original checksum doesn't match new: %x != %x", action.Hash(), cw.Sum())
			return errors.New("Checksum mismatch!")
		}
	}

	debug.Printf("%s id:%d Restored %d bytes in %v to %s %x", m.Name, action.ID(), n,
		time.Since(start),
		action.PrimaryPath(),
		cw.Sum())
	action.SetActualLength(n)
	return nil
}

// Remove fulfills an HSM Remove request
func (m *Mover) Remove(action dmplugin.Action) error {
	debug.Printf("%s id:%d REMOVE %s %s", m.Name, action.ID(), action.PrimaryPath(), action.UUID())
	rate.Mark(1)
	if action.UUID() == "" {
		return errors.New("Missing UUID")
	}

	return os.Remove(m.Destination(action.UUID()))
}