github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/decomposedfs/tree/propagator/async.go (about)

     1  // Copyright 2018-2021 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  package propagator
    20  
    21  import (
    22  	"context"
    23  	"os"
    24  	"path/filepath"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/cs3org/reva/v2/pkg/appctx"
    30  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/lookup"
    31  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/metadata"
    32  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/metadata/prefixes"
    33  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/node"
    34  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/options"
    35  	"github.com/google/renameio/v2"
    36  	"github.com/google/uuid"
    37  	"github.com/pkg/errors"
    38  	"github.com/rogpeppe/go-internal/lockedfile"
    39  	"github.com/rs/zerolog"
    40  	"github.com/shamaton/msgpack/v2"
    41  )
    42  
    43  var _propagationGracePeriod = 3 * time.Minute
    44  
    45  // AsyncPropagator implements asynchronous treetime & treesize propagation
    46  type AsyncPropagator struct {
    47  	treeSizeAccounting bool
    48  	treeTimeAccounting bool
    49  	propagationDelay   time.Duration
    50  	lookup             node.PathLookup
    51  	log                *zerolog.Logger
    52  }
    53  
    54  // Change represents a change to the tree
    55  type Change struct {
    56  	SyncTime time.Time
    57  	SizeDiff int64
    58  }
    59  
    60  // NewAsyncPropagator returns a new AsyncPropagator instance
    61  func NewAsyncPropagator(treeSizeAccounting, treeTimeAccounting bool, o options.AsyncPropagatorOptions, lookup node.PathLookup, log *zerolog.Logger) AsyncPropagator {
    62  	p := AsyncPropagator{
    63  		treeSizeAccounting: treeSizeAccounting,
    64  		treeTimeAccounting: treeTimeAccounting,
    65  		propagationDelay:   o.PropagationDelay,
    66  		lookup:             lookup,
    67  		log:                log,
    68  	}
    69  
    70  	log.Info().Msg("async propagator starting up...")
    71  
    72  	// spawn a goroutine that watches for stale .processing dirs and fixes them
    73  	go func() {
    74  		if !p.treeTimeAccounting && !p.treeSizeAccounting {
    75  			// no propagation enabled
    76  			log.Debug().Msg("propagation disabled or nothing to propagate")
    77  			return
    78  		}
    79  
    80  		changesDirPath := filepath.Join(p.lookup.InternalRoot(), "changes")
    81  		doSleep := false // switch to not sleep on the first iteration
    82  		for {
    83  			if doSleep {
    84  				time.Sleep(5 * time.Minute)
    85  			}
    86  			doSleep = true
    87  			log.Debug().Msg("scanning for stale .processing dirs")
    88  
    89  			entries, err := filepath.Glob(changesDirPath + "/**/*")
    90  			if err != nil {
    91  				log.Error().Err(err).Msg("failed to list changes")
    92  				continue
    93  			}
    94  
    95  			for _, e := range entries {
    96  				changesDirPath := e
    97  				entry, err := os.Stat(changesDirPath)
    98  				if err != nil {
    99  					continue
   100  				}
   101  				// recover all dirs that seem to have been stuck
   102  				if !entry.IsDir() || time.Now().Before(entry.ModTime().Add(_propagationGracePeriod)) {
   103  					continue
   104  				}
   105  
   106  				go func() {
   107  					if !strings.HasSuffix(changesDirPath, ".processing") {
   108  						// first rename the existing node dir
   109  						err = os.Rename(changesDirPath, changesDirPath+".processing")
   110  						if err != nil {
   111  							return
   112  						}
   113  						changesDirPath += ".processing"
   114  					}
   115  
   116  					log.Debug().Str("dir", changesDirPath).Msg("propagating stale .processing dir")
   117  					parts := strings.SplitN(entry.Name(), ":", 2)
   118  					if len(parts) != 2 {
   119  						log.Error().Str("file", entry.Name()).Msg("encountered invalid .processing dir")
   120  						return
   121  					}
   122  
   123  					now := time.Now()
   124  					_ = os.Chtimes(changesDirPath, now, now)
   125  					p.propagate(context.Background(), parts[0], strings.TrimSuffix(parts[1], ".processing"), true, *log)
   126  				}()
   127  			}
   128  		}
   129  	}()
   130  
   131  	return p
   132  }
   133  
   134  // Propagate triggers a propagation
   135  func (p AsyncPropagator) Propagate(ctx context.Context, n *node.Node, sizeDiff int64) error {
   136  	ctx, span := tracer.Start(ctx, "Propagate")
   137  	defer span.End()
   138  	log := appctx.GetLogger(ctx).With().
   139  		Str("method", "async.Propagate").
   140  		Str("spaceid", n.SpaceID).
   141  		Str("nodeid", n.ID).
   142  		Str("parentid", n.ParentID).
   143  		Int64("sizeDiff", sizeDiff).
   144  		Logger()
   145  
   146  	if !p.treeTimeAccounting && (!p.treeSizeAccounting || sizeDiff == 0) {
   147  		// no propagation enabled
   148  		log.Debug().Msg("propagation disabled or nothing to propagate")
   149  		return nil
   150  	}
   151  
   152  	// add a change to the parent node
   153  	c := Change{
   154  		// use a sync time and don't rely on the mtime of the current node, as the stat might not change when a rename happened too quickly
   155  		SyncTime: time.Now().UTC(),
   156  		SizeDiff: sizeDiff,
   157  	}
   158  	go p.queuePropagation(ctx, n.SpaceID, n.ParentID, c, log)
   159  
   160  	return nil
   161  }
   162  
   163  func (p AsyncPropagator) queuePropagation(ctx context.Context, spaceID, nodeID string, change Change, log zerolog.Logger) {
   164  	// add a change to the parent node
   165  	changePath := p.changesPath(spaceID, nodeID, uuid.New().String()+".mpk")
   166  
   167  	data, err := msgpack.Marshal(change)
   168  	if err != nil {
   169  		log.Error().Err(err).Msg("failed to marshal Change")
   170  		return
   171  	}
   172  
   173  	_, subspan := tracer.Start(ctx, "write changes file")
   174  	ready := false
   175  	triggerPropagation := false
   176  	_ = os.MkdirAll(filepath.Dir(filepath.Dir(changePath)), 0700)
   177  	err = os.Mkdir(filepath.Dir(changePath), 0700)
   178  	triggerPropagation = err == nil || os.IsExist(err) // only the first goroutine, which succeeds to create the directory, is supposed to actually trigger the propagation
   179  	for retries := 0; retries <= 500; retries++ {
   180  		err := renameio.WriteFile(changePath, data, 0644)
   181  		if err == nil {
   182  			ready = true
   183  			break
   184  		}
   185  		log.Debug().Err(err).Msg("failed to write Change to disk (retrying)")
   186  		err = os.Mkdir(filepath.Dir(changePath), 0700)
   187  		triggerPropagation = err == nil || os.IsExist(err) // only the first goroutine, which succeeds to create the directory, is supposed to actually trigger the propagation
   188  	}
   189  
   190  	if !ready {
   191  		log.Error().Err(err).Msg("failed to write Change to disk")
   192  		return
   193  	}
   194  	subspan.End()
   195  
   196  	if !triggerPropagation {
   197  		return
   198  	}
   199  
   200  	_, subspan = tracer.Start(ctx, "delay propagation")
   201  	time.Sleep(p.propagationDelay) // wait a moment before propagating
   202  	subspan.End()
   203  
   204  	log.Debug().Msg("propagating")
   205  	// add a change to the parent node
   206  	changeDirPath := p.changesPath(spaceID, nodeID, "")
   207  
   208  	// first rename the existing node dir
   209  	err = os.Rename(changeDirPath, changeDirPath+".processing")
   210  	if err != nil {
   211  		// This can fail in 2 ways
   212  		// 1. source does not exist anymore as it has already been propagated by another goroutine
   213  		//    -> ignore, as the change is already being processed
   214  		// 2. target already exists because a previous propagation is still running
   215  		//    -> ignore, the previous propagation will pick the new changes up
   216  		return
   217  	}
   218  	p.propagate(ctx, spaceID, nodeID, false, log)
   219  }
   220  
   221  func (p AsyncPropagator) propagate(ctx context.Context, spaceID, nodeID string, recalculateTreeSize bool, log zerolog.Logger) {
   222  	changeDirPath := p.changesPath(spaceID, nodeID, "")
   223  	processingPath := changeDirPath + ".processing"
   224  
   225  	cleanup := func() {
   226  		err := os.RemoveAll(processingPath)
   227  		if err != nil {
   228  			log.Error().Err(err).Msg("Could not remove .processing dir")
   229  		}
   230  	}
   231  
   232  	_, subspan := tracer.Start(ctx, "list changes files")
   233  	d, err := os.Open(processingPath)
   234  	if err != nil {
   235  		log.Error().Err(err).Msg("Could not open change .processing dir")
   236  		cleanup()
   237  		return
   238  	}
   239  	defer d.Close()
   240  	names, err := d.Readdirnames(0)
   241  	if err != nil {
   242  		log.Error().Err(err).Msg("Could not read dirnames")
   243  		cleanup()
   244  		return
   245  	}
   246  	subspan.End()
   247  
   248  	_, subspan = tracer.Start(ctx, "read changes files")
   249  	pc := Change{}
   250  	for _, name := range names {
   251  		if !strings.HasSuffix(name, ".mpk") {
   252  			continue
   253  		}
   254  
   255  		b, err := os.ReadFile(filepath.Join(processingPath, name))
   256  		if err != nil {
   257  			log.Error().Err(err).Msg("Could not read change")
   258  			cleanup()
   259  			return
   260  		}
   261  		c := Change{}
   262  		err = msgpack.Unmarshal(b, &c)
   263  		if err != nil {
   264  			log.Error().Err(err).Msg("Could not unmarshal change")
   265  			cleanup()
   266  			return
   267  		}
   268  		if c.SyncTime.After(pc.SyncTime) {
   269  			pc.SyncTime = c.SyncTime
   270  		}
   271  		pc.SizeDiff += c.SizeDiff
   272  	}
   273  	subspan.End()
   274  
   275  	// TODO do we need to write an aggregated parentchange file?
   276  
   277  	attrs := node.Attributes{}
   278  
   279  	var f *lockedfile.File
   280  	// lock parent before reading treesize or tree time
   281  	nodePath := filepath.Join(p.lookup.InternalRoot(), "spaces", lookup.Pathify(spaceID, 1, 2), "nodes", lookup.Pathify(nodeID, 4, 2))
   282  
   283  	_, subspan = tracer.Start(ctx, "lockedfile.OpenFile")
   284  	lockFilepath := p.lookup.MetadataBackend().LockfilePath(nodePath)
   285  	f, err = lockedfile.OpenFile(lockFilepath, os.O_RDWR|os.O_CREATE, 0600)
   286  	subspan.End()
   287  	if err != nil {
   288  		log.Error().Err(err).
   289  			Str("lock filepath", lockFilepath).
   290  			Msg("Propagation failed. Could not open metadata for node with lock.")
   291  		cleanup()
   292  		return
   293  	}
   294  	// always log error if closing node fails
   295  	defer func() {
   296  		// ignore already closed error
   297  		cerr := f.Close()
   298  		if err == nil && cerr != nil && !errors.Is(cerr, os.ErrClosed) {
   299  			err = cerr // only overwrite err with en error from close if the former was nil
   300  		}
   301  	}()
   302  
   303  	_, subspan = tracer.Start(ctx, "node.ReadNode")
   304  	var n *node.Node
   305  	if n, err = node.ReadNode(ctx, p.lookup, spaceID, nodeID, false, nil, false); err != nil {
   306  		log.Error().Err(err).
   307  			Msg("Propagation failed. Could not read node.")
   308  		cleanup()
   309  		return
   310  	}
   311  	subspan.End()
   312  
   313  	if !n.Exists {
   314  		log.Debug().Str("attr", prefixes.PropagationAttr).Msg("node does not exist anymore, not propagating")
   315  		cleanup()
   316  		return
   317  	}
   318  
   319  	if !n.HasPropagation(ctx) {
   320  		log.Debug().Str("attr", prefixes.PropagationAttr).Msg("propagation attribute not set or unreadable, not propagating")
   321  		cleanup()
   322  		return
   323  	}
   324  
   325  	if p.treeTimeAccounting {
   326  		// update the parent tree time if it is older than the nodes mtime
   327  		updateSyncTime := false
   328  
   329  		var tmTime time.Time
   330  		tmTime, err = n.GetTMTime(ctx)
   331  		switch {
   332  		case err != nil:
   333  			// missing attribute, or invalid format, overwrite
   334  			log.Debug().Err(err).
   335  				Msg("could not read tmtime attribute, overwriting")
   336  			updateSyncTime = true
   337  		case tmTime.Before(pc.SyncTime):
   338  			log.Debug().
   339  				Time("tmtime", tmTime).
   340  				Time("stime", pc.SyncTime).
   341  				Msg("parent tmtime is older than node mtime, updating")
   342  			updateSyncTime = true
   343  		default:
   344  			log.Debug().
   345  				Time("tmtime", tmTime).
   346  				Time("stime", pc.SyncTime).
   347  				Dur("delta", pc.SyncTime.Sub(tmTime)).
   348  				Msg("node tmtime is younger than stime, not updating")
   349  		}
   350  
   351  		if updateSyncTime {
   352  			// update the tree time of the parent node
   353  			attrs.SetString(prefixes.TreeMTimeAttr, pc.SyncTime.UTC().Format(time.RFC3339Nano))
   354  		}
   355  
   356  		attrs.SetString(prefixes.TmpEtagAttr, "")
   357  	}
   358  
   359  	// size accounting
   360  	if p.treeSizeAccounting && pc.SizeDiff != 0 {
   361  		var newSize uint64
   362  
   363  		// read treesize
   364  		treeSize, err := n.GetTreeSize(ctx)
   365  		switch {
   366  		case recalculateTreeSize || metadata.IsAttrUnset(err):
   367  			// fallback to calculating the treesize
   368  			log.Warn().Msg("treesize attribute unset, falling back to calculating the treesize")
   369  			newSize, err = calculateTreeSize(ctx, p.lookup, n.InternalPath())
   370  			if err != nil {
   371  				log.Error().Err(err).
   372  					Msg("Error when calculating treesize of node.") // FIXME wat?
   373  				cleanup()
   374  				return
   375  			}
   376  		case err != nil:
   377  			log.Error().Err(err).
   378  				Msg("Failed to propagate treesize change. Error when reading the treesize attribute from node")
   379  			cleanup()
   380  			return
   381  		case pc.SizeDiff > 0:
   382  			newSize = treeSize + uint64(pc.SizeDiff)
   383  		case uint64(-pc.SizeDiff) > treeSize:
   384  			// The sizeDiff is larger than the current treesize. Which would result in
   385  			// a negative new treesize. Something must have gone wrong with the accounting.
   386  			// Reset the current treesize to 0.
   387  			log.Error().Uint64("treeSize", treeSize).Int64("sizeDiff", pc.SizeDiff).
   388  				Msg("Error when updating treesize of node. Updated treesize < 0. Resetting to 0")
   389  			newSize = 0
   390  		default:
   391  			newSize = treeSize - uint64(-pc.SizeDiff)
   392  		}
   393  
   394  		// update the tree size of the node
   395  		attrs.SetString(prefixes.TreesizeAttr, strconv.FormatUint(newSize, 10))
   396  		log.Debug().Uint64("newSize", newSize).Msg("updated treesize of node")
   397  	}
   398  
   399  	if err = n.SetXattrsWithContext(ctx, attrs, false); err != nil {
   400  		log.Error().Err(err).Msg("Failed to update extend attributes of node")
   401  		cleanup()
   402  		return
   403  	}
   404  
   405  	// Release node lock early, ignore already closed error
   406  	_, subspan = tracer.Start(ctx, "f.Close")
   407  	cerr := f.Close()
   408  	subspan.End()
   409  	if cerr != nil && !errors.Is(cerr, os.ErrClosed) {
   410  		log.Error().Err(cerr).Msg("Failed to close node and release lock")
   411  	}
   412  
   413  	log.Info().Msg("Propagation done. cleaning up")
   414  	cleanup()
   415  
   416  	if !n.IsSpaceRoot(ctx) {
   417  		p.queuePropagation(ctx, n.SpaceID, n.ParentID, pc, log)
   418  	}
   419  
   420  	// Check for a changes dir that might have been added meanwhile and pick it up
   421  	if _, err = os.Open(changeDirPath); err == nil {
   422  		log.Info().Msg("Found a new changes dir. starting next propagation")
   423  		time.Sleep(p.propagationDelay) // wait a moment before propagating
   424  		err = os.Rename(changeDirPath, processingPath)
   425  		if err != nil {
   426  			// This can fail in 2 ways
   427  			// 1. source does not exist anymore as it has already been propagated by another goroutine
   428  			//    -> ignore, as the change is already being processed
   429  			// 2. target already exists because a previous propagation is still running
   430  			//    -> ignore, the previous propagation will pick the new changes up
   431  			return
   432  		}
   433  		p.propagate(ctx, spaceID, nodeID, false, log)
   434  	}
   435  }
   436  
   437  func (p AsyncPropagator) changesPath(spaceID, nodeID, filename string) string {
   438  	return filepath.Join(p.lookup.InternalRoot(), "changes", spaceID[0:2], spaceID+":"+nodeID, filename)
   439  }