github.com/whamcloud/lemur@v0.0.0-20190827193804-4655df8a52af/cmd/lhsmd/agent/agent_action.go (about)

     1  // Copyright (c) 2018 DDN. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package agent
     6  
     7  import (
     8  	"encoding/hex"
     9  	"encoding/json"
    10  	"fmt"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"github.com/pkg/errors"
    15  
    16  	"github.com/intel-hpdd/lemur/cmd/lhsmd/agent/fileid"
    17  	pb "github.com/intel-hpdd/lemur/pdm"
    18  	"github.com/intel-hpdd/logging/alert"
    19  	"github.com/intel-hpdd/logging/audit"
    20  	"github.com/intel-hpdd/logging/debug"
    21  
    22  	"github.com/intel-hpdd/go-lustre/fs"
    23  	"github.com/intel-hpdd/go-lustre/hsm"
    24  	"github.com/intel-hpdd/go-lustre/llapi"
    25  )
    26  
    27  type (
    28  	// ActionID is a unique (per agent instance) ID for HSM actions
    29  	ActionID uint64
    30  
    31  	// Action represents an HSM action
    32  	Action struct {
    33  		id    ActionID
    34  		aih   hsm.ActionHandle
    35  		agent *HsmAgent
    36  		start time.Time
    37  		UUID  string
    38  		Hash  []byte
    39  		URL   string
    40  		Data  []byte
    41  	}
    42  
    43  	// ActionData is extra data passed to the Agent by policy engine
    44  	ActionData struct {
    45  		FileID    []byte `json:"file_id"`
    46  		MoverData []byte `json:"mover_data"`
    47  	}
    48  )
    49  
    50  var actionIDCounter ActionID
    51  
    52  // NextActionID returns monotonically-increasing ActionIDs
    53  func NextActionID() ActionID {
    54  	return ActionID(atomic.AddUint64((*uint64)(&actionIDCounter), 1))
    55  }
    56  
    57  func (action *Action) String() string {
    58  	return fmt.Sprintf("id:%d %s %v ", action.id, action.aih.Action(), action.aih.Fid())
    59  }
    60  
    61  func hsm2Command(a llapi.HsmAction) (c pb.Command) {
    62  	switch a {
    63  	case llapi.HsmActionArchive:
    64  		c = pb.Command_ARCHIVE
    65  	case llapi.HsmActionRestore:
    66  		c = pb.Command_RESTORE
    67  	case llapi.HsmActionRemove:
    68  		c = pb.Command_REMOVE
    69  	case llapi.HsmActionCancel:
    70  		c = pb.Command_CANCEL
    71  	default:
    72  		alert.Abort(errors.Errorf("unknown command: %v", a))
    73  	}
    74  
    75  	return
    76  }
    77  
    78  // Handle returns the raw hsm.ActionHandle (temporary function until queue
    79  // transport is updated)
    80  func (action *Action) Handle() hsm.ActionHandle {
    81  	return action.aih
    82  }
    83  
    84  // ID Returns the action id.
    85  func (action *Action) ID() ActionID {
    86  	return action.id
    87  }
    88  
    89  // MarshalActionData returns an initallized and marshalled ActionData struct. The moverData
    90  // value is also marshalled before adding it to the ActionData.
    91  func MarshalActionData(fileID []byte, moverData interface{}) ([]byte, error) {
    92  	mdata, err := json.Marshal(moverData)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  	return json.Marshal(
    97  		&ActionData{
    98  			FileID:    fileID,
    99  			MoverData: mdata,
   100  		})
   101  }
   102  
   103  // Prepare ensure action is ready to be sent.
   104  // Complete any actions that may require accessing the filesystem.
   105  func (action *Action) Prepare() error {
   106  	var data ActionData
   107  	if len(action.aih.Data()) > 0 {
   108  		err := json.Unmarshal(action.aih.Data(), &data)
   109  		if err != nil {
   110  			alert.Warnf("unrecognized data passed to agent: %v: %v", action.aih.Data(), err)
   111  			action.Data = action.aih.Data()
   112  		}
   113  	}
   114  
   115  	if len(data.MoverData) > 0 {
   116  		action.Data = data.MoverData
   117  	}
   118  
   119  	if len(data.FileID) > 0 {
   120  		debug.Printf("found fileID from user: %v %d", data.FileID, len(data.FileID))
   121  		action.UUID = string(data.FileID)
   122  	} else {
   123  		switch action.aih.Action() {
   124  		case llapi.HsmActionRestore, llapi.HsmActionRemove:
   125  			uuid, err := fileid.UUID.GetByFid(action.agent.Root(), action.aih.Fid())
   126  			if err != nil {
   127  				alert.Warnf("Error reading UUID: %v (%v)", err, action)
   128  			} else {
   129  				action.UUID = string(uuid)
   130  			}
   131  
   132  			buf, err := fileid.Hash.GetByFid(action.agent.Root(), action.aih.Fid())
   133  			if err != nil {
   134  				debug.Printf("Error reading Hash: %v (%v)", err, action)
   135  			}
   136  			hash := make([]byte, hex.DecodedLen(len(buf)))
   137  			_, err = hex.Decode(hash, buf)
   138  			action.Hash = hash
   139  			if err != nil {
   140  				debug.Printf("Error decoding Hash: %v (%v)", err, action)
   141  			}
   142  
   143  			url, err := fileid.URL.GetByFid(action.agent.Root(), action.aih.Fid())
   144  			if err != nil {
   145  				debug.Printf("Error reading URL: %v (%v)", err, action)
   146  			} else {
   147  				action.URL = string(url)
   148  			}
   149  
   150  		}
   151  	}
   152  	return nil
   153  }
   154  
   155  // AsMessage returns the protobuf version of an Action.
   156  func (action *Action) AsMessage() *pb.ActionItem {
   157  	msg := &pb.ActionItem{
   158  		Id:          uint64(action.id),
   159  		Op:          hsm2Command(action.aih.Action()),
   160  		PrimaryPath: fs.FidRelativePath(action.aih.Fid()),
   161  		Offset:      action.aih.Offset(),
   162  		Length:      action.aih.Length(),
   163  		Uuid:        action.UUID,
   164  		Hash:        action.Hash,
   165  		Url:         action.URL,
   166  		Data:        action.Data,
   167  	}
   168  
   169  	dfid, err := action.aih.DataFid()
   170  	if err == nil {
   171  		msg.WritePath = fs.FidRelativePath(dfid)
   172  	}
   173  
   174  	return msg
   175  }
   176  
   177  // Update handles the Status messages from the data mover. The Status
   178  // updates the current progress of the Action. if the Completed flag is true,
   179  // then the Action is completed and true is returned so the transport can remove
   180  // any related state. After an action is completed any further status updates
   181  // should be ignored.
   182  //
   183  // If this function returns an error then the transport layer should notify
   184  // the mover that this action has been terminated. In this case the Action will
   185  // be completed immediately and no further updates are required.
   186  //
   187  func (action *Action) Update(status *pb.ActionStatus) (bool, error) {
   188  	debug.Printf("id:%d update offset: %d length: %d complete: %v status: %d", status.Id,
   189  		status.Offset,
   190  		status.Length,
   191  		status.Completed, status.Error)
   192  	if status.Completed {
   193  		duration := time.Since(action.start)
   194  		debug.Printf("id:%d completed status: %v in %v", status.Id, status.Error, duration)
   195  
   196  		if status.Uuid != "" {
   197  			fileid.UUID.UpdateByFid(action.agent.Root(), action.aih.Fid(), []byte(status.Uuid))
   198  		}
   199  		if status.Hash != nil {
   200  			buf := make([]byte, hex.EncodedLen(len(status.Hash)))
   201  			hex.Encode(buf, status.Hash)
   202  			fileid.Hash.UpdateByFid(action.agent.Root(), action.aih.Fid(), buf)
   203  		}
   204  		if status.Url != "" {
   205  			fileid.URL.UpdateByFid(action.agent.Root(), action.aih.Fid(), []byte(status.Url))
   206  		}
   207  		action.agent.stats.CompleteAction(action, int(status.Error))
   208  		err := action.aih.End(status.Offset, status.Length, 0, int(status.Error))
   209  		if err != nil {
   210  			audit.Logf("id:%d completion failed: %v", status.Id, err)
   211  			return true, err // Completed, but Failed. Internal HSM state is not updated
   212  		}
   213  		<-action.agent.rpcsInFlight
   214  		if action.aih.Action() == llapi.HsmActionArchive && action.agent.config.Snapshots.Enabled && status.Uuid != "" {
   215  			createSnapshot(action.agent.Root(), action.aih.ArchiveID(), action.aih.Fid(), []byte(status.Uuid))
   216  		}
   217  		return true, nil // Completed
   218  	}
   219  	err := action.aih.Progress(status.Offset, status.Length, action.aih.Length(), 0)
   220  	if err != nil {
   221  		debug.Printf("id:%d progress update failed: %v", status.Id, err)
   222  		action.agent.stats.CompleteAction(action, -1)
   223  		if err2 := action.aih.End(0, 0, 0, -1); err2 != nil {
   224  			<-action.agent.rpcsInFlight
   225  			debug.Printf("id:%d completion after error failed: %v", status.Id, err2)
   226  			return false, fmt.Errorf("err: %s/err2: %s", err, err2)
   227  		}
   228  		<-action.agent.rpcsInFlight
   229  		return false, err // Incomplete Failed Action
   230  	}
   231  
   232  	return false, nil
   233  }
   234  
   235  // Fail signals that the action has failed
   236  func (action *Action) Fail(rc int) error {
   237  	audit.Logf("id:%d fail %x %v: %v", action.id, action.aih.Cookie(), action.aih.Fid(), rc)
   238  	action.agent.stats.CompleteAction(action, rc)
   239  	err := action.aih.End(0, 0, 0, rc)
   240  	if err != nil {
   241  		audit.Logf("id:%d fail after fail %x: %v", action.id, action.aih.Cookie(), err)
   242  	}
   243  	<-action.agent.rpcsInFlight
   244  	return errors.Wrap(err, "end action failed")
   245  
   246  }