github.com/whamcloud/lemur@v0.0.0-20190827193804-4655df8a52af/cmd/lhsmd/agent/agent_action.go (about) 1 // Copyright (c) 2018 DDN. All rights reserved. 2 // Use of this source code is governed by a MIT-style 3 // license that can be found in the LICENSE file. 4 5 package agent 6 7 import ( 8 "encoding/hex" 9 "encoding/json" 10 "fmt" 11 "sync/atomic" 12 "time" 13 14 "github.com/pkg/errors" 15 16 "github.com/intel-hpdd/lemur/cmd/lhsmd/agent/fileid" 17 pb "github.com/intel-hpdd/lemur/pdm" 18 "github.com/intel-hpdd/logging/alert" 19 "github.com/intel-hpdd/logging/audit" 20 "github.com/intel-hpdd/logging/debug" 21 22 "github.com/intel-hpdd/go-lustre/fs" 23 "github.com/intel-hpdd/go-lustre/hsm" 24 "github.com/intel-hpdd/go-lustre/llapi" 25 ) 26 27 type ( 28 // ActionID is a unique (per agent instance) ID for HSM actions 29 ActionID uint64 30 31 // Action represents an HSM action 32 Action struct { 33 id ActionID 34 aih hsm.ActionHandle 35 agent *HsmAgent 36 start time.Time 37 UUID string 38 Hash []byte 39 URL string 40 Data []byte 41 } 42 43 // ActionData is extra data passed to the Agent by policy engine 44 ActionData struct { 45 FileID []byte `json:"file_id"` 46 MoverData []byte `json:"mover_data"` 47 } 48 ) 49 50 var actionIDCounter ActionID 51 52 // NextActionID returns monotonically-increasing ActionIDs 53 func NextActionID() ActionID { 54 return ActionID(atomic.AddUint64((*uint64)(&actionIDCounter), 1)) 55 } 56 57 func (action *Action) String() string { 58 return fmt.Sprintf("id:%d %s %v ", action.id, action.aih.Action(), action.aih.Fid()) 59 } 60 61 func hsm2Command(a llapi.HsmAction) (c pb.Command) { 62 switch a { 63 case llapi.HsmActionArchive: 64 c = pb.Command_ARCHIVE 65 case llapi.HsmActionRestore: 66 c = pb.Command_RESTORE 67 case llapi.HsmActionRemove: 68 c = pb.Command_REMOVE 69 case llapi.HsmActionCancel: 70 c = pb.Command_CANCEL 71 default: 72 alert.Abort(errors.Errorf("unknown command: %v", a)) 73 } 74 75 return 76 } 77 78 // Handle returns the raw hsm.ActionHandle (temporary function until queue 79 // transport is updated) 80 func (action *Action) Handle() hsm.ActionHandle { 81 return action.aih 82 } 83 84 // ID Returns the action id. 85 func (action *Action) ID() ActionID { 86 return action.id 87 } 88 89 // MarshalActionData returns an initallized and marshalled ActionData struct. The moverData 90 // value is also marshalled before adding it to the ActionData. 91 func MarshalActionData(fileID []byte, moverData interface{}) ([]byte, error) { 92 mdata, err := json.Marshal(moverData) 93 if err != nil { 94 return nil, err 95 } 96 return json.Marshal( 97 &ActionData{ 98 FileID: fileID, 99 MoverData: mdata, 100 }) 101 } 102 103 // Prepare ensure action is ready to be sent. 104 // Complete any actions that may require accessing the filesystem. 105 func (action *Action) Prepare() error { 106 var data ActionData 107 if len(action.aih.Data()) > 0 { 108 err := json.Unmarshal(action.aih.Data(), &data) 109 if err != nil { 110 alert.Warnf("unrecognized data passed to agent: %v: %v", action.aih.Data(), err) 111 action.Data = action.aih.Data() 112 } 113 } 114 115 if len(data.MoverData) > 0 { 116 action.Data = data.MoverData 117 } 118 119 if len(data.FileID) > 0 { 120 debug.Printf("found fileID from user: %v %d", data.FileID, len(data.FileID)) 121 action.UUID = string(data.FileID) 122 } else { 123 switch action.aih.Action() { 124 case llapi.HsmActionRestore, llapi.HsmActionRemove: 125 uuid, err := fileid.UUID.GetByFid(action.agent.Root(), action.aih.Fid()) 126 if err != nil { 127 alert.Warnf("Error reading UUID: %v (%v)", err, action) 128 } else { 129 action.UUID = string(uuid) 130 } 131 132 buf, err := fileid.Hash.GetByFid(action.agent.Root(), action.aih.Fid()) 133 if err != nil { 134 debug.Printf("Error reading Hash: %v (%v)", err, action) 135 } 136 hash := make([]byte, hex.DecodedLen(len(buf))) 137 _, err = hex.Decode(hash, buf) 138 action.Hash = hash 139 if err != nil { 140 debug.Printf("Error decoding Hash: %v (%v)", err, action) 141 } 142 143 url, err := fileid.URL.GetByFid(action.agent.Root(), action.aih.Fid()) 144 if err != nil { 145 debug.Printf("Error reading URL: %v (%v)", err, action) 146 } else { 147 action.URL = string(url) 148 } 149 150 } 151 } 152 return nil 153 } 154 155 // AsMessage returns the protobuf version of an Action. 156 func (action *Action) AsMessage() *pb.ActionItem { 157 msg := &pb.ActionItem{ 158 Id: uint64(action.id), 159 Op: hsm2Command(action.aih.Action()), 160 PrimaryPath: fs.FidRelativePath(action.aih.Fid()), 161 Offset: action.aih.Offset(), 162 Length: action.aih.Length(), 163 Uuid: action.UUID, 164 Hash: action.Hash, 165 Url: action.URL, 166 Data: action.Data, 167 } 168 169 dfid, err := action.aih.DataFid() 170 if err == nil { 171 msg.WritePath = fs.FidRelativePath(dfid) 172 } 173 174 return msg 175 } 176 177 // Update handles the Status messages from the data mover. The Status 178 // updates the current progress of the Action. if the Completed flag is true, 179 // then the Action is completed and true is returned so the transport can remove 180 // any related state. After an action is completed any further status updates 181 // should be ignored. 182 // 183 // If this function returns an error then the transport layer should notify 184 // the mover that this action has been terminated. In this case the Action will 185 // be completed immediately and no further updates are required. 186 // 187 func (action *Action) Update(status *pb.ActionStatus) (bool, error) { 188 debug.Printf("id:%d update offset: %d length: %d complete: %v status: %d", status.Id, 189 status.Offset, 190 status.Length, 191 status.Completed, status.Error) 192 if status.Completed { 193 duration := time.Since(action.start) 194 debug.Printf("id:%d completed status: %v in %v", status.Id, status.Error, duration) 195 196 if status.Uuid != "" { 197 fileid.UUID.UpdateByFid(action.agent.Root(), action.aih.Fid(), []byte(status.Uuid)) 198 } 199 if status.Hash != nil { 200 buf := make([]byte, hex.EncodedLen(len(status.Hash))) 201 hex.Encode(buf, status.Hash) 202 fileid.Hash.UpdateByFid(action.agent.Root(), action.aih.Fid(), buf) 203 } 204 if status.Url != "" { 205 fileid.URL.UpdateByFid(action.agent.Root(), action.aih.Fid(), []byte(status.Url)) 206 } 207 action.agent.stats.CompleteAction(action, int(status.Error)) 208 err := action.aih.End(status.Offset, status.Length, 0, int(status.Error)) 209 if err != nil { 210 audit.Logf("id:%d completion failed: %v", status.Id, err) 211 return true, err // Completed, but Failed. Internal HSM state is not updated 212 } 213 <-action.agent.rpcsInFlight 214 if action.aih.Action() == llapi.HsmActionArchive && action.agent.config.Snapshots.Enabled && status.Uuid != "" { 215 createSnapshot(action.agent.Root(), action.aih.ArchiveID(), action.aih.Fid(), []byte(status.Uuid)) 216 } 217 return true, nil // Completed 218 } 219 err := action.aih.Progress(status.Offset, status.Length, action.aih.Length(), 0) 220 if err != nil { 221 debug.Printf("id:%d progress update failed: %v", status.Id, err) 222 action.agent.stats.CompleteAction(action, -1) 223 if err2 := action.aih.End(0, 0, 0, -1); err2 != nil { 224 <-action.agent.rpcsInFlight 225 debug.Printf("id:%d completion after error failed: %v", status.Id, err2) 226 return false, fmt.Errorf("err: %s/err2: %s", err, err2) 227 } 228 <-action.agent.rpcsInFlight 229 return false, err // Incomplete Failed Action 230 } 231 232 return false, nil 233 } 234 235 // Fail signals that the action has failed 236 func (action *Action) Fail(rc int) error { 237 audit.Logf("id:%d fail %x %v: %v", action.id, action.aih.Cookie(), action.aih.Fid(), rc) 238 action.agent.stats.CompleteAction(action, rc) 239 err := action.aih.End(0, 0, 0, rc) 240 if err != nil { 241 audit.Logf("id:%d fail after fail %x: %v", action.id, action.aih.Cookie(), err) 242 } 243 <-action.agent.rpcsInFlight 244 return errors.Wrap(err, "end action failed") 245 246 }