github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/spd/checkpoint/checkpoint.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package checkpoint 18 19 import ( 20 "encoding/json" 21 "errors" 22 "fmt" 23 "strings" 24 25 "k8s.io/klog/v2" 26 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" 27 "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" 28 cpmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" 29 30 "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" 31 ) 32 33 const ( 34 // Delimiter used on checkpoints written to disk 35 delimiter = "_" 36 spdPrefix = "SPD" 37 ) 38 39 // ServiceProfileCheckpoint defines the operations to retrieve spd 40 type ServiceProfileCheckpoint interface { 41 checkpointmanager.Checkpoint 42 GetSPD() *v1alpha1.ServiceProfileDescriptor 43 } 44 45 // Data to be stored as checkpoint 46 type Data struct { 47 SPD *v1alpha1.ServiceProfileDescriptor 48 Checksum checksum.Checksum 49 } 50 51 // NewServiceProfileCheckpoint returns new spd checkpoint 52 func NewServiceProfileCheckpoint(spd *v1alpha1.ServiceProfileDescriptor) ServiceProfileCheckpoint { 53 return &Data{SPD: spd} 54 } 55 56 // MarshalCheckpoint returns marshaled data 57 func (cp *Data) MarshalCheckpoint() ([]byte, error) { 58 cp.Checksum = checksum.New(*cp.SPD) 59 return json.Marshal(*cp) 60 } 61 62 // UnmarshalCheckpoint returns unmarshalled data 63 func (cp *Data) UnmarshalCheckpoint(blob []byte) error { 64 return json.Unmarshal(blob, cp) 65 } 66 67 // VerifyChecksum verifies that passed checksum is same as calculated checksum 68 func (cp *Data) VerifyChecksum() error { 69 return cp.Checksum.Verify(*cp.SPD) 70 } 71 72 // GetSPD retrieves the spd from the checkpoint 73 func (cp *Data) GetSPD() *v1alpha1.ServiceProfileDescriptor { 74 return cp.SPD 75 } 76 77 // getSPDKey returns the full qualified path for the spd checkpoint 78 func getSPDKey(spd *v1alpha1.ServiceProfileDescriptor) string { 79 return fmt.Sprintf("%s%s%s%s%s.yaml", spdPrefix, delimiter, spd.Namespace, delimiter, spd.Name) 80 } 81 82 // LoadSPDs Loads All Checkpoints from disk 83 func LoadSPDs(cpm checkpointmanager.CheckpointManager, skipCorruptionError bool) ([]*v1alpha1.ServiceProfileDescriptor, error) { 84 spd := make([]*v1alpha1.ServiceProfileDescriptor, 0) 85 86 checkpointKeys, err := cpm.ListCheckpoints() 87 if err != nil { 88 klog.Errorf("Failed to list checkpoints: %v", err) 89 } 90 91 for _, key := range checkpointKeys { 92 if !strings.HasPrefix(key, spdPrefix) { 93 continue 94 } 95 96 checkpoint := NewServiceProfileCheckpoint(nil) 97 err := cpm.GetCheckpoint(key, checkpoint) 98 if err != nil { 99 klog.Errorf("Failed to retrieve checkpoint for spd %q, error: %v", key, err) 100 if !errors.Is(err, cpmerrors.ErrCorruptCheckpoint) { 101 continue 102 } else { 103 if !skipCorruptionError { 104 continue 105 } 106 klog.Warningf("Skip corruption error for spd %q", key) 107 } 108 } 109 spd = append(spd, checkpoint.GetSPD()) 110 } 111 return spd, nil 112 } 113 114 // WriteSPD a checkpoint to a file on disk if annotation is present 115 func WriteSPD(cpm checkpointmanager.CheckpointManager, spd *v1alpha1.ServiceProfileDescriptor) error { 116 if spd == nil { 117 return fmt.Errorf("spd is nil") 118 } 119 120 data := NewServiceProfileCheckpoint(spd) 121 return cpm.CreateCheckpoint(getSPDKey(spd), data) 122 } 123 124 // DeleteSPD deletes a checkpoint from disk if present 125 func DeleteSPD(cpm checkpointmanager.CheckpointManager, spd *v1alpha1.ServiceProfileDescriptor) error { 126 if spd == nil { 127 return nil 128 } 129 return cpm.RemoveCheckpoint(getSPDKey(spd)) 130 }