github.com/koko1123/flow-go-1@v0.29.6/cmd/dynamic_startup.go (about) 1 package cmd 2 3 import ( 4 "context" 5 "encoding/hex" 6 "encoding/json" 7 "fmt" 8 "path/filepath" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/rs/zerolog" 14 "github.com/sethvargo/go-retry" 15 16 client "github.com/onflow/flow-go-sdk/access/grpc" 17 "github.com/koko1123/flow-go-1/cmd/util/cmd/common" 18 "github.com/koko1123/flow-go-1/model/bootstrap" 19 "github.com/koko1123/flow-go-1/state/protocol" 20 badgerstate "github.com/koko1123/flow-go-1/state/protocol/badger" 21 utilsio "github.com/koko1123/flow-go-1/utils/io" 22 "github.com/onflow/flow-go/crypto" 23 24 "github.com/koko1123/flow-go-1/model/flow" 25 "github.com/koko1123/flow-go-1/state/protocol/inmem" 26 ) 27 28 const getSnapshotTimeout = 30 * time.Second 29 30 // GetProtocolSnapshot callback that will get latest finalized protocol snapshot 31 type GetProtocolSnapshot func(ctx context.Context) (protocol.Snapshot, error) 32 33 // GetSnapshot will attempt to get the latest finalized protocol snapshot with the given flow configs 34 func GetSnapshot(ctx context.Context, client *client.Client) (*inmem.Snapshot, error) { 35 ctx, cancel := context.WithTimeout(ctx, getSnapshotTimeout) 36 defer cancel() 37 38 b, err := client.GetLatestProtocolStateSnapshot(ctx) 39 if err != nil { 40 return nil, fmt.Errorf("failed to get latest finalized protocol state snapshot during pre-initialization: %w", err) 41 } 42 43 var snapshotEnc inmem.EncodableSnapshot 44 err = json.Unmarshal(b, &snapshotEnc) 45 if err != nil { 46 return nil, fmt.Errorf("failed to unmarshal protocol state snapshot: %w", err) 47 } 48 49 snapshot := inmem.SnapshotFromEncodable(snapshotEnc) 50 return snapshot, nil 51 } 52 53 // GetSnapshotAtEpochAndPhase will get the latest finalized protocol snapshot and check the current epoch and epoch phase. 54 // If we are past the target epoch and epoch phase we exit the retry mechanism immediately. 55 // If not check the snapshot at the specified interval until we reach the target epoch and phase. 56 func GetSnapshotAtEpochAndPhase(ctx context.Context, log zerolog.Logger, startupEpoch uint64, startupEpochPhase flow.EpochPhase, retryInterval time.Duration, getSnapshot GetProtocolSnapshot) (protocol.Snapshot, error) { 57 start := time.Now() 58 59 log = log.With(). 60 Uint64("target_epoch_counter", startupEpoch). 61 Str("target_epoch_phase", startupEpochPhase.String()). 62 Logger() 63 64 log.Info().Msg("starting dynamic startup - waiting until target epoch/phase to start...") 65 66 var snapshot protocol.Snapshot 67 var err error 68 69 backoff := retry.NewConstant(retryInterval) 70 err = retry.Do(ctx, backoff, func(ctx context.Context) error { 71 snapshot, err = getSnapshot(ctx) 72 if err != nil { 73 err = fmt.Errorf("failed to get protocol snapshot: %w", err) 74 log.Error().Err(err).Msg("could not get protocol snapshot") 75 return retry.RetryableError(err) 76 } 77 78 // if we encounter any errors interpreting the snapshot something went wrong stop retrying 79 currEpochCounter, err := snapshot.Epochs().Current().Counter() 80 if err != nil { 81 return fmt.Errorf("failed to get the current epoch counter: %w", err) 82 } 83 84 currEpochPhase, err := snapshot.Phase() 85 if err != nil { 86 return fmt.Errorf("failed to get the current epoch phase: %w", err) 87 } 88 89 // check if we are in or past the target epoch and phase 90 if currEpochCounter > startupEpoch || (currEpochCounter == startupEpoch && currEpochPhase >= startupEpochPhase) { 91 log.Info(). 92 Dur("time-waiting", time.Since(start)). 93 Uint64("current-epoch", currEpochCounter). 94 Str("current-epoch-phase", currEpochPhase.String()). 95 Msg("finished dynamic startup - reached desired epoch and phase") 96 97 return nil 98 } 99 100 // wait then poll for latest snapshot again 101 log.Info(). 102 Dur("time-waiting", time.Since(start)). 103 Uint64("current-epoch", currEpochCounter). 104 Str("current-epoch-phase", currEpochPhase.String()). 105 Msgf("waiting for epoch %d and phase %s", startupEpoch, startupEpochPhase.String()) 106 107 return retry.RetryableError(fmt.Errorf("dynamic startup epoch and epoch phase not reached")) 108 }) 109 if err != nil { 110 return nil, fmt.Errorf("failed to wait for target epoch and phase: %w", err) 111 } 112 113 return snapshot, nil 114 } 115 116 // ValidateDynamicStartupFlags will validate flags necessary for dynamic node startup 117 // - assert dynamic-startup-access-publickey is valid ECDSA_P256 public key hex 118 // - assert dynamic-startup-access-address is not empty 119 // - assert dynamic-startup-startup-epoch-phase is > 0 (EpochPhaseUndefined) 120 func ValidateDynamicStartupFlags(accessPublicKey, accessAddress string, startPhase flow.EpochPhase) error { 121 b, err := hex.DecodeString(strings.TrimPrefix(accessPublicKey, "0x")) 122 if err != nil { 123 return fmt.Errorf("invalid flag --dynamic-startup-access-publickey: %w", err) 124 } 125 126 _, err = crypto.DecodePublicKey(crypto.ECDSAP256, b) 127 if err != nil { 128 return fmt.Errorf("invalid flag --dynamic-startup-access-publickey: %w", err) 129 } 130 131 if accessAddress == "" { 132 return fmt.Errorf("invalid flag --dynamic-startup-access-address can not be empty") 133 } 134 135 if startPhase <= flow.EpochPhaseUndefined { 136 return fmt.Errorf("invalid flag --dynamic-startup-startup-epoch-phase unknown epoch phase") 137 } 138 139 return nil 140 } 141 142 // DynamicStartPreInit is the pre-init func that will check if a node has already bootstrapped 143 // from a root protocol snapshot. If not attempt to get a protocol snapshot where the following 144 // conditions are met. 145 // 1. Target epoch < current epoch (in the past), set root snapshot to current snapshot 146 // 2. Target epoch == "current", wait until target phase == current phase before setting root snapshot 147 // 3. Target epoch > current epoch (in future), wait until target epoch and target phase is reached before 148 // setting root snapshot 149 func DynamicStartPreInit(nodeConfig *NodeConfig) error { 150 ctx := context.Background() 151 152 log := nodeConfig.Logger.With().Str("component", "dynamic-startup").Logger() 153 154 // skip dynamic startup if the protocol state is bootstrapped 155 isBootstrapped, err := badgerstate.IsBootstrapped(nodeConfig.DB) 156 if err != nil { 157 return fmt.Errorf("could not check if state is boostrapped: %w", err) 158 } 159 if isBootstrapped { 160 log.Info().Msg("protocol state already bootstrapped, skipping dynamic startup") 161 return nil 162 } 163 164 // skip dynamic startup if a root snapshot file is specified - this takes priority 165 rootSnapshotPath := filepath.Join(nodeConfig.BootstrapDir, bootstrap.PathRootProtocolStateSnapshot) 166 if utilsio.FileExists(rootSnapshotPath) { 167 log.Info(). 168 Str("root_snapshot_path", rootSnapshotPath). 169 Msg("protocol state is not bootstrapped, will bootstrap using configured root snapshot file, skipping dynamic startup") 170 return nil 171 } 172 173 // get flow client with secure client connection to download protocol snapshot from access node 174 config, err := common.NewFlowClientConfig(nodeConfig.DynamicStartupANAddress, nodeConfig.DynamicStartupANPubkey, flow.ZeroID, false) 175 if err != nil { 176 return fmt.Errorf("failed to create flow client config for node dynamic startup pre-init: %w", err) 177 } 178 179 flowClient, err := common.FlowClient(config) 180 if err != nil { 181 return fmt.Errorf("failed to create flow client for node dynamic startup pre-init: %w", err) 182 } 183 184 getSnapshotFunc := func(ctx context.Context) (protocol.Snapshot, error) { 185 return GetSnapshot(ctx, flowClient) 186 } 187 188 // validate dynamic startup epoch flag 189 startupEpoch, err := validateDynamicStartEpochFlags(ctx, getSnapshotFunc, nodeConfig.DynamicStartupEpoch) 190 if err != nil { 191 return fmt.Errorf("failed to validate flag --dynamic-start-epoch: %w", err) 192 } 193 194 startupPhase := flow.GetEpochPhase(nodeConfig.DynamicStartupEpochPhase) 195 196 // validate the rest of the dynamic startup flags 197 err = ValidateDynamicStartupFlags(nodeConfig.DynamicStartupANPubkey, nodeConfig.DynamicStartupANAddress, startupPhase) 198 if err != nil { 199 return err 200 } 201 202 snapshot, err := GetSnapshotAtEpochAndPhase( 203 ctx, 204 log, 205 startupEpoch, 206 startupPhase, 207 nodeConfig.BaseConfig.DynamicStartupSleepInterval, 208 getSnapshotFunc, 209 ) 210 if err != nil { 211 return fmt.Errorf("failed to get snapshot at start up epoch (%d) and phase (%s): %w", startupEpoch, startupPhase.String(), err) 212 } 213 214 // set the root snapshot in the config - we will use this later to bootstrap 215 nodeConfig.RootSnapshot = snapshot 216 return nil 217 } 218 219 // validateDynamicStartEpochFlags parse the start epoch flag and return the uin64 value, 220 // if epoch = current return the current epoch counter 221 func validateDynamicStartEpochFlags(ctx context.Context, getSnapshot GetProtocolSnapshot, flagEpoch string) (uint64, error) { 222 223 // if flag is not `current` sentinel, it must be a specific epoch counter (uint64) 224 if flagEpoch != "current" { 225 epochCounter, err := strconv.ParseUint(flagEpoch, 10, 64) 226 if err != nil { 227 return 0, fmt.Errorf("invalid epoch counter flag (%s): %w", flagEpoch, err) 228 } 229 return epochCounter, nil 230 } 231 232 // we are using the current epoch, retrieve latest snapshot to determine this value 233 snapshot, err := getSnapshot(ctx) 234 if err != nil { 235 return 0, fmt.Errorf("failed to get snapshot: %w", err) 236 } 237 238 epochCounter, err := snapshot.Epochs().Current().Counter() 239 if err != nil { 240 return 0, fmt.Errorf("failed to get current epoch counter: %w", err) 241 } 242 243 return epochCounter, nil 244 }