go.uber.org/cadence@v1.2.9/internal/workflow_shadower.go (about) 1 // Copyright (c) 2017-2021 Uber Technologies Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package internal 22 23 import ( 24 "context" 25 "errors" 26 "fmt" 27 "math" 28 "math/rand" 29 "sync" 30 "sync/atomic" 31 "time" 32 33 "github.com/facebookgo/clock" 34 "go.uber.org/zap" 35 36 "go.uber.org/cadence/.gen/go/cadence/workflowserviceclient" 37 "go.uber.org/cadence/.gen/go/shadower" 38 "go.uber.org/cadence/internal/common" 39 "go.uber.org/cadence/internal/common/util" 40 ) 41 42 const ( 43 statusInitialized int32 = 0 44 statusStarted int32 = 1 45 statusStopped int32 = 2 46 ) 47 48 const ( 49 defaultWaitDurationPerIteration = 5 * time.Minute 50 ) 51 52 type ( 53 // ShadowOptions is used to configure workflow shadowing. 54 ShadowOptions struct { 55 // Optional: Workflow visibility query for getting workflows that should be replayed 56 // if specified, WorkflowTypes, WorkflowStatus, WorkflowStartTimeFilter fields must not be specified. 57 // default: empty query, which matches all workflows 58 WorkflowQuery string 59 60 // Optional: A list of workflow type names. 61 // The list will be used to construct WorkflowQuery. Only workflows with types listed will be replayed. 62 // default: empty list, which matches all workflow types 63 WorkflowTypes []string 64 65 // Optional: A list of workflow status. 66 // The list will be used to construct WorkflowQuery. Only workflows with status listed will be replayed. 67 // accepted values (case-insensitive): OPEN, CLOSED, ALL, COMPLETED, FAILED, CANCELED, TERMINATED, CONTINUED_AS_NEW, TIMED_OUT 68 // default: OPEN, which matches only open workflows 69 WorkflowStatus []string 70 71 // Optional: Min and Max workflow start timestamp. 72 // Timestamps will be used to construct WorkflowQuery. Only workflows started within the time range will be replayed. 73 // default: no time filter, which matches all workflow start timestamp 74 WorkflowStartTimeFilter TimeFilter 75 76 // Optional: Min and Max workflow close timestamp. 77 // Timestamps will be used to construct WorkflowQuery. Only workflows closed within the time range will be replayed. If this filter is being used along with the thee StartTime filter then make sure the Min Close time stamp 78 // is within the range of Start timestamp. 79 // default: no time filter, which matches all workflow closed timestamp 80 WorkflowCloseTimeFilter TimeFilter 81 82 // Optional: sampling rate for the workflows matches WorkflowQuery 83 // only sampled workflows will be replayed 84 // default: 1.0 85 SamplingRate float64 86 87 // Optional: sets if shadowing should continue after all workflows matches the WorkflowQuery have been replayed. 88 // If set to ShadowModeContinuous, ExitCondition must be specified. 89 // default: ShadowModeNormal, which means shadowing will complete after all workflows have been replayed 90 Mode ShadowMode 91 92 // Required if Mode is set to ShadowModeContinuous: controls when shadowing should complete 93 ExitCondition ShadowExitCondition 94 95 // Optional: workflow shadowing concurrency (# of concurrent workflow replay activities) 96 // Note: this field only applies to shadow worker. For the local WorkflowShadower, 97 // the concurrency will always be 1. 98 // An error will be returned if it's set to be larger than 1 when used to NewWorkflowShadower 99 // default: 1 100 Concurrency int 101 } 102 103 // TimeFilter represents a time range through the min and max timestamp 104 TimeFilter struct { 105 MinTimestamp time.Time 106 MaxTimestamp time.Time 107 } 108 109 // ShadowMode is an enum for configuring if shadowing should continue after all workflows matches the WorkflowQuery have been replayed. 110 ShadowMode int 111 112 // ShadowExitCondition configures when the workflow shadower should exit. 113 // If not specified shadower will exit after replaying all workflows satisfying the visibility query. 114 ShadowExitCondition struct { 115 // Optional: Expiration interval for shadowing. 116 // Shadowing will exit when this interval has passed. 117 // default: no expiration interval 118 ExpirationInterval time.Duration 119 // Optional: Target number of shadowed workflows. 120 // Shadowing will exit after this number is reached. 121 // default: no limit on shadow count 122 ShadowCount int 123 } 124 125 // WorkflowShadower retrieves and replays workflow history from Cadence service 126 // to determine if there's any nondeterministic changes in the workflow definition 127 WorkflowShadower struct { 128 service workflowserviceclient.Interface 129 domain string 130 shadowOptions ShadowOptions 131 logger *zap.Logger 132 replayer *WorkflowReplayer 133 134 status int32 135 shutdownCh chan struct{} 136 shutdownWG sync.WaitGroup 137 138 clock clock.Clock 139 } 140 ) 141 142 const ( 143 // ShadowModeNormal is the default mode for workflow shadowing. 144 // Shadowing will complete after all workflows matches WorkflowQuery have been replayed. 145 ShadowModeNormal ShadowMode = iota 146 // ShadowModeContinuous mode will start a new round of shadowing 147 // after all workflows matches WorkflowQuery have been replayed. 148 // There will be a 5 min wait period between each round, 149 // currently this wait period is not configurable. 150 // Shadowing will complete only when ExitCondition is met. 151 // ExitCondition must be specified when using this mode 152 ShadowModeContinuous 153 ) 154 155 // NewWorkflowShadower creates an instance of the WorkflowShadower for testing 156 // The logger is an optional parameter. Defaults to noop logger if not provided and will override the logger in WorkerOptions 157 func NewWorkflowShadower( 158 service workflowserviceclient.Interface, 159 domain string, 160 shadowOptions ShadowOptions, 161 replayOptions ReplayOptions, 162 logger *zap.Logger, 163 ) (*WorkflowShadower, error) { 164 if len(domain) == 0 { 165 return nil, errors.New("domain is not set") 166 } 167 168 if err := shadowOptions.validateAndPopulateFields(); err != nil { 169 return nil, err 170 } 171 172 if shadowOptions.Concurrency > 1 { 173 return nil, errors.New("local workflow shadower doesn't support concurrency > 1") 174 } 175 176 if logger == nil { 177 logger = zap.NewNop() 178 } 179 180 return &WorkflowShadower{ 181 service: service, 182 domain: domain, 183 shadowOptions: shadowOptions, 184 logger: logger, 185 replayer: NewWorkflowReplayerWithOptions(replayOptions), 186 187 status: statusInitialized, 188 shutdownCh: make(chan struct{}), 189 190 clock: clock.New(), 191 }, nil 192 } 193 194 // RegisterWorkflow registers workflow function to replay 195 func (s *WorkflowShadower) RegisterWorkflow(w interface{}) { 196 s.replayer.RegisterWorkflow(w) 197 } 198 199 // RegisterWorkflowWithOptions registers workflow function with custom workflow name to replay 200 func (s *WorkflowShadower) RegisterWorkflowWithOptions(w interface{}, options RegisterWorkflowOptions) { 201 s.replayer.RegisterWorkflowWithOptions(w, options) 202 } 203 204 // Run starts WorkflowShadower in a blocking fashion 205 func (s *WorkflowShadower) Run() error { 206 if !atomic.CompareAndSwapInt32(&s.status, statusInitialized, statusStarted) { 207 return errors.New("Workflow shadower already started") 208 } 209 210 return s.shadowWorker() 211 } 212 213 // Stop stops WorkflowShadower and wait up to one minute for all goroutines to finish before returning 214 func (s *WorkflowShadower) Stop() { 215 if !atomic.CompareAndSwapInt32(&s.status, statusStarted, statusStopped) { 216 return 217 } 218 219 close(s.shutdownCh) 220 221 if success := util.AwaitWaitGroup(&s.shutdownWG, time.Minute); !success { 222 s.logger.Warn("Workflow Shadower timedout on shutdown") 223 } 224 } 225 226 func (s *WorkflowShadower) shadowWorker() error { 227 s.shutdownWG.Add(1) 228 defer s.shutdownWG.Done() 229 230 scanRequest := shadower.ScanWorkflowActivityParams{ 231 Domain: common.StringPtr(s.domain), 232 WorkflowQuery: common.StringPtr(s.shadowOptions.WorkflowQuery), 233 SamplingRate: common.Float64Ptr(s.shadowOptions.SamplingRate), 234 } 235 s.logger.Info("Shadow workflow query", 236 zap.String(tagVisibilityQuery, s.shadowOptions.WorkflowQuery), 237 ) 238 239 ctx := context.Background() 240 expirationTime := time.Unix(0, math.MaxInt64) 241 if s.shadowOptions.ExitCondition.ExpirationInterval != 0 { 242 expirationTime = s.clock.Now().Add(s.shadowOptions.ExitCondition.ExpirationInterval) 243 } 244 245 replayCount := 0 246 maxReplayCount := math.MaxInt64 247 if s.shadowOptions.ExitCondition.ShadowCount != 0 { 248 maxReplayCount = s.shadowOptions.ExitCondition.ShadowCount 249 } 250 rand.Seed(s.clock.Now().UnixNano()) 251 for { 252 scanResult, err := scanWorkflowExecutionsHelper(ctx, s.service, scanRequest, s.logger) 253 if err != nil { 254 return err 255 } 256 257 for _, execution := range scanResult.Executions { 258 if s.clock.Now().After(expirationTime) { 259 return nil 260 } 261 262 success, err := replayWorkflowExecutionHelper( 263 ctx, 264 s.replayer, 265 s.service, 266 s.logger, 267 s.domain, 268 WorkflowExecution{ 269 ID: execution.GetWorkflowId(), 270 RunID: execution.GetRunId(), 271 }, 272 ) 273 if err != nil { 274 return err 275 } 276 if success { 277 replayCount++ 278 } 279 280 if replayCount == maxReplayCount { 281 return nil 282 } 283 } 284 285 if len(scanResult.NextPageToken) == 0 { 286 if s.shadowOptions.Mode == ShadowModeNormal || s.clock.Now().Add(defaultWaitDurationPerIteration).After(expirationTime) { 287 return nil 288 } 289 290 s.clock.Sleep(defaultWaitDurationPerIteration) 291 } 292 293 scanRequest.NextPageToken = scanResult.NextPageToken 294 } 295 296 } 297 298 func (o *ShadowOptions) validateAndPopulateFields() error { 299 exitConditionSpecified := o.ExitCondition.ExpirationInterval > 0 || o.ExitCondition.ShadowCount > 0 300 if o.Mode == ShadowModeContinuous && !exitConditionSpecified { 301 return errors.New("exit condition must be specified if shadow mode is set to continuous") 302 } 303 304 if o.SamplingRate < 0 || o.SamplingRate > 1 { 305 return errors.New("sampling rate should be in range [0, 1]") 306 } 307 308 if len(o.WorkflowQuery) != 0 && (len(o.WorkflowTypes) != 0 || len(o.WorkflowStatus) != 0 || !o.WorkflowStartTimeFilter.isEmpty()) { 309 return errors.New("workflow types, status, start time and close time filter can't be specified when workflow query is specified") 310 } 311 312 if len(o.WorkflowQuery) == 0 { 313 queryBuilder := NewQueryBuilder().WorkflowTypes(o.WorkflowTypes) 314 315 statuses := make([]WorkflowStatus, 0, len(o.WorkflowStatus)) 316 for _, statusString := range o.WorkflowStatus { 317 status, err := ToWorkflowStatus(statusString) 318 if err != nil { 319 return err 320 } 321 statuses = append(statuses, status) 322 } 323 //All the open statuses are taken by default. This list seems to not work as expected. 324 //TODO: verify that the status list works as expected. currently all wfs of all types get picked up. 325 if len(statuses) == 0 { 326 statuses = []WorkflowStatus{WorkflowStatusOpen} 327 } 328 queryBuilder.WorkflowStatus(statuses) 329 330 if !o.WorkflowStartTimeFilter.isEmpty() { 331 if err := o.WorkflowStartTimeFilter.validateAndPopulateFields(); err != nil { 332 return fmt.Errorf("invalid start time filter, error: %v", err) 333 } 334 queryBuilder.StartTime(o.WorkflowStartTimeFilter.MinTimestamp, o.WorkflowStartTimeFilter.MaxTimestamp) 335 } 336 337 if !o.WorkflowCloseTimeFilter.isEmpty() { 338 if err := o.WorkflowCloseTimeFilter.validateAndPopulateFields(); err != nil { 339 return fmt.Errorf("invalid close time filter, error: %v", err) 340 } 341 queryBuilder.CloseTime(o.WorkflowCloseTimeFilter.MinTimestamp, o.WorkflowCloseTimeFilter.MaxTimestamp) 342 } 343 o.WorkflowQuery = queryBuilder.Build() 344 } 345 346 if o.SamplingRate == 0 { 347 // if not set, defaults to replay all workflows 348 o.SamplingRate = 1 349 } 350 351 if o.Concurrency == 0 { 352 // if not set, defaults to 1 353 o.Concurrency = 1 354 } 355 356 return nil 357 } 358 359 func (t *TimeFilter) validateAndPopulateFields() error { 360 if t.MaxTimestamp.IsZero() { 361 t.MaxTimestamp = maxTimestamp 362 } 363 364 if t.MaxTimestamp.Before(t.MinTimestamp) { 365 return errors.New("maxTimestamp should be after minTimestamp") 366 } 367 368 return nil 369 } 370 371 func (t *TimeFilter) isEmpty() bool { 372 if t == nil { 373 return true 374 } 375 376 return t.MinTimestamp.IsZero() && t.MaxTimestamp.IsZero() 377 } 378 379 func (m ShadowMode) toThriftPtr() *shadower.Mode { 380 switch m { 381 case ShadowModeNormal: 382 return shadower.ModeNormal.Ptr() 383 case ShadowModeContinuous: 384 return shadower.ModeContinuous.Ptr() 385 default: 386 panic(fmt.Sprintf("unknown shadow mode %v", m)) 387 } 388 } 389 390 func (e ShadowExitCondition) toThriftPtr() *shadower.ExitCondition { 391 return &shadower.ExitCondition{ 392 ShadowCount: common.Int32Ptr(int32(e.ShadowCount)), 393 ExpirationIntervalInSeconds: common.Int32Ptr(int32(e.ExpirationInterval.Seconds())), 394 } 395 }