github.com/jenkins-x/jx/v2@v2.1.155/pkg/kube/build_lock.go

package kube

import (
	"fmt"
	"os"
	"strconv"
	"time"

	"github.com/jenkins-x/jx-logging/pkg/log"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/kubernetes"
)

// Labels required to be a lock. Anything else should be ignored.
var buildLockLabels map[string]string = map[string]string{
	"jenkins-x.io/kind": "build-lock",
}
var buildLockExpires time.Duration = time.Hour
var buildLockPhaseRunning map[v1.PodPhase]bool = map[v1.PodPhase]bool{
	v1.PodPending: true,
	v1.PodRunning: true,
	v1.PodUnknown: true,
}

// DisableBuildLockEnvKey is the environment variable used to disable the build lock in jx step helm apply
const DisableBuildLockEnvKey = "JX_DISABLE_BUILD_LOCK"

// AcquireBuildLock acquires a build lock, to prevent other builds from editing the
// same namespace while a deployment is already running; other deployments
// can negotiate which one should run next by editing the lock's data.
// Returns a function that releases the lock (to be called in a defer).
// Returns an error if a newer build is already running, or if an error occurred.
func AcquireBuildLock(kubeClient kubernetes.Interface, devNamespace, namespace string) (func() error, error) {
	// Only lock if running in Tekton
	if ok, err := IsTektonEnabled(kubeClient, devNamespace); err != nil {
		log.Logger().Warnf("error while looking for Tekton: %s\n", err.Error())
		return nil, err
	} else if !ok {
		log.Logger().Debugf("lock cancelled because not running in tekton")
		return func() error { return nil }, nil
	}
	// Create the lock object
	lock, err := makeBuildLock(kubeClient, devNamespace, namespace)
	if err != nil {
		return nil, err
	}
	// this loop continuously tries to create the lock
Create:
	for {
		// no pod to follow, set an expiration date
		if len(lock.OwnerReferences) == 0 {
			expires := time.Now().UTC().Add(buildLockExpires).Format(time.RFC3339)
			lock.Annotations["expires"] = expires
			lock.Data["expires"] = expires
		}
		log.Logger().Infof("creating the lock configmap %s", lock.Name)
		// create the lock
		new, err := kubeClient.CoreV1().ConfigMaps(devNamespace).Create(lock)
		if err != nil {
			status, ok := err.(*errors.StatusError)
			// an error while creating the lock
			if !ok || status.Status().Reason != metav1.StatusReasonAlreadyExists {
				log.Logger().Warnf("failed to create the lock configmap %s: %s\n", lock.Name, err.Error())
				return nil, err
			}
			// there is already a similar lock
			log.Logger().Infof("lock configmap %s already exists", lock.Name)
		} else {
			// the lock is created, the updates can now be performed
			log.Logger().Infof("lock configmap %s created", lock.Name)
			// return a function that releases the lock
			return func() error {
				log.Logger().Infof("cleaning the lock configmap %s", lock.Name)
				err := kubeClient.CoreV1().ConfigMaps(devNamespace).Delete(lock.Name,
					&metav1.DeleteOptions{
						Preconditions: &metav1.Preconditions{
							UID: &new.UID,
						},
					})
				if err != nil {
					log.Logger().Warnf("failed to cleanup the lock configmap %s: %s\n", lock.Name, err.Error())
				}
				return err
			}, nil
		}
		// declare these variables outside the inner loop, to be able to edit them before the next iteration
		var old *v1.ConfigMap
		var pod *v1.Pod
	Read:
		for {
			// get the current lock if not already provided
			if old == nil {
				old, err = kubeClient.CoreV1().ConfigMaps(devNamespace).Get(lock.Name, metav1.GetOptions{})
				if err != nil {
					status, ok := err.(*errors.StatusError)
					// the lock does not exist anymore, try to create it
					if ok && status.Status().Reason == metav1.StatusReasonNotFound {
						log.Logger().Infof("lock configmap %s deleted", lock.Name)
						continue Create
					}
					// an error getting the lock
					log.Logger().Warnf("failed to get the lock configmap %s: %s\n", lock.Name, err.Error())
					return nil, err
				}
			}
			// get the locking pod
			var remove bool
			remove, pod, err = getLockingPod(kubeClient, namespace, old, pod)
			if err != nil {
				return nil, err
				// the lock should simply be removed
			} else if remove {
				log.Logger().Infof("cleaning the old lock configmap %s", lock.Name)
				err := kubeClient.CoreV1().ConfigMaps(devNamespace).Delete(lock.Name,
					&metav1.DeleteOptions{
						Preconditions: &metav1.Preconditions{
							UID: &old.UID,
						},
					})
				// removed, now try to create it
				if err == nil {
					continue Create
				}
				status, ok := err.(*errors.StatusError)
				// already deleted, try to create it
				if ok && status.Status().Reason == metav1.StatusReasonNotFound {
					continue Create
					// the lock changed, read it again
				} else if ok && status.Status().Reason == metav1.StatusReasonConflict {
					log.Logger().Infof("lock configmap %s changed", lock.Name)
					old = nil
					continue Read
					// an error while removing the lock
				} else {
					log.Logger().Warnf("failed to cleanup the old lock configmap %s: %s\n", lock.Name, err.Error())
					return nil, err
				}
			}
			// compare the builds
			if data, err := compareBuildLocks(old.Data, lock.Data); err != nil {
				return nil, err
				// should update the lock in order to wait
			} else if data != nil {
				old.Data = data
				old, err = kubeClient.CoreV1().ConfigMaps(devNamespace).Update(old)
				if err != nil {
					status, ok := err.(*errors.StatusError)
					// the lock does not exist anymore, try to create it
					if ok && status.Status().Reason == metav1.StatusReasonNotFound {
						log.Logger().Infof("lock configmap %s deleted", lock.Name)
						continue Create
						// the lock has changed, read it again
					} else if ok && status.Status().Reason == metav1.StatusReasonConflict {
						log.Logger().Infof("lock configmap %s changed", lock.Name)
						old = nil
						continue Read
					}
					// an error updating the lock
					log.Logger().Warnf("failed to update the lock configmap %s: %s\n", lock.Name, err.Error())
					return nil, err
				}
			}
			// watch the lock for updates
			if old, err = watchBuildLock(kubeClient, old, pod, lock.Data); err != nil {
				return nil, err
				// the lock configmap was updated, read it again
			} else if old != nil {
				continue Read
				// the lock configmap was (probably) deleted, try to create it again
			} else {
				continue Create
			}
		}
	}
}
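
// A minimal usage sketch: a caller would typically acquire the lock right before
// applying changes to the target namespace and release it in a defer. The
// kubeClient, devNamespace and targetNamespace values are assumed to be provided
// by the surrounding command; the names here are illustrative only.
//
//	release, err := kube.AcquireBuildLock(kubeClient, devNamespace, targetNamespace)
//	if err != nil {
//		return err
//	}
//	defer func() {
//		if err := release(); err != nil {
//			log.Logger().Warnf("failed to release the build lock: %s", err.Error())
//		}
//	}()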

// makeBuildLock makes the lock configmap for the current build
func makeBuildLock(kubeClient kubernetes.Interface, devNamespace, namespace string) (*v1.ConfigMap, error) {
	// Get the build information from the environment
	now := time.Now().UTC().Format(time.RFC3339)
	owner := os.Getenv("REPO_OWNER")
	if owner == "" {
		log.Logger().Warnf("no REPO_OWNER provided")
		return nil, fmt.Errorf("no REPO_OWNER provided")
	}
	repository := os.Getenv("REPO_NAME")
	if repository == "" {
		log.Logger().Warnf("no REPO_NAME provided")
		return nil, fmt.Errorf("no REPO_NAME provided")
	}
	branch := os.Getenv("BRANCH_NAME")
	if branch == "" {
		log.Logger().Warnf("no BRANCH_NAME provided")
		return nil, fmt.Errorf("no BRANCH_NAME provided")
	}
	build := os.Getenv("BUILD_NUMBER")
	if _, err := strconv.Atoi(build); err != nil {
		log.Logger().Warnf("no BUILD_NUMBER provided: %s\n", err.Error())
		return nil, err
	}
	interpret := os.Getenv("JX_INTERPRET_PIPELINE") == "true"
	// Create the lock object
	lock := &v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("jx-lock-%s", namespace),
			Namespace: devNamespace,
			Labels: map[string]string{
				"namespace":  namespace,
				"owner":      owner,
				"repository": repository,
				"branch":     branch,
				"build":      build,
			},
			Annotations: map[string]string{
				"jenkins-x.io/created-by": "Jenkins X",
				"warning":                 "DO NOT REMOVE",
				"purpose": fmt.Sprintf("This is a deployment lock for the "+
					"namespace \"%s\". It prevents several deployments from "+
					"editing the same namespace at the same time. It will "+
					"automatically be removed once the deployment is "+
					"finished, or replaced by the next deployment to run.",
					namespace),
			},
		},
		Data: map[string]string{
			"namespace":  namespace,
			"owner":      owner,
			"repository": repository,
			"branch":     branch,
			"build":      build,
			"timestamp":  now,
		},
	}
	for k, v := range buildLockLabels {
		lock.Labels[k] = v
	}
	// Find our pod
	if !interpret {
		podList, err := kubeClient.CoreV1().Pods(devNamespace).List(metav1.ListOptions{
			LabelSelector: fmt.Sprintf("owner=%s,repository=%s,branch=%s,build=%s,jenkins.io/pipelineType=build", owner, repository, branch, build),
		})
		if err != nil {
			return nil, err
		} else if len(podList.Items) != 1 {
			return nil, fmt.Errorf("%d pods found for this job (owner=%s,repository=%s,branch=%s,build=%s,jenkins.io/pipelineType=build)",
				len(podList.Items), owner, repository, branch, build)
		}
		pod := &podList.Items[0]
		// the kubernetes library seems to forget APIVersion and Kind,
		// so fill those in if they are missing
		if pod.APIVersion == "" {
			pod.APIVersion = "v1"
		}
		if pod.Kind == "" {
			pod.Kind = "Pod"
		}
		lock.OwnerReferences = []metav1.OwnerReference{{
			APIVersion: pod.APIVersion,
			Kind:       pod.Kind,
			Name:       pod.Name,
			UID:        pod.UID,
		}}
		lock.Data["pod"] = pod.Name
	}
	return lock, nil
}

// getLockingPod checks the lock and returns its locking pod
// It receives the previously known pod, to avoid refreshing it if not needed.
// Returns true if the lock should be removed (because the lock is invalid,
// or its pod is missing or finished)
// Returns the pod if one is running, or nil if running locally
func getLockingPod(kubeClient kubernetes.Interface, namespace string, lock *v1.ConfigMap, pod *v1.Pod) (bool, *v1.Pod, error) {
	// check the lock
	for k, v := range buildLockLabels {
		if lock.Labels[k] != v {
			log.Logger().Warnf("the lock %s should have label \"%s: %s\"", lock.Name, k, v)
			return true, nil, nil
		}
	}
	if lock.Labels["namespace"] != namespace {
		log.Logger().Warnf("the lock %s should have label \"namespace: %s\"", lock.Name, namespace)
		return true, nil, nil
	}
	// the lock has no owner, check the timeout
	if len(lock.OwnerReferences) == 0 {
		expires, err := time.Parse(time.RFC3339, lock.Annotations["expires"])
		if err != nil {
			log.Logger().Warnf("cannot parse the lock's annotation \"expires: %s\": %s\n", lock.Annotations["expires"], err.Error())
			return false, nil, err
		} else if !expires.After(time.Now()) {
			log.Logger().Infof("the lock %s has expired", lock.Name)
			return true, nil, nil
		}
		return false, nil, nil
	}

	var owner *metav1.OwnerReference
	if len(lock.OwnerReferences) != 1 {
		err := fmt.Errorf("the lock %s has %d OwnerReferences", lock.Name, len(lock.OwnerReferences))
		log.Logger().Warnf(err.Error())
		return false, nil, err
	} else if owner = &lock.OwnerReferences[0]; owner.Kind != "Pod" || owner.Name == "" {
		err := fmt.Errorf("the lock %s has invalid OwnerReference %v", lock.Name, owner)
		log.Logger().Warn(err.Error())
		return false, nil, err
	}
	// get the current locking pod if not already provided
	if pod == nil || pod.Name != owner.Name {
		var err error
		pod, err = kubeClient.CoreV1().Pods(lock.Namespace).Get(owner.Name, metav1.GetOptions{})
		if err != nil {
			status, ok := err.(*errors.StatusError)
			// the pod does not exist anymore, the lock should be removed
			if ok && status.Status().Reason == metav1.StatusReasonNotFound {
				log.Logger().Infof("locking pod %s finished", owner.Name)
				return true, nil, nil
				// an error while getting the pod
			} else {
				log.Logger().Warnf("failed to get the locking pod %s: %s\n", lock.Data["pod"], err.Error())
				return false, nil, err
			}
		}
	}
	// check the pod's phase
	log.Logger().Infof("locking pod %s is in phase %s", pod.Name, pod.Status.Phase)
	if !buildLockPhaseRunning[pod.Status.Phase] {
		return true, nil, nil
	}
	return false, pod, nil
}
"+ 356 "if you are sure that the local build %s/%s #%s has finished, "+ 357 "you can clean the lock with\n\t`kubectl delete configmap -n %s %s`", 358 lock.Name, remaining.Round(time.Second), lock.Labels["repository"], 359 lock.Labels["branch"], lock.Labels["build"], lock.Namespace, lock.Name) 360 timer := time.NewTimer(remaining) 361 defer timer.Stop() 362 expChan = timer.C 363 } else { 364 expChan = make(chan time.Time) 365 } 366 // watch the lock for updates 367 lockWatch, err := kubeClient.CoreV1().ConfigMaps(lock.Namespace).Watch(metav1.SingleObject(lock.ObjectMeta)) 368 if err != nil { 369 log.Logger().Warnf("cannot watch the lock configmap %s: %s\n", lock.Name, err.Error()) 370 return nil, err 371 } 372 defer lockWatch.Stop() 373 lockChan := lockWatch.ResultChan() 374 // watch the pod for updates 375 var podChan <-chan watch.Event 376 if pod != nil { 377 podWatch, err := kubeClient.CoreV1().Pods(pod.Namespace).Watch(metav1.SingleObject(pod.ObjectMeta)) 378 if err != nil { 379 log.Logger().Warnf("cannot watch the locking pod %s: %s\n", pod.Name, err.Error()) 380 return nil, err 381 } 382 defer podWatch.Stop() 383 podChan = podWatch.ResultChan() 384 } else { 385 podChan = make(chan watch.Event) 386 } 387 for { 388 select { 389 // an event about the lock 390 case event := <-lockChan: 391 switch event.Type { 392 // the lock has changed 393 case watch.Added, watch.Modified: 394 lock = event.Object.(*v1.ConfigMap) 395 // if the waiting build has changed, read again 396 if next, err := compareBuildLocks(lock.Data, build); err != nil { 397 return nil, err 398 } else if next != nil { 399 return lock, nil 400 } 401 // the lock is deleted, try to create it 402 case watch.Deleted: 403 return nil, nil 404 // an error 405 case watch.Error: 406 err := errors.FromObject(event.Object) 407 log.Logger().Warnf("cannot watch the lock configmap %s: %s\n", lock.Name, err.Error()) 408 return nil, err 409 } 410 // an event about the locking pod 411 case event := <-podChan: 412 switch event.Type { 413 // the pod has changed, if its phase has changed, 414 // let's assume that the configmap has been deleted 415 case watch.Added, watch.Modified: 416 pod = event.Object.(*v1.Pod) 417 if !buildLockPhaseRunning[pod.Status.Phase] { 418 return nil, nil 419 } 420 // the pod was deleted, let's assume the configmap too 421 case watch.Deleted: 422 return nil, nil 423 // an error 424 case watch.Error: 425 err := errors.FromObject(event.Object) 426 log.Logger().Warnf("cannot watch the locking pod %s: %s\n", pod.Name, err.Error()) 427 return nil, err 428 } 429 // the lock has expired 430 case <-expChan: 431 return lock, nil 432 } 433 } 434 } 435 436 // compareBuildLocks compares two builds 437 // If next is nil, the build is already waiting 438 // if next is not nil, the build should wait by updating the lock with these data 439 func compareBuildLocks(old, new map[string]string) (map[string]string, error) { 440 sameRepo := true 441 for _, k := range [3]string{"owner", "repository", "branch"} { 442 if old[k] != new[k] { 443 sameRepo = false 444 } 445 } 446 // both are deplying the same repo and branch, compare build number 447 if sameRepo { 448 // same build and pod, we're already waiting 449 if old["build"] == new["build"] && old["pod"] == new["pod"] && old["expires"] == new["expires"] { 450 return nil, nil 451 } 452 // parse the builds 453 if oldBuild, err := strconv.Atoi(old["build"]); err != nil { 454 log.Logger().Warnf("cannot parse the lock's build number %s: %s\n", old["build"], err.Error()) 455 return nil, err 456 } 
else if newBuild, err := strconv.Atoi(new["build"]); err != nil { 457 log.Logger().Warnf("cannot parse the lock's build number %s: %s\n", new["build"], err.Error()) 458 return nil, err 459 // older build, give up 460 } else if oldBuild >= newBuild { 461 log.Logger().Warnf("newer build %d is waiting already", oldBuild) 462 return nil, fmt.Errorf("newer build %d is waiting already", oldBuild) 463 } 464 // parse the timestamps in order to keep th newest one 465 if oldTime, err := time.Parse(time.RFC3339, old["timestamp"]); err != nil { 466 log.Logger().Warnf("cannot parse the lock's timestamp %s: %s\n", old["timestamp"], err.Error()) 467 return nil, err 468 } else if newTime, err := time.Parse(time.RFC3339, new["timestamp"]); err != nil { 469 log.Logger().Warnf("cannot parse the lock's timestamp %s: %s\n", new["timestamp"], err.Error()) 470 return nil, err 471 // keep increasing the timestamp, for consistency reasons 472 } else if oldTime.After(newTime) { 473 next := map[string]string{} 474 for k, v := range new { 475 next[k] = v 476 } 477 next["timestamp"] = old["timestamp"] 478 return next, nil 479 // timestamp already right 480 } else { 481 return new, nil 482 } 483 // both are deploying different repos, keep the newest one 484 // it is a corner case for consistency 485 // but should not happen on a standard cluster 486 } else { 487 // parse the timestamps 488 if oldTime, err := time.Parse(time.RFC3339, old["timestamp"]); err != nil { 489 log.Logger().Warnf("cannot parse the lock's timestamp %s: %s\n", old["timestamp"], err.Error()) 490 return nil, err 491 } else if newTime, err := time.Parse(time.RFC3339, new["timestamp"]); err != nil { 492 log.Logger().Warnf("cannot parse the lock's timestamp %s: %s\n", new["timestamp"], err.Error()) 493 return nil, err 494 // newer deployment, wait 495 } else if newTime.After(oldTime) { 496 return new, nil 497 // older deployment, give up 498 } else { 499 return nil, fmt.Errorf("newer build %s is waiting already", oldTime) 500 } 501 } 502 }
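
// For illustration, given two lock data maps for the same owner, repository and
// branch (the values below are hypothetical), compareBuildLocks lets the higher
// build number take the waiting slot:
//
//	old := map[string]string{"owner": "acme", "repository": "app", "branch": "master",
//		"build": "11", "timestamp": "2020-01-01T10:00:00Z"}
//	new := map[string]string{"owner": "acme", "repository": "app", "branch": "master",
//		"build": "12", "timestamp": "2020-01-01T10:05:00Z"}
//	next, err := compareBuildLocks(old, new)
//	// next contains new's data (build 12 now waits), err is nil
//	// with the build numbers swapped, compareBuildLocks would instead return an
//	// error, because a newer build is already waiting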