#!/usr/bin/env python3
# Origin: sigs.k8s.io/kueue@v0.6.2/site/static/examples/python/sample-mpijob.py

import argparse
from kubernetes import config, client
import mpijob.models as models

# sample-mpijob.py
# This example will demonstrate full steps to submit a Job via the MPI Operator

# Make sure your cluster is running!
config.load_kube_config()
crd_api = client.CustomObjectsApi()
api_client = crd_api.api_client


def get_parser():
    """
    Build the command-line parser for this example.

    Every option has a default, so the script can be run with no arguments.

    Returns:
        argparse.ArgumentParser: parser exposing --job-name, --image,
        --command and --args.
    """
    parser = argparse.ArgumentParser(
        description="Submit Kueue MPI Operator Job Example",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--job-name",
        # The value is used as metadata.name (see generate_job_crd), not as a
        # generateName prefix.
        help="name to set for the job (a generateName prefix does not work here)",
        default="pi",
    )
    parser.add_argument(
        "--image",
        help="container image to use",
        default="mpioperator/mpi-pi:openmpi",
    )
    parser.add_argument(
        "--command",
        help="command to run",
        default="mpirun",
    )
    parser.add_argument(
        "--args",
        nargs="+",
        help="args for container",
        default=["-n", "2", "/home/mpiuser/pi"],
    )
    return parser


def _build_container(name, image, command, args):
    """
    Create a container with the settings shared by launcher and worker.

    Args:
        name: container name.
        image: container image reference.
        command: list with the entrypoint command.
        args: list of arguments passed to the command.

    Returns:
        client.V1Container: container running as UID 1000 with a limit of
        1 CPU and 1Gi of memory.
    """
    return client.V1Container(
        image=image,
        name=name,
        command=command,
        args=args,
        security_context=client.V1SecurityContext(run_as_user=1000),
        resources={
            "limits": {
                "cpu": 1,
                "memory": "1Gi",
            }
        },
    )


def _build_replica_spec(replicas, container):
    """
    Wrap a single container in a replica spec with the given replica count.
    """
    return models.V2beta1ReplicaSpec(
        replicas=replicas,
        template=client.V1PodTemplateSpec(
            spec=client.V1PodSpec(containers=[container])
        ),
    )


def generate_job_crd(job_name, image, command, args):
    """
    Generate an equivalent job CRD to sample-job.yaml

    Args:
        job_name: value used as the MPIJob's metadata.name.
        image: container image used for both the launcher and the workers.
        command: command run by the launcher container.
        args: list of arguments passed to the launcher command.

    Returns:
        models.V2beta1MPIJob: MPIJob (kubeflow.org/v2beta1) with one launcher
        replica and two worker replicas, labelled for the "user-queue" queue.
    """
    metadata = client.V1ObjectMeta(
        name=job_name, labels={"kueue.x-k8s.io/queue-name": "user-queue"}
    )

    # containers for launcher and worker
    launcher_container = _build_container(
        name="mpi-launcher",
        image=image,
        command=[command],
        args=args,
    )
    # The worker always runs sshd; only the launcher uses the caller-supplied
    # command and args.
    worker_container = _build_container(
        name="mpi-worker",
        image=image,
        command=["/usr/sbin/sshd"],
        args=["-De", "-f", "/home/mpiuser/.sshd_config"],
    )

    # Create the Launcher and worker replica specs
    launcher = _build_replica_spec(1, launcher_container)
    worker = _build_replica_spec(2, worker_container)

    # runPolicy for jobspec
    policy = models.V2beta1RunPolicy(
        clean_pod_policy="Running", ttl_seconds_after_finished=60
    )

    # Create the jobspec
    jobspec = models.V2beta1MPIJobSpec(
        slots_per_worker=1,
        run_policy=policy,
        ssh_auth_mount_path="/home/mpiuser/.ssh",
        mpi_replica_specs={"Launcher": launcher, "Worker": worker},
    )
    return models.V2beta1MPIJob(
        metadata=metadata,
        api_version="kubeflow.org/v2beta1",
        kind="MPIJob",
        spec=jobspec,
    )


def main():
    """
    Run an MPI job. This requires the MPI Operator to be installed.

    Parses the command line (unknown arguments are ignored), builds the MPIJob
    and creates it in the "default" namespace.
    """
    parser = get_parser()
    args, _ = parser.parse_known_args()

    # Generate a CRD spec
    crd = generate_job_crd(args.job_name, args.image, args.command, args.args)

    print(f"📦️ Container image selected is {args.image}...")
    print(f"⭐️ Creating sample job with name {args.job_name}...")
    # Reuse the module-level client created right after load_kube_config()
    # instead of constructing a second, identical one.
    crd_api.create_namespaced_custom_object(
        group="kubeflow.org",
        version="v2beta1",
        namespace="default",
        plural="mpijobs",
        body=crd,
    )
    print(
        'Use:\n"kubectl get queue" to see queue assignment\n"kubectl get jobs" to see jobs'
    )


if __name__ == "__main__":
    main()