sigs.k8s.io/kueue@v0.6.2/site/static/examples/jobs/sample-mxjob.yaml (about)

     1  # Sample Kubeflow MXJob submitted through a Kueue queue.
     2  # It declares three replica groups (Scheduler, Server, Worker) with one
     3  # replica each; only the Worker sets an explicit command and args.
     4  apiVersion: kubeflow.org/v1
     5  kind: MXJob
     6  metadata:
     7    name: mxnet-job
     8    labels:
     9      # Name of the Kueue queue this job is submitted to. Presumably a
    10      # LocalQueue called user-queue must exist in the job's namespace;
    11      # confirm against your cluster setup.
    12      kueue.x-k8s.io/queue-name: user-queue
    13  spec:
    14    jobMode: MXTrain
    15    mxReplicaSpecs:
    16      Scheduler:
    17        replicas: 1
    18        restartPolicy: Never
    19        template:
    20          spec:
    21            containers:
    22              - name: mxnet
    23                # NOTE(review): `latest` is a mutable tag (used by all three
    24                # replica groups); consider pinning a version.
    25                image: kubeflow/mxnet-gpu:latest
    26                resources:
    27                  # Only limits are declared; no explicit requests.
    28                  limits:
    29                    cpu: 100m
    30                    memory: 0.2Gi
    31                ports:
    32                  - containerPort: 9991
    33                    name: mxjob-port
    34      Server:
    35        replicas: 1
    36        restartPolicy: Never
    37        template:
    38          spec:
    39            containers:
    40              - name: mxnet
    41                image: kubeflow/mxnet-gpu:latest
    42                resources:
    43                  limits:
    44                    cpu: 100m
    45                    memory: 0.2Gi
    46                ports:
    47                  - containerPort: 9991
    48                    name: mxjob-port
    49      Worker:
    50        replicas: 1
    51        restartPolicy: Never
    52        template:
    53          spec:
    54            containers:
    55              - name: mxnet
    56                image: kubeflow/mxnet-gpu:latest
    57                # Invokes python3 on the train_mnist.py example with
    58                # --num-epochs=1.
    59                command:
    60                  - python3
    61                args:
    62                  - /mxnet/mxnet/example/image-classification/train_mnist.py
    63                  - --num-epochs=1
    64                  - --num-layers=2
    65                  - --kv-store=dist_device_sync
    66                resources:
    67                  limits:
    68                    cpu: 2
    69                    memory: 1Gi
    70                ports:
    71                  - containerPort: 9991
    72                    name: mxjob-port