github.com/kubeflow/training-operator@v1.7.0/sdk/python/examples/kubeflow-tfjob-sdk.ipynb (about) 1 { 2 "cells": [ 3 { 4 "cell_type": "markdown", 5 "metadata": { 6 "pycharm": { 7 "name": "#%% md\n" 8 } 9 }, 10 "source": [ 11 "# Sample for Kubeflow TFJob SDK" 12 ] 13 }, 14 { 15 "attachments": {}, 16 "cell_type": "markdown", 17 "metadata": { 18 "pycharm": { 19 "name": "#%% md\n" 20 } 21 }, 22 "source": [ 23 "TODO (andreyvelich): This example should be updated with the new SDK version.\n", 24 "\n", 25 "This is a sample for Kubeflow TFJob SDK `kubeflow-tfjob`.\n", 26 "\n", 27 "The notebook shows how to use Kubeflow TFJob SDK to create, get, wait, check and delete tfjob." 28 ] 29 }, 30 { 31 "cell_type": "code", 32 "execution_count": 1, 33 "metadata": { 34 "pycharm": { 35 "name": "#%%\n" 36 } 37 }, 38 "outputs": [], 39 "source": [ 40 "from kubernetes.client import V1PodTemplateSpec\n", 41 "from kubernetes.client import V1ObjectMeta\n", 42 "from kubernetes.client import V1PodSpec\n", 43 "from kubernetes.client import V1Container\n", 44 "\n", 45 "from kubeflow.training import constants\n", 46 "from kubeflow.training.utils import utils\n", 47 "from kubeflow.training import V1ReplicaSpec\n", 48 "from kubeflow.training import KubeflowOrgV1TFJob\n", 49 "from kubeflow.training import KubeflowOrgV1TFJobSpec\n", 50 "from kubeflow.training import V1RunPolicy\n", 51 "from kubeflow.training import TFJobClient" 52 ] 53 }, 54 { 55 "cell_type": "markdown", 56 "metadata": { 57 "pycharm": { 58 "name": "#%% md\n" 59 } 60 }, 61 "source": [ 62 "Define the namespace in which the TFJob will be created. The function below returns the namespace the SDK is currently running in when it runs inside the cluster; otherwise it returns the `default` namespace." 
63 ] 64 }, 65 { 66 "cell_type": "code", 67 "execution_count": 2, 68 "metadata": { 69 "pycharm": { 70 "name": "#%%\n" 71 } 72 }, 73 "outputs": [], 74 "source": [ 75 "namespace = utils.get_default_target_namespace()" 76 ] 77 }, 78 { 79 "cell_type": "markdown", 80 "metadata": { 81 "pycharm": { 82 "name": "#%% md\n" 83 } 84 }, 85 "source": [ 86 "### Define TFJob" 87 ] 88 }, 89 { 90 "cell_type": "markdown", 91 "metadata": { 92 "pycharm": { 93 "name": "#%% md\n" 94 } 95 }, 96 "source": [ 97 "The demo creates a TFJob with Chief, PS and Worker replicas to run the mnist sample." 98 ] 99 }, 100 { 101 "cell_type": "code", 102 "execution_count": 3, 103 "metadata": { 104 "pycharm": { 105 "name": "#%%\n" 106 } 107 }, 108 "outputs": [], 109 "source": [ 110 "container = V1Container(\n", 111 " name=\"tensorflow\",\n", 112 " image=\"gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0\",\n", 113 " command=[\n", 114 " \"python\",\n", 115 " \"/var/tf_mnist/mnist_with_summaries.py\",\n", 116 " \"--log_dir=/train/logs\", \"--learning_rate=0.01\",\n", 117 " \"--batch_size=150\"\n", 118 " ]\n", 119 ")\n", 120 "\n", 121 "worker = V1ReplicaSpec(\n", 122 " replicas=2,\n", 123 " restart_policy=\"Never\",\n", 124 " template=V1PodTemplateSpec(\n", 125 " spec=V1PodSpec(\n", 126 " containers=[container]\n", 127 " )\n", 128 " )\n", 129 ")\n", 130 "\n", 131 "chief = V1ReplicaSpec(\n", 132 " replicas=1,\n", 133 " restart_policy=\"Never\",\n", 134 " template=V1PodTemplateSpec(\n", 135 " spec=V1PodSpec(\n", 136 " containers=[container]\n", 137 " )\n", 138 " )\n", 139 ")\n", 140 "\n", 141 "ps = V1ReplicaSpec(\n", 142 " replicas=1,\n", 143 " restart_policy=\"Never\",\n", 144 " template=V1PodTemplateSpec(\n", 145 " spec=V1PodSpec(\n", 146 " containers=[container]\n", 147 " )\n", 148 " )\n", 149 ")\n", 150 "\n", 151 "tfjob = KubeflowOrgV1TFJob(\n", 152 " api_version=\"kubeflow.org/v1\",\n", 153 " kind=\"TFJob\",\n", 154 " metadata=V1ObjectMeta(name=\"mnist\",namespace=namespace),\n", 155 " spec=KubeflowOrgV1TFJobSpec(\n", 156 " 
run_policy=V1RunPolicy(clean_pod_policy=\"None\"),\n", 157 " tf_replica_specs={\"Worker\": worker,\n", 158 " \"Chief\": chief,\n", 159 " \"PS\": ps}\n", 160 " )\n", 161 ")" 162 ] 163 }, 164 { 165 "cell_type": "markdown", 166 "metadata": { 167 "pycharm": { 168 "name": "#%% md\n" 169 } 170 }, 171 "source": [ 172 "### Create TFJob" 173 ] 174 }, 175 { 176 "cell_type": "code", 177 "execution_count": 4, 178 "metadata": { 179 "pycharm": { 180 "name": "#%%\n" 181 } 182 }, 183 "outputs": [ 184 { 185 "data": { 186 "text/plain": [ 187 "{'apiVersion': 'kubeflow.org/v1',\n", 188 " 'kind': 'TFJob',\n", 189 " 'metadata': {'creationTimestamp': '2021-10-02T19:02:08Z',\n", 190 " 'generation': 1,\n", 191 " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", 192 " 'fieldsType': 'FieldsV1',\n", 193 " 'fieldsV1': {'f:spec': {'.': {},\n", 194 " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}},\n", 195 " 'f:tfReplicaSpecs': {'.': {},\n", 196 " 'f:Chief': {'.': {},\n", 197 " 'f:replicas': {},\n", 198 " 'f:restartPolicy': {},\n", 199 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", 200 " 'f:PS': {'.': {},\n", 201 " 'f:replicas': {},\n", 202 " 'f:restartPolicy': {},\n", 203 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", 204 " 'f:Worker': {'.': {},\n", 205 " 'f:replicas': {},\n", 206 " 'f:restartPolicy': {},\n", 207 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", 208 " 'manager': 'OpenAPI-Generator',\n", 209 " 'operation': 'Update',\n", 210 " 'time': '2021-10-02T19:02:08Z'}],\n", 211 " 'name': 'mnist',\n", 212 " 'namespace': 'default',\n", 213 " 'resourceVersion': '6042',\n", 214 " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'},\n", 215 " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", 216 " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", 217 " 'restartPolicy': 'Never',\n", 218 " 'template': {'spec': {'containers': [{'command': ['python',\n", 219 " '/var/tf_mnist/mnist_with_summaries.py',\n", 220 " 
'--log_dir=/train/logs',\n", 221 " '--learning_rate=0.01',\n", 222 " '--batch_size=150'],\n", 223 " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 224 " 'name': 'tensorflow'}]}}},\n", 225 " 'PS': {'replicas': 1,\n", 226 " 'restartPolicy': 'Never',\n", 227 " 'template': {'spec': {'containers': [{'command': ['python',\n", 228 " '/var/tf_mnist/mnist_with_summaries.py',\n", 229 " '--log_dir=/train/logs',\n", 230 " '--learning_rate=0.01',\n", 231 " '--batch_size=150'],\n", 232 " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 233 " 'name': 'tensorflow'}]}}},\n", 234 " 'Worker': {'replicas': 2,\n", 235 " 'restartPolicy': 'Never',\n", 236 " 'template': {'spec': {'containers': [{'command': ['python',\n", 237 " '/var/tf_mnist/mnist_with_summaries.py',\n", 238 " '--log_dir=/train/logs',\n", 239 " '--learning_rate=0.01',\n", 240 " '--batch_size=150'],\n", 241 " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 242 " 'name': 'tensorflow'}]}}}}}}" 243 ] 244 }, 245 "execution_count": 4, 246 "metadata": {}, 247 "output_type": "execute_result" 248 } 249 ], 250 "source": [ 251 "tfjob_client = TFJobClient()\n", 252 "tfjob_client.create(tfjob, namespace=namespace)" 253 ] 254 }, 255 { 256 "cell_type": "markdown", 257 "metadata": { 258 "pycharm": { 259 "name": "#%% md\n" 260 } 261 }, 262 "source": [ 263 "### Get the created TFJob " 264 ] 265 }, 266 { 267 "cell_type": "code", 268 "execution_count": 5, 269 "metadata": { 270 "pycharm": { 271 "name": "#%%\n" 272 } 273 }, 274 "outputs": [ 275 { 276 "data": { 277 "text/plain": [ 278 "{'apiVersion': 'kubeflow.org/v1',\n", 279 " 'kind': 'TFJob',\n", 280 " 'metadata': {'creationTimestamp': '2021-10-02T19:02:08Z',\n", 281 " 'generation': 1,\n", 282 " 'managedFields': [{'apiVersion': 'kubeflow.org/v1',\n", 283 " 'fieldsType': 'FieldsV1',\n", 284 " 'fieldsV1': {'f:spec': {'.': {},\n", 285 " 'f:runPolicy': {'.': {}, 'f:cleanPodPolicy': {}},\n", 286 " 'f:tfReplicaSpecs': {'.': {},\n", 287 " 'f:Chief': 
{'.': {},\n", 288 " 'f:replicas': {},\n", 289 " 'f:restartPolicy': {},\n", 290 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", 291 " 'f:PS': {'.': {},\n", 292 " 'f:replicas': {},\n", 293 " 'f:restartPolicy': {},\n", 294 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}},\n", 295 " 'f:Worker': {'.': {},\n", 296 " 'f:replicas': {},\n", 297 " 'f:restartPolicy': {},\n", 298 " 'f:template': {'.': {}, 'f:spec': {'.': {}, 'f:containers': {}}}}}}},\n", 299 " 'manager': 'OpenAPI-Generator',\n", 300 " 'operation': 'Update',\n", 301 " 'time': '2021-10-02T19:02:08Z'},\n", 302 " {'apiVersion': 'kubeflow.org/v1',\n", 303 " 'fieldsType': 'FieldsV1',\n", 304 " 'fieldsV1': {'f:status': {'.': {},\n", 305 " 'f:conditions': {},\n", 306 " 'f:replicaStatuses': {'.': {},\n", 307 " 'f:Chief': {'.': {}, 'f:active': {}},\n", 308 " 'f:PS': {'.': {}, 'f:active': {}},\n", 309 " 'f:Worker': {}},\n", 310 " 'f:startTime': {}}},\n", 311 " 'manager': 'manager',\n", 312 " 'operation': 'Update',\n", 313 " 'time': '2021-10-02T19:02:10Z'}],\n", 314 " 'name': 'mnist',\n", 315 " 'namespace': 'default',\n", 316 " 'resourceVersion': '6105',\n", 317 " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'},\n", 318 " 'spec': {'runPolicy': {'cleanPodPolicy': 'None'},\n", 319 " 'tfReplicaSpecs': {'Chief': {'replicas': 1,\n", 320 " 'restartPolicy': 'Never',\n", 321 " 'template': {'spec': {'containers': [{'command': ['python',\n", 322 " '/var/tf_mnist/mnist_with_summaries.py',\n", 323 " '--log_dir=/train/logs',\n", 324 " '--learning_rate=0.01',\n", 325 " '--batch_size=150'],\n", 326 " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 327 " 'name': 'tensorflow'}]}}},\n", 328 " 'PS': {'replicas': 1,\n", 329 " 'restartPolicy': 'Never',\n", 330 " 'template': {'spec': {'containers': [{'command': ['python',\n", 331 " '/var/tf_mnist/mnist_with_summaries.py',\n", 332 " '--log_dir=/train/logs',\n", 333 " '--learning_rate=0.01',\n", 334 " '--batch_size=150'],\n", 335 " 
'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 336 " 'name': 'tensorflow'}]}}},\n", 337 " 'Worker': {'replicas': 2,\n", 338 " 'restartPolicy': 'Never',\n", 339 " 'template': {'spec': {'containers': [{'command': ['python',\n", 340 " '/var/tf_mnist/mnist_with_summaries.py',\n", 341 " '--log_dir=/train/logs',\n", 342 " '--learning_rate=0.01',\n", 343 " '--batch_size=150'],\n", 344 " 'image': 'gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0',\n", 345 " 'name': 'tensorflow'}]}}}}},\n", 346 " 'status': {'conditions': [{'lastTransitionTime': '2021-10-02T19:02:08Z',\n", 347 " 'lastUpdateTime': '2021-10-02T19:02:08Z',\n", 348 " 'message': 'TFJob mnist is created.',\n", 349 " 'reason': 'TFJobCreated',\n", 350 " 'status': 'True',\n", 351 " 'type': 'Created'},\n", 352 " {'lastTransitionTime': '2021-10-02T19:02:10Z',\n", 353 " 'lastUpdateTime': '2021-10-02T19:02:10Z',\n", 354 " 'message': 'TFJob default/mnist is running.',\n", 355 " 'reason': 'TFJobRunning',\n", 356 " 'status': 'True',\n", 357 " 'type': 'Running'}],\n", 358 " 'replicaStatuses': {'Chief': {'active': 1},\n", 359 " 'PS': {'active': 1},\n", 360 " 'Worker': {}},\n", 361 " 'startTime': '2021-10-02T19:02:09Z'}}" 362 ] 363 }, 364 "execution_count": 5, 365 "metadata": {}, 366 "output_type": "execute_result" 367 } 368 ], 369 "source": [ 370 "tfjob_client.get('mnist', namespace=namespace)" 371 ] 372 }, 373 { 374 "cell_type": "markdown", 375 "metadata": { 376 "pycharm": { 377 "name": "#%% md\n" 378 } 379 }, 380 "source": [ 381 "### Get the TFJob status, check if the TFJob has been started." 
382 ] 383 }, 384 { 385 "cell_type": "code", 386 "execution_count": 6, 387 "metadata": { 388 "pycharm": { 389 "name": "#%%\n" 390 } 391 }, 392 "outputs": [ 393 { 394 "data": { 395 "text/plain": [ 396 "'Running'" 397 ] 398 }, 399 "execution_count": 6, 400 "metadata": {}, 401 "output_type": "execute_result" 402 } 403 ], 404 "source": [ 405 "tfjob_client.get_job_status('mnist', namespace=namespace)" 406 ] 407 }, 408 { 409 "cell_type": "markdown", 410 "metadata": { 411 "pycharm": { 412 "name": "#%% md\n" 413 } 414 }, 415 "source": [ 416 "### Wait for the specified job to finish" 417 ] 418 }, 419 { 420 "cell_type": "code", 421 "execution_count": 7, 422 "metadata": { 423 "pycharm": { 424 "name": "#%%\n" 425 } 426 }, 427 "outputs": [ 428 { 429 "name": "stdout", 430 "output_type": "stream", 431 "text": [ 432 "NAME STATE TIME \n", 433 "mnist Running 2021-10-02T19:02:10Z \n", 434 "mnist Running 2021-10-02T19:02:10Z \n", 435 "mnist Running 2021-10-02T19:02:10Z \n", 436 "mnist Succeeded 2021-10-02T19:04:10Z \n" 437 ] 438 } 439 ], 440 "source": [ 441 "tfjob_client.wait_for_job('mnist', namespace=namespace, watch=True)" 442 ] 443 }, 444 { 445 "cell_type": "markdown", 446 "metadata": { 447 "pycharm": { 448 "name": "#%% md\n" 449 } 450 }, 451 "source": [ 452 "### Check if the TFJob succeeded" 453 ] 454 }, 455 { 456 "cell_type": "code", 457 "execution_count": 8, 458 "metadata": { 459 "pycharm": { 460 "name": "#%%\n" 461 }, 462 "scrolled": true 463 }, 464 "outputs": [ 465 { 466 "data": { 467 "text/plain": [ 468 "True" 469 ] 470 }, 471 "execution_count": 8, 472 "metadata": {}, 473 "output_type": "execute_result" 474 } 475 ], 476 "source": [ 477 "tfjob_client.is_job_succeeded('mnist', namespace=namespace)" 478 ] 479 }, 480 { 481 "cell_type": "markdown", 482 "metadata": { 483 "pycharm": { 484 "name": "#%% md\n" 485 } 486 }, 487 "source": [ 488 "### Get the TFJob training logs." 
489 ] 490 }, 491 { 492 "cell_type": "code", 493 "execution_count": 9, 494 "metadata": { 495 "pycharm": { 496 "name": "#%%\n" 497 } 498 }, 499 "outputs": [ 500 { 501 "name": "stderr", 502 "output_type": "stream", 503 "text": [ 504 "The logs of Pod mnist-chief-0:\n", 505 " WARNING:tensorflow:From /var/tf_mnist/mnist_with_summaries.py:39: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 506 "Instructions for updating:\n", 507 "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", 508 "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", 509 "Instructions for updating:\n", 510 "Please write your own downloading logic.\n", 511 "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:252: wrapped_fn (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", 512 "Instructions for updating:\n", 513 "Please use urllib or similar directly.\n", 514 "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 515 "Instructions for updating:\n", 516 "Please use tf.data to implement this functionality.\n", 517 "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 518 "Instructions for updating:\n", 519 "Please use tf.data to implement this 
functionality.\n", 520 "WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: __init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 521 "Instructions for updating:\n", 522 "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", 523 "2021-10-02 19:02:25.434889: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", 524 "Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n", 525 "Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz\n", 526 "Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n", 527 "Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz\n", 528 "Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n", 529 "Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz\n", 530 "Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n", 531 "Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz\n", 532 "Accuracy at step 0: 0.1348\n", 533 "Accuracy at step 10: 0.787\n", 534 "Accuracy at step 20: 0.8648\n", 535 "Accuracy at step 30: 0.9056\n", 536 "Accuracy at step 40: 0.9162\n", 537 "Accuracy at step 50: 0.9237\n", 538 "Accuracy at step 60: 0.926\n", 539 "Accuracy at step 70: 0.9365\n", 540 "Accuracy at step 80: 0.9371\n", 541 "Accuracy at step 90: 0.9352\n", 542 "Adding run metadata for 99\n", 543 "Accuracy at step 100: 0.9439\n", 544 "Accuracy at step 110: 0.9434\n", 545 "Accuracy at step 120: 0.9382\n", 546 "Accuracy at step 130: 0.9444\n", 547 "Accuracy at step 140: 0.9487\n", 548 "Accuracy at step 150: 0.9462\n", 549 "Accuracy at step 160: 0.9454\n", 550 "Accuracy at step 170: 0.9426\n", 551 "Accuracy at step 180: 0.9473\n", 552 "Accuracy at step 190: 0.9536\n", 553 "Adding run 
metadata for 199\n", 554 "Accuracy at step 200: 0.9559\n", 555 "Accuracy at step 210: 0.9519\n", 556 "Accuracy at step 220: 0.9485\n", 557 "Accuracy at step 230: 0.95\n", 558 "Accuracy at step 240: 0.9563\n", 559 "Accuracy at step 250: 0.9575\n", 560 "Accuracy at step 260: 0.9591\n", 561 "Accuracy at step 270: 0.9589\n", 562 "Accuracy at step 280: 0.957\n", 563 "Accuracy at step 290: 0.9581\n", 564 "Adding run metadata for 299\n", 565 "Accuracy at step 300: 0.9606\n", 566 "Accuracy at step 310: 0.9585\n", 567 "Accuracy at step 320: 0.9593\n", 568 "Accuracy at step 330: 0.958\n", 569 "Accuracy at step 340: 0.9537\n", 570 "Accuracy at step 350: 0.961\n", 571 "Accuracy at step 360: 0.9615\n", 572 "Accuracy at step 370: 0.962\n", 573 "Accuracy at step 380: 0.956\n", 574 "Accuracy at step 390: 0.9591\n", 575 "Adding run metadata for 399\n", 576 "Accuracy at step 400: 0.9554\n", 577 "Accuracy at step 410: 0.9604\n", 578 "Accuracy at step 420: 0.9638\n", 579 "Accuracy at step 430: 0.9614\n", 580 "Accuracy at step 440: 0.9645\n", 581 "Accuracy at step 450: 0.9683\n", 582 "Accuracy at step 460: 0.9591\n", 583 "Accuracy at step 470: 0.9645\n", 584 "Accuracy at step 480: 0.9557\n", 585 "Accuracy at step 490: 0.9647\n", 586 "Adding run metadata for 499\n", 587 "Accuracy at step 500: 0.9611\n", 588 "Accuracy at step 510: 0.9623\n", 589 "Accuracy at step 520: 0.9606\n", 590 "Accuracy at step 530: 0.9661\n", 591 "Accuracy at step 540: 0.9684\n", 592 "Accuracy at step 550: 0.9629\n", 593 "Accuracy at step 560: 0.9605\n", 594 "Accuracy at step 570: 0.9672\n", 595 "Accuracy at step 580: 0.9712\n", 596 "Accuracy at step 590: 0.9649\n", 597 "Adding run metadata for 599\n", 598 "Accuracy at step 600: 0.9679\n", 599 "Accuracy at step 610: 0.9689\n", 600 "Accuracy at step 620: 0.9664\n", 601 "Accuracy at step 630: 0.9667\n", 602 "Accuracy at step 640: 0.9644\n", 603 "Accuracy at step 650: 0.9721\n", 604 "Accuracy at step 660: 0.965\n", 605 "Accuracy at step 670: 0.9646\n", 606 "Accuracy 
at step 680: 0.9661\n", 607 "Accuracy at step 690: 0.9623\n", 608 "Adding run metadata for 699\n", 609 "Accuracy at step 700: 0.9581\n", 610 "Accuracy at step 710: 0.9649\n", 611 "Accuracy at step 720: 0.9633\n", 612 "Accuracy at step 730: 0.9659\n", 613 "Accuracy at step 740: 0.9607\n", 614 "Accuracy at step 750: 0.9676\n", 615 "Accuracy at step 760: 0.9697\n", 616 "Accuracy at step 770: 0.9662\n", 617 "Accuracy at step 780: 0.9659\n", 618 "Accuracy at step 790: 0.9633\n", 619 "Adding run metadata for 799\n", 620 "Accuracy at step 800: 0.9638\n", 621 "Accuracy at step 810: 0.9592\n", 622 "Accuracy at step 820: 0.9642\n", 623 "Accuracy at step 830: 0.9682\n", 624 "Accuracy at step 840: 0.9695\n", 625 "Accuracy at step 850: 0.9657\n", 626 "Accuracy at step 860: 0.9696\n", 627 "Accuracy at step 870: 0.9695\n", 628 "Accuracy at step 880: 0.9711\n", 629 "Accuracy at step 890: 0.9687\n", 630 "Adding run metadata for 899\n", 631 "Accuracy at step 900: 0.9689\n", 632 "Accuracy at step 910: 0.9699\n", 633 "Accuracy at step 920: 0.9677\n", 634 "Accuracy at step 930: 0.9689\n", 635 "Accuracy at step 940: 0.9702\n", 636 "Accuracy at step 950: 0.9716\n", 637 "Accuracy at step 960: 0.9692\n", 638 "Accuracy at step 970: 0.967\n", 639 "Accuracy at step 980: 0.9687\n", 640 "Accuracy at step 990: 0.9665\n", 641 "Adding run metadata for 999\n", 642 "\n" 643 ] 644 } 645 ], 646 "source": [ 647 "tfjob_client.get_logs('mnist', namespace=namespace)" 648 ] 649 }, 650 { 651 "cell_type": "markdown", 652 "metadata": { 653 "pycharm": { 654 "name": "#%% md\n" 655 } 656 }, 657 "source": [ 658 "### Delete the TFJob" 659 ] 660 }, 661 { 662 "cell_type": "code", 663 "execution_count": 10, 664 "metadata": { 665 "pycharm": { 666 "name": "#%%\n" 667 } 668 }, 669 "outputs": [ 670 { 671 "data": { 672 "text/plain": [ 673 "{'kind': 'Status',\n", 674 " 'apiVersion': 'v1',\n", 675 " 'metadata': {},\n", 676 " 'status': 'Success',\n", 677 " 'details': {'name': 'mnist',\n", 678 " 'group': 'kubeflow.org',\n", 
679 " 'kind': 'tfjobs',\n", 680 " 'uid': '4a0b9764-b5c4-4d30-95c3-d3c56d342803'}}" 681 ] 682 }, 683 "execution_count": 10, 684 "metadata": {}, 685 "output_type": "execute_result" 686 } 687 ], 688 "source": [ 689 "tfjob_client.delete('mnist', namespace=namespace)" 690 ] 691 } 692 ], 693 "metadata": { 694 "kernelspec": { 695 "display_name": "Python 3", 696 "language": "python", 697 "name": "python3" 698 }, 699 "language_info": { 700 "codemirror_mode": { 701 "name": "ipython", 702 "version": 3 703 }, 704 "file_extension": ".py", 705 "mimetype": "text/x-python", 706 "name": "python", 707 "nbconvert_exporter": "python", 708 "pygments_lexer": "ipython3", 709 "version": "3.7.3" 710 } 711 }, 712 "nbformat": 4, 713 "nbformat_minor": 4 714 }