github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/python/examples/sdk/multi-object-operations.ipynb (about)

     1  {
     2   "cells": [
     3    {
     4     "attachments": {},
     5     "cell_type": "markdown",
     6     "metadata": {
     7      "collapsed": false
     8     },
     9     "source": [
    10      "## Working with multiple objects in the Python SDK\n",
    11      "AIS supports multi-object operations on groups of objects. An `ObjectGroup` can be created with one of:\n",
    12      "- a list of object names\n",
    13      "- an [ObjectRange](https://github.com/NVIDIA/aistore/blob/main/python/aistore/sdk/multiobj/object_range.py)\n",
    14      "- a string template."
    15     ]
    16    },
    17    {
    18     "cell_type": "code",
    19     "execution_count": null,
    20     "metadata": {},
    21     "outputs": [],
    22     "source": [
    23      "%pip install aistore"
    24     ]
    25    },
    26    {
    27     "cell_type": "markdown",
    28     "metadata": {
    29      "collapsed": false
    30     },
    31     "source": [
    32      "### Set up the client and create necessary buckets"
    33     ]
    34    },
    35    {
    36     "cell_type": "code",
    37     "execution_count": null,
    38     "metadata": {
    39      "collapsed": false
    40     },
    41     "outputs": [],
    42     "source": [
    43      "import os\n",
    44      "\n",
    45      "from aistore import Client\n",
    46      "from aistore.sdk.errors import AISError\n",
    47      "\n",
    48      "ais_url = os.getenv(\"AIS_ENDPOINT\", \"http://localhost:8080\")  # default targets a local cluster\n",
    49      "client = Client(ais_url)\n",
    50      "bucket, copy_dest_bucket, transform_dest_bucket = (\n",
    51      "    client.bucket(name).create(exist_ok=True)\n",
    52      "    for name in (\"my-bck\", \"copy-destination-bucket\", \"transform-destination-bucket\")\n",
    53      ")"
    54     ]
    55    },
    56    {
    57     "cell_type": "markdown",
    58     "metadata": {
    59      "collapsed": false
    60     },
    61     "source": [
    62      "### Create some objects in the bucket"
    63     ]
    64    },
    65    {
    66     "cell_type": "code",
    67     "execution_count": null,
    68     "metadata": {
    69      "collapsed": false
    70     },
    71     "outputs": [],
    72     "source": [
    73      "object_names = [f\"example_obj_{index}\" for index in range(10)]\n",
    74      "for obj_name in object_names:\n",
    75      "    bucket.object(obj_name).put_content(b\"object content\")"
    76     ]
    77    },
    78    {
    79     "cell_type": "markdown",
    80     "metadata": {
    81      "collapsed": false
    82     },
    83     "source": [
    84      "### Create Object Group by list of names"
    85     ]
    86    },
    87    {
    88     "cell_type": "code",
    89     "execution_count": null,
    90     "metadata": {
    91      "collapsed": false
    92     },
    93     "outputs": [],
    94     "source": [
    95      "my_objects = bucket.objects(obj_names=object_names)  # group addressing every name in object_names"
    96     ]
    97    },
    98    {
    99     "cell_type": "markdown",
   100     "metadata": {
   101      "collapsed": false
   102     },
   103     "source": [
   104      "### Create Object Group by ObjectRange"
   105     ]
   106    },
   107    {
   108     "cell_type": "code",
   109     "execution_count": null,
   110     "metadata": {
   111      "collapsed": false
   112     },
   113     "outputs": [],
   114     "source": [
   115      "from aistore.sdk.multiobj import ObjectRange\n",
   116      "\n",
   117      "my_object_range = ObjectRange(prefix=\"example_obj_\", min_index=1, max_index=3)\n",
   118      "my_objects = bucket.objects(obj_range=my_object_range)  # rebinds my_objects to the range-based group"
   119     ]
   120    },
   121    {
   122     "cell_type": "markdown",
   123     "metadata": {
   124      "collapsed": false
   125     },
   126     "source": [
   127      "### Create Object Group by Template String\n",
   128      "String templates can be passed directly to AIS, following the [syntax described here](https://github.com/NVIDIA/aistore/blob/master/docs/batch.md#operations-on-multiple-selected-objects)."
   129     ]
   130    },
   131    {
   132     "cell_type": "code",
   133     "execution_count": null,
   134     "metadata": {
   135      "collapsed": false
   136     },
   137     "outputs": [],
   138     "source": [
   139      "# Equivalent to the ObjectRange above: prefix \"example_obj_\" with indices 1 to 3\n",
   140      "my_object_template = \"example_obj_{1..3}\"\n",
   141      "my_objects = bucket.objects(obj_template=my_object_template)\n",
   142      "# More advanced template with multiple ranges and explicit steps (illustrative only; not used below)\n",
   143      "complex_range = \"example_obj_{0..10..2}_details_{1..9..2}.file-extension\""
   144     ]
   145    },
   146    {
   147     "cell_type": "markdown",
   148     "metadata": {
   149      "collapsed": false
   150     },
   151     "source": [
   152      "### Prefetch or evict multiple objects when using a bucket with a cloud backend"
   153     ]
   154    },
   155    {
   156     "cell_type": "code",
   157     "execution_count": null,
   158     "metadata": {
   159      "collapsed": false
   160     },
   161     "outputs": [],
   162     "source": [
   163      "if bucket.provider != \"ais\":  # prefetch/evict are only valid for buckets with a cloud backend\n",
   164      "    my_objects.prefetch(); my_objects.evict()"
   165     ]
   166    },
   167    {
   168     "cell_type": "markdown",
   169     "metadata": {
   170      "collapsed": false
   171     },
   172     "source": [
   173      "### Copy multiple objects\n",
   174      "\n",
   175      "Copies the selected objects directly to the destination bucket."
   176     ]
   177    },
   178    {
   179     "cell_type": "code",
   180     "execution_count": null,
   181     "metadata": {
   182      "collapsed": false
   183     },
   184     "outputs": [],
   185     "source": [
   186      "copy_job = my_objects.copy(to_bck=copy_dest_bucket)  # returned value is used as the job ID below\n",
   187      "# The job will reach an idle state before finishing, so wait for idle rather than for completion\n",
   188      "client.job(job_id=copy_job).wait_for_idle()\n",
   189      "# Ending the cell with this expression displays the objects now in the destination bucket\n",
   190      "copy_dest_bucket.list_all_objects()"
   191     ]
   192    },
   193    {
   194     "cell_type": "markdown",
   195     "metadata": {
   196      "collapsed": false
   197     },
   198     "source": [
   199      "### Delete multiple objects from the destination bucket above"
   200     ]
   201    },
   202    {
   203     "cell_type": "code",
   204     "execution_count": null,
   205     "metadata": {
   206      "collapsed": false
   207     },
   208     "outputs": [],
   209     "source": [
   210      "# Snapshot the destination bucket contents before deleting anything\n",
   211      "all_objects = copy_dest_bucket.list_all_objects()\n",
   212      "# Build a group that names every object currently in the destination bucket\n",
   213      "names_to_delete = [entry.name for entry in all_objects]\n",
   214      "objects_to_delete = copy_dest_bucket.objects(obj_names=names_to_delete)\n",
   215      "# delete() hands back a job ID; wait on that job before listing again\n",
   216      "delete_job_id = objects_to_delete.delete()\n",
   217      "client.job(delete_job_id).wait()\n",
   218      "after_deletion = copy_dest_bucket.list_all_objects()\n",
   219      "before_count, after_count = len(all_objects), len(after_deletion)\n",
   220      "print(f\"Objects before deletion: {before_count}, objects after deletion: {after_count}\")"
   221     ]
   222    },
   223    {
   224     "cell_type": "markdown",
   225     "metadata": {},
   226     "source": [
   227      "### Transform multiple objects -- provide an ETL to be performed on each object so the result appears in the destination bucket\n",
   228      "\n",
   229      "Note: This step requires the AIS cluster to be running in Kubernetes; see [getting_started](https://github.com/NVIDIA/aistore/blob/master/docs/getting_started.md#kubernetes-playground) for setup info."
   230     ]
   231    },
   232    {
   233     "cell_type": "code",
   234     "execution_count": null,
   235     "metadata": {},
   236     "outputs": [],
   237     "source": [
   238      "# First create an ETL. This example transform reverses each object's contents,\n",
   239      "# assuming the contents are utf-8 encoded text.\n",
   240      "def transform(input_bytes):\n",
   241      "    \"\"\"Decode `input_bytes` as utf-8, reverse the text, and return it re-encoded.\"\"\"\n",
   242      "    return input_bytes.decode(\"utf-8\")[::-1].encode()\n",
   243      "\n",
   244      "\n",
   245      "etl_name = \"multiobj-transform-example\"\n",
   246      "try:\n",
   247      "    client.etl(etl_name=etl_name).init_code(transform=transform)\n",
   248      "except AISError as err:\n",
   249      "    # Report the failure and carry on so the rest of the cell still runs\n",
   250      "    print(err)\n",
   251      "\n",
   252      "# Now run the named ETL over the group, writing results to the destination bucket\n",
   253      "transform_job = my_objects.transform(etl_name=etl_name, to_bck=transform_dest_bucket)\n",
   254      "client.job(job_id=transform_job).wait_for_idle()\n",
   255      "\n",
   256      "# List the output bucket, then show each original next to its transformed result\n",
   257      "transformed_objs = transform_dest_bucket.list_all_objects()\n",
   258      "for entry in transformed_objs:\n",
   259      "    obj_name = entry.name\n",
   260      "    input_data = bucket.object(obj_name).get().read_all()\n",
   261      "    output_data = transform_dest_bucket.object(obj_name).get().read_all()\n",
   262      "    print(f\"Object {obj_name} {input_data} => {output_data}\")"
   263     ]
   264    },
   265    {
   266     "cell_type": "markdown",
   267     "metadata": {
   268      "collapsed": false
   269     },
   270     "source": [
   271      "### Cleanup buckets"
   272     ]
   273    },
   274    {
   275     "cell_type": "code",
   276     "execution_count": null,
   277     "metadata": {
   278      "collapsed": false
   279     },
   280     "outputs": [],
   281     "source": [
   282      "for bck in [bucket, copy_dest_bucket, transform_dest_bucket]:  # every bucket created during setup\n",
   283      "    bck.delete(missing_ok=True)"
   284     ]
   285    }
   286   ],
   287   "metadata": {
   288    "kernelspec": {
   289     "display_name": "Python 3",
   290     "language": "python",
   291     "name": "python3"
   292    },
   293    "language_info": {
   294     "codemirror_mode": {
   295      "name": "ipython",
   296      "version": 3
   297     },
   298     "file_extension": ".py",
   299     "mimetype": "text/x-python",
   300     "name": "python",
   301     "nbconvert_exporter": "python",
   302     "pygments_lexer": "ipython3",
   303     "version": "3.11.1 (main, Dec  7 2022, 01:11:34) [GCC 11.3.0]"
   304    },
   305    "orig_nbformat": 4,
   306    "vscode": {
   307     "interpreter": {
   308      "hash": "ead1b95f633dc9c51826328e1846203f51a198c6fb5f2884a80417ba131d4e82"
   309     }
   310    }
   311   },
   312   "nbformat": 4,
   313   "nbformat_minor": 2
   314  }