github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/integration/airflow/bacalhau_airflow/hooks.py (about)

     1  """
     2  Airflow hook to interact with the Bacalhau service.
     3  """
     4  from bacalhau_sdk.api import events, results, submit
     5  from bacalhau_sdk.config import get_client_id
     6  
     7  from airflow.hooks.base import BaseHook
     8  
     9  
    10  class BacalhauHook(BaseHook):
    11      """Hook to interact with the Bacalhau service."""
    12  
    13      def __init__(self, **kwargs) -> None:
    14          """
    15          Initialize the hook.
    16  
    17          Args:
    18              kwargs: Additional keyword arguments.
    19          """
    20          super().__init__(**kwargs)
    21          self.client_id = get_client_id()
    22  
    23      def submit_job(self, api_version: str, job_spec: dict) -> str:
    24          """Submit a job to the Bacalhau service.
    25  
    26          Args:
    27              api_version (str): The API version to use. Example: "V1beta1".
    28              job_spec (dict): A dictionary with the job specification. See example dags for more details.
    29  
    30          Returns:
    31              str: The job ID. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".
    32          """
    33  
    34          response = submit(
    35              dict(
    36                  apiversion=api_version,
    37                  clientid=self.client_id,
    38                  spec=job_spec,
    39              )
    40          )
    41          # TODO check if response is not empty
    42          return str(response.job.metadata.id)
    43  
    44      def get_results(self, job_id: str) -> list:
    45          """Get the data generated from a job. The data becomes available only after the job is finished.
    46  
    47          Args:
    48              job_id (str): The job ID to get the results from. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".
    49  
    50          Returns:
    51              list: A list of dictionaries with the results, one entry per node & shard pair. A nested field contains a CID pointer to the result data.
    52          """
    53          response = results(job_id)
    54          # TODO check if response is not empty
    55          return response.to_dict()["results"]
    56  
    57      def get_events(self, job_id: str) -> dict:
    58          """Get the events of a job. This is useful to check its status.
    59  
    60          Args:
    61              job_id (str): The job ID to get the events from. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".
    62  
    63          Returns:
    64              dict: List of dictionaries with the events
    65          """
    66          response = events(job_id)
    67          # TODO check if response is not empty
    68          return response.to_dict()