# Source: github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/integration/airflow/bacalhau_airflow/hooks.py
"""
Airflow hook to interact with the Bacalhau service.
"""
from bacalhau_sdk.api import events, results, submit
from bacalhau_sdk.config import get_client_id

from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook


class BacalhauHook(BaseHook):
    """Hook to interact with the Bacalhau service."""

    def __init__(self, **kwargs) -> None:
        """
        Initialize the hook.

        Args:
            kwargs: Additional keyword arguments, forwarded to ``BaseHook``.
        """
        super().__init__(**kwargs)
        # Resolved once per hook instance and sent with every submitted job.
        self.client_id = get_client_id()

    @staticmethod
    def _require_response(response, action: str):
        """Return ``response`` unchanged, or fail loudly when the SDK gave back nothing.

        Args:
            response: The object returned by a ``bacalhau_sdk.api`` call.
            action (str): Short description of the call, used in the error message.

        Raises:
            AirflowException: If ``response`` is ``None``.
        """
        if response is None:
            raise AirflowException(f"Bacalhau returned an empty response while {action}.")
        return response

    def submit_job(self, api_version: str, job_spec: dict) -> str:
        """Submit a job to the Bacalhau service.

        Args:
            api_version (str): The API version to use. Example: "V1beta1".
            job_spec (dict): A dictionary with the job specification. See example dags for more details.

        Returns:
            str: The job ID. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".

        Raises:
            AirflowException: If the service response carries no job ID.
        """
        response = submit(
            {
                "apiversion": api_version,
                "clientid": self.client_id,
                "spec": job_spec,
            }
        )
        # Walk the response defensively: any missing level means there is no
        # usable ID, and str(None) would otherwise hand back the string "None".
        job = getattr(response, "job", None)
        metadata = getattr(job, "metadata", None)
        job_id = getattr(metadata, "id", None)
        if not job_id:
            raise AirflowException("Bacalhau did not return a job ID for the submitted job.")
        return str(job_id)

    def get_results(self, job_id: str) -> list:
        """Get the data generated from a job. The data becomes available only after the job is finished.

        Args:
            job_id (str): The job ID to get the results from. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".

        Returns:
            list: A list of dictionaries with the results, one entry per node & shard pair.
                A nested field contains a CID pointer to the result data. An empty list is
                returned when the response holds no results.

        Raises:
            AirflowException: If the service returns no response at all.
        """
        response = self._require_response(results(job_id), f"fetching results of job {job_id}")
        # Normalise an unset "results" entry to [] so the declared list
        # return type always holds for callers that iterate over it.
        return response.to_dict()["results"] or []

    def get_events(self, job_id: str) -> dict:
        """Get the events of a job. This is useful to check its status.

        Args:
            job_id (str): The job ID to get the events from. Example: "3b39baee-5714-4f17-aa71-1f5824665ad6".

        Returns:
            dict: The events response converted with ``to_dict()``; it carries the
                dictionaries describing the job's events.

        Raises:
            AirflowException: If the service returns no response at all.
        """
        response = self._require_response(events(job_id), f"fetching events of job {job_id}")
        return response.to_dict()