github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/ml-orchestration/kfp/pipeline.py

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

import kfp
from kfp import components as comp
from kfp.v2 import dsl
from kfp.v2.compiler import Compiler


def parse_args():
  """Parse command-line arguments."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      "--gcp-project-id",
      type=str,
      help="ID of the Google Cloud project to deploy the pipeline to.",
      required=True)
  parser.add_argument(
      "--region",
      type=str,
      help="Region in which to deploy the pipeline.",
      required=True)
  parser.add_argument(
      "--pipeline-root",
      type=str,
      help="Path to the artifact repository where Kubeflow Pipelines "
      "stores a pipeline's artifacts.",
      required=True)
  parser.add_argument(
      "--component-artifact-root",
      type=str,
      help="Path to the artifact repository where Kubeflow Pipelines "
      "components can store artifacts.",
      required=True)
  parser.add_argument(
      "--dataflow-staging-root",
      type=str,
      help="Path to the staging directory for Dataflow.",
      required=True)
  parser.add_argument(
      "--beam-runner",
      type=str,
      help="Beam runner: DataflowRunner or DirectRunner.",
      default="DirectRunner")
  return parser.parse_args()


# Arguments are parsed as a global variable so
# they can be used in the pipeline decorator below.
ARGS = parse_args()
PIPELINE_ROOT = vars(ARGS)['pipeline_root']

# [START load_kfp_components]
# Load the KFP components from their yaml files.
DataIngestOp = comp.load_component('components/ingestion/component.yaml')
DataPreprocessingOp = comp.load_component(
    'components/preprocessing/component.yaml')
TrainModelOp = comp.load_component('components/train/component.yaml')
# [END load_kfp_components]


# [START define_kfp_pipeline]
@dsl.pipeline(
    pipeline_root=PIPELINE_ROOT,
    name="beam-preprocessing-kfp-example",
    description="Pipeline to show an Apache Beam preprocessing example in KFP")
def pipeline(
    gcp_project_id: str,
    region: str,
    component_artifact_root: str,
    dataflow_staging_root: str,
    beam_runner: str):
  """KFP pipeline definition.

  Args:
    gcp_project_id (str): ID of the Google Cloud project to deploy the
      pipeline to.
    region (str): Region in which to deploy the pipeline.
    component_artifact_root (str): Path to the artifact repository where
      Kubeflow Pipelines components can store artifacts.
    dataflow_staging_root (str): Path to the staging directory for the
      Dataflow runner.
    beam_runner (str): Beam runner: DataflowRunner or DirectRunner.
  """

  ingest_data_task = DataIngestOp(base_artifact_path=component_artifact_root)

  data_preprocessing_task = DataPreprocessingOp(
      ingested_dataset_path=ingest_data_task.outputs["ingested_dataset_path"],
      base_artifact_path=component_artifact_root,
      gcp_project_id=gcp_project_id,
      region=region,
      dataflow_staging_root=dataflow_staging_root,
      beam_runner=beam_runner)

  train_model_task = TrainModelOp(
      preprocessed_dataset_path=data_preprocessing_task.
      outputs["preprocessed_dataset_path"],
      base_artifact_path=component_artifact_root)


# [END define_kfp_pipeline]

if __name__ == "__main__":
  # [START compile_kfp_pipeline]
  Compiler().compile(pipeline_func=pipeline, package_path="pipeline.json")
  # [END compile_kfp_pipeline]

  # The pipeline root was already consumed by the @dsl.pipeline decorator
  # above, so it is not passed again as a runtime parameter.
  run_arguments = vars(ARGS)
  del run_arguments['pipeline_root']

  # [START execute_kfp_pipeline]
  client = kfp.Client()
  experiment = client.create_experiment("KFP orchestration example")
  run_result = client.run_pipeline(
      experiment_id=experiment.id,
      job_name="KFP orchestration job",
      pipeline_package_path="pipeline.json",
      params=run_arguments)
  # [END execute_kfp_pipeline]
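# Example invocation (a sketch only: the project ID and gs:// paths below are
# hypothetical placeholders, not values from this repository). The flag names
# match the parse_args() definitions above:
#
#   python pipeline.py \
#     --gcp-project-id=my-project \
#     --region=us-central1 \
#     --pipeline-root=gs://my-bucket/pipeline-root \
#     --component-artifact-root=gs://my-bucket/components \
#     --dataflow-staging-root=gs://my-bucket/staging \
#     --beam-runner=DataflowRunner
#
# Compilation writes pipeline.json to the working directory; kfp.Client() is
# then expected to resolve the Kubeflow Pipelines API endpoint from its
# default environment configuration before submitting the run.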