github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/yaml/main.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  import argparse
    19  
    20  import yaml
    21  
    22  import apache_beam as beam
    23  from apache_beam.yaml import yaml_transform
    24  
    25  
    26  def run(argv=None):
    27    parser = argparse.ArgumentParser()
    28    parser.add_argument(
    29        '--pipeline_spec',
    30        description='A yaml description of the pipeline to run.')
    31    parser.add_argument(
    32        '--pipeline_spec_file',
    33        description='A file containing a yaml description of the pipeline to run.'
    34    )
    35    known_args, pipeline_args = parser.parse_known_args(argv)
    36  
    37    if known_args.pipeline_spec_file and known_args.pipeline_spec:
    38      raise ValueError(
    39          "Exactly one of pipeline_spec or pipeline_spec_file must be set.")
    40    elif known_args.pipeline_spec_file:
    41      with open(known_args.pipeline_spec_file) as fin:
    42        pipeline_yaml = fin.read()
    43    elif known_args.pipeline_spec:
    44      pipeline_yaml = known_args.pipeline_spec
    45    else:
    46      raise ValueError(
    47          "Exactly one of pipeline_spec or pipeline_spec_file must be set.")
    48  
    49    pipeline_spec = yaml.load(pipeline_yaml, Loader=yaml_transform.SafeLineLoader)
    50  
    51    yaml_transform._LOGGER.setLevel('INFO')
    52  
    53    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
    54        pipeline_args,
    55        pickle_library='cloudpickle',
    56        **pipeline_spec.get('options', {}))) as p:
    57      print("Building pipeline...")
    58      yaml_transform.expand_pipeline(p, known_args.pipeline_spec)
    59      print("Running pipeline...")
    60  
    61  
# Script entry point: parse the command line and run the YAML-described
# pipeline only when this module is executed directly, not when imported.
if __name__ == '__main__':
  run()