github.com/apache/beam/sdks/v2@v2.48.2/python/container/Dockerfile (about)

     1  ###############################################################################
     2  #  Licensed to the Apache Software Foundation (ASF) under one
     3  #  or more contributor license agreements.  See the NOTICE file
     4  #  distributed with this work for additional information
     5  #  regarding copyright ownership.  The ASF licenses this file
     6  #  to you under the Apache License, Version 2.0 (the
     7  #  "License"); you may not use this file except in compliance
     8  #  with the License.  You may obtain a copy of the License at
     9  #
    10  #      http://www.apache.org/licenses/LICENSE-2.0
    11  #
    12  #  Unless required by applicable law or agreed to in writing, software
    13  #  distributed under the License is distributed on an "AS IS" BASIS,
    14  #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  #  See the License for the specific language governing permissions and
    16  # limitations under the License.
    17  ###############################################################################
    18  
    19  ARG py_version
    20  FROM python:"${py_version}"-bullseye as beam
    21  LABEL Author "Apache Beam <dev@beam.apache.org>"
    22  ARG TARGETOS
    23  ARG TARGETARCH
    24  
    25  # Install native bindings required for dependencies.
    26  RUN apt-get update && \
    27      apt-get install -y \
    28         # Required by python-snappy
    29         libsnappy-dev \
    30         # Required by pyyaml (for c bindings)
    31         libyaml-dev \
    32         # This is used to speed up the re-installation of the sdk.
    33         ccache \
    34         && \
    35      rm -rf /var/lib/apt/lists/*
    36  
    37  ####
    38  # Install required packages for Beam Python SDK and common dependencies used by users.
    39  ####
    40  
    41  COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt
    42  RUN \
    43      # use --no-deps to ensure the list includes all transitive dependencies.
    44      pip install --no-deps -r /tmp/base_image_requirements.txt && \
    45      python -c "import nltk; nltk.download('stopwords')" && \
    46      rm /root/nltk_data/corpora/stopwords.zip && \
    47      # Check that the protobuf upb(also called micro protobuf) is used.
    48      python -c "from google.protobuf.internal import api_implementation; assert api_implementation._implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \
    49      # Remove pip cache.
    50      rm -rf /root/.cache/pip && \
    51      rm -rf /tmp/base_image_requirements.txt
    52  
    53  RUN pip install --upgrade pip setuptools
    54  
    55  # Install Google Cloud SDK.
    56  ENV CLOUDSDK_CORE_DISABLE_PROMPTS yes
    57  ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin
    58  RUN mkdir -p /usr/local/gcloud && \
    59      cd /usr/local/gcloud && \
    60      curl -s -O https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \
    61      tar -xf google-cloud-sdk.tar.gz && \
    62      /usr/local/gcloud/google-cloud-sdk/install.sh && \
    63      rm google-cloud-sdk.tar.gz
    64  
    65  # Configure ccache prior to installing Beam SDK.
    66  RUN ln -s /usr/bin/ccache /usr/local/bin/gcc
    67  # These parameters are needed as pip compiles artifacts in random temporary directories.
    68  RUN ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false
    69  
    70  ####
    71  # Install Apache Beam SDK. Use --no-deps and pip check to verify that all
    72  # necessary dependencies are specified in base_image_requirements.txt.
    73  ####
    74  COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
    75  RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
    76  RUN pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1)
    77  # Log complete list of what exact packages and versions are installed.
    78  RUN pip freeze --all
    79  
    80  COPY target/LICENSE /opt/apache/beam/
    81  COPY target/LICENSE.python /opt/apache/beam/
    82  COPY target/NOTICE /opt/apache/beam/
    83  COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot /opt/apache/beam/
    84  
    85  ENTRYPOINT ["/opt/apache/beam/boot"]
    86  
    87  ####
    88  # Pull and add third party licenses to the image if pull_licenses is true.
    89  # Use multistage build to eliminate unwanted changes to beam image due to
    90  # extra dependencies needed to pull licenses.
    91  ####
    92  
    93  FROM beam as third_party_licenses
    94  ARG pull_licenses
    95  COPY target/license_scripts /tmp/license_scripts/
    96  
    97  # Add golang licenses.
    98  COPY  target/go-licenses/* /opt/apache/beam/third_party_licenses/golang/
    99  
   100  COPY target/license_scripts /tmp/license_scripts/
   101  RUN if [ "$pull_licenses" = "true" ] ; then \
   102        pip install 'pip-licenses<5' pyyaml tenacity && \
   103        python /tmp/license_scripts/pull_licenses_py.py ; \
   104      fi
   105  
   106  FROM beam
   107  ARG pull_licenses
   108  COPY --from=third_party_licenses /opt/apache/beam/third_party_licenses /opt/apache/beam/third_party_licenses
   109  RUN if [ "$pull_licenses" != "true" ] ; then \
   110        rm -rf /opt/apache/beam/third_party_licenses ; \
   111      fi