###############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
###############################################################################

# Builds the Apache Beam Python SDK container image in three stages:
#   1. beam                 - base Python image plus native libraries, the
#                             pinned Python requirements, the Google Cloud SDK,
#                             the Beam SDK itself and the boot launcher.
#   2. third_party_licenses - derived from `beam`; collects third-party
#                             license files when pull_licenses is "true".
#   3. (final, unnamed)     - `beam` plus the collected licenses, or without
#                             them when pull_licenses is not "true".
#
# Build arguments:
#   py_version    - tag prefix of the python base image (no default is set
#                   here, so it has to be passed with --build-arg).
#   pull_licenses - "true" to pull and keep third-party licenses.
#   TARGETOS / TARGETARCH - select which prebuilt boot launcher is copied.

ARG py_version
FROM python:${py_version}-bullseye AS beam
LABEL Author="Apache Beam <dev@beam.apache.org>"
ARG TARGETOS
ARG TARGETARCH

# Install native bindings required for dependencies.
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN apt-get update && \
    apt-get install -y \
       # Required by python-snappy
       libsnappy-dev \
       # Required by pyyaml (for c bindings)
       libyaml-dev \
       # This is used to speed up the re-installation of the sdk.
       ccache \
       && \
    rm -rf /var/lib/apt/lists/*

####
# Install required packages for Beam Python SDK and common dependencies used by users.
####

COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt
RUN \
    # use --no-deps to ensure the list includes all transitive dependencies.
    pip install --no-deps -r /tmp/base_image_requirements.txt && \
    # Pre-download the nltk stopwords corpus, then drop the downloaded archive.
    python -c "import nltk; nltk.download('stopwords')" && \
    rm /root/nltk_data/corpora/stopwords.zip && \
    # Check that the protobuf upb(also called micro protobuf) is used.
    python -c "from google.protobuf.internal import api_implementation; assert api_implementation._implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \
    # Remove pip cache.
    rm -rf /root/.cache/pip && \
    rm -rf /tmp/base_image_requirements.txt

RUN pip install --upgrade pip setuptools

# Install Google Cloud SDK.
ENV CLOUDSDK_CORE_DISABLE_PROMPTS=yes
ENV PATH=$PATH:/usr/local/gcloud/google-cloud-sdk/bin
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# response as the tarball; --show-error keeps the failure reason visible even
# though progress output is silenced.
RUN mkdir -p /usr/local/gcloud && \
    cd /usr/local/gcloud && \
    curl -fsS -O https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \
    tar -xf google-cloud-sdk.tar.gz && \
    /usr/local/gcloud/google-cloud-sdk/install.sh && \
    rm google-cloud-sdk.tar.gz

# Configure ccache prior to installing Beam SDK.
# The symlink is named gcc so that invocations of gcc resolved through
# /usr/local/bin run ccache.
RUN ln -s /usr/bin/ccache /usr/local/bin/gcc
# These parameters are needed as pip compiles artifacts in random temporary directories.
RUN ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false

####
# Install Apache Beam SDK. Use --no-deps and pip check to verify that all
# necessary dependencies are specified in base_image_requirements.txt.
####
COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
RUN pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1)
# Log complete list of what exact packages and versions are installed.
RUN pip freeze --all

COPY target/LICENSE /opt/apache/beam/
COPY target/LICENSE.python /opt/apache/beam/
COPY target/NOTICE /opt/apache/beam/
# The launcher is selected per target platform via TARGETOS / TARGETARCH.
COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot /opt/apache/beam/

ENTRYPOINT ["/opt/apache/beam/boot"]

####
# Pull and add third party licenses to the image if pull_licenses is true.
# Use multistage build to eliminate unwanted changes to beam image due to
# extra dependencies needed to pull licenses.
####

FROM beam AS third_party_licenses
ARG pull_licenses

# Add golang licenses.
COPY target/go-licenses/* /opt/apache/beam/third_party_licenses/golang/

# The license scripts only need to be copied once in this stage.
COPY target/license_scripts /tmp/license_scripts/
RUN if [ "$pull_licenses" = "true" ] ; then \
      pip install 'pip-licenses<5' pyyaml tenacity && \
      python /tmp/license_scripts/pull_licenses_py.py ; \
    fi

FROM beam
ARG pull_licenses
COPY --from=third_party_licenses /opt/apache/beam/third_party_licenses /opt/apache/beam/third_party_licenses
# When licenses were not requested, drop the directory copied above (it still
# holds the golang licenses added in the third_party_licenses stage).
RUN if [ "$pull_licenses" != "true" ] ; then \
      rm -rf /opt/apache/beam/third_party_licenses ; \
    fi