# Origin: github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/deployments/utils/spark3-hadoop2/Dockerfile
#
# INSTRUCTIONS
#
# Build:
#   docker build --tag treeverse/bitnami-based-spark:3.2.3-with-hadoop-2.7-for-testing .
#
# Push:
#   docker push treeverse/bitnami-based-spark:3.2.3-with-hadoop-2.7-for-testing
#
# Use *only* in Esti.  This is **NOT** a good Docker image to use for
# running Spark, except for testing.

# Throwaway stage: downloads the upstream tarballs and unpacks only the JARs
# needed by the final image, so the archives themselves never end up in a
# layer of the published image.  The JARs are platform-independent, so this
# stage always runs on the build host's native platform.
FROM --platform=$BUILDPLATFORM ubuntu:21.04 AS extract

# WORKDIR creates the directory; every JAR for the final image is staged here.
WORKDIR /build/jars

# Extract hadoop-aws-2.7.4 and its dependency aws-java-sdk-1.7.4 from an old
# archived version of Apache Hadoop.  These JARs are so long-dead that
# this is the *easy* way of getting our actual test code on them.
#
# See, fear, and never use this Docker image except in tests.
ADD https://archive.apache.org/dist/hadoop/common/hadoop-2.7.4/hadoop-2.7.4.tar.gz /tmp/hadoop-2.7.4.tar.gz

# Spark 3.2.3 built against Hadoop 2.7.  Fetched from archive.apache.org
# because dlcdn.apache.org only carries currently supported releases.
ADD https://archive.apache.org/dist/spark/spark-3.2.3/spark-3.2.3-bin-hadoop2.7.tgz /tmp/spark-3.2.3-bin-hadoop2.7.tgz

# Unpack the Spark JARs first, then the two Hadoop/AWS JARs on top (same
# precedence as copying them in afterwards).  --strip-components drops the
# leading directories so that only the bare *.jar files land in /build/jars.
# Both Hadoop members are pulled in a single pass over the archive.
RUN tar --extract --gzip \
        --file /tmp/spark-3.2.3-bin-hadoop2.7.tgz \
        --strip-components=2 \
        --directory /build/jars \
        spark-3.2.3-bin-hadoop2.7/jars/ \
    && tar --extract --gzip \
        --file /tmp/hadoop-2.7.4.tar.gz \
        --strip-components=5 \
        --directory /build/jars \
        hadoop-2.7.4/share/hadoop/tools/lib/hadoop-aws-2.7.4.jar \
        hadoop-2.7.4/share/hadoop/tools/lib/aws-java-sdk-1.7.4.jar \
    && rm /tmp/spark-3.2.3-bin-hadoop2.7.tgz /tmp/hadoop-2.7.4.tar.gz

# Build Bitnami Spark 3.2.x but with Hadoop 2.  Details in
# https://github.com/bitnami/bitnami-docker-spark#using-a-different-version-of-hadoop-jars.
FROM bitnami/spark:3.2.3

# Root is needed only to replace the bundled JAR directory.
USER root
RUN rm -r /opt/bitnami/spark/jars
COPY --from=extract /build/jars/ /opt/bitnami/spark/jars/

# Drop back to the unprivileged UID the image ran as before the USER root above.
USER 1001