github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/test/spark/run-exporter-test.sh

#!/bin/bash -aux

set -o pipefail

# Required environment variables: REPOSITORY, EXPORT_LOCATION (an s3:// URL),
# CLIENT_JAR, TESTER_ACCESS_KEY_ID and TESTER_SECRET_ACCESS_KEY.

# lakeFS repository names may not contain dots; map them to dashes.
REPOSITORY=${REPOSITORY//./-}
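# (e.g. a test repository named "my.repo" becomes "my-repo" -- hypothetical value)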
# Run export: submit the lakeFS Spark client to copy the repository's main
# branch to ${EXPORT_LOCATION}.
docker compose run -v "${CLIENT_JAR}:/client/client.jar" -T --no-deps --rm spark-submit bash -c \
    "spark-submit --master spark://spark:7077 \
        --conf spark.hadoop.lakefs.api.url=http://docker.lakefs.io:8000/api/v1 \
        --conf spark.hadoop.lakefs.api.access_key=\${TESTER_ACCESS_KEY_ID} \
        --conf spark.hadoop.lakefs.api.secret_key=\${TESTER_SECRET_ACCESS_KEY} \
        --conf spark.hadoop.fs.s3a.connection.ssl.enabled=false \
        --class io.treeverse.clients.Main /client/client.jar ${REPOSITORY} ${EXPORT_LOCATION} \
        --branch=main"
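# Besides the data objects, the exporter may also write EXPORT_* status
# markers at the export location; the validation below filters them out.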

# Validate export: the export location must contain exactly the objects that
# lakeFS lists on the main branch.
lakectl_out=$(mktemp)
s3_out=$(mktemp)
trap 'rm -f -- "$s3_out" "$lakectl_out"' INT TERM EXIT

# List all object paths in lakeFS; the path is the 8th whitespace-separated
# field of lakectl's listing.
docker compose exec -T lakefs lakectl fs ls --recursive --no-color "lakefs://${REPOSITORY}/main/" | awk '{print $8}' | sort > "${lakectl_out}"
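# (A listing line looks roughly like "object 2024-05-20 13:46:07 +0000 UTC
# 15 B path/to/file"; the exact column layout is assumed here.)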

# Split EXPORT_LOCATION into its S3 bucket and key prefix.
export_bucket=$(echo "$EXPORT_LOCATION" | sed -E 's!^s3://([^/]*)/.*!\1!')
export_key=$(echo "$EXPORT_LOCATION" | sed -E 's!^s3://[^/]*/!!')
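# e.g. EXPORT_LOCATION=s3://my-bucket/exports/run1 (hypothetical) yields
# export_bucket=my-bucket and export_key=exports/run1.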

# List the exported objects, strip the key prefix, and drop the EXPORT_*
# status objects so only data object paths remain.
aws s3api list-objects-v2 \
    --bucket "$export_bucket" --prefix "$export_key" \
    --query "Contents[].[Key]" --output text | \
    sed "s%^${export_key}/%%" | \
    grep -F -v EXPORT_ | \
    sort > "${s3_out}"
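# Both temp files now hold one bare object path per line, e.g. a key
# "${export_key}/data/part-0" (hypothetical) is reduced to "data/part-0".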

if ! diff "${lakectl_out}" "${s3_out}"; then
  echo "Export location and lakeFS do not contain the same objects"
  exit 1
fi
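
# Example invocation (all values hypothetical):
#   REPOSITORY=test-repo EXPORT_LOCATION=s3://my-bucket/exports \
#   CLIENT_JAR=./client.jar TESTER_ACCESS_KEY_ID=... TESTER_SECRET_ACCESS_KEY=... \
#   ./run-exporter-test.sh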