package io.lakefs.tests

import java.io.BufferedInputStream
import java.net.{URI, URISyntaxException}

import org.apache.hadoop.fs
import org.apache.hadoop.conf.Configuration
import com.amazonaws.SDKGlobalConfiguration

import scala.util.{Try, Success, Failure}

/** Round-trip test for multipart uploads through the Hadoop S3A filesystem.
  *
  * Writes a deterministic pseudo-random byte stream to the object named on the
  * command line, using a file size of two multipart parts, then reads the
  * object back and compares it byte-for-byte (including its length) with the
  * same pseudo-random stream.  Exits with status 1 on a usage error, an
  * unparsable URI, or any difference between uploaded and downloaded content.
  *
  * Environment variables read: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`,
  * `AWS_REGION` and `ENDPOINT`; each is applied only when it is set.
  *
  * This is a plain entry-point object rather than an `App`: overriding
  * `App.main` is deprecated, and nothing here relies on `App`'s delayed
  * initialization.
  */
object S3AMultipart {
  /** Returns a fresh generator with a fixed seed, so that the upload side and
    * the verification side produce the same byte sequence.
    */
  def newRandom(): scala.util.Random = new scala.util.Random(17)

  /** Runs the upload / download / compare cycle.
    *
    * @param args exactly one element: the URI of the object to write and read
    *             back, e.g. `s3://bucket/path/to/object`
    */
  def main(args: Array[String]): Unit = {
    val partSize = 5 << 20 // Must be >= 5MiB on AWS S3.
    val fileSize = 2 * partSize
    val writeSize = 1 << 18

    if (args.length != 1) {
      Console.err.println("Usage: ... s3://bucket/path/to/object")
      sys.exit(1)
    }
    val path = args(0)

    System.setProperty(SDKGlobalConfiguration.ENABLE_S3_SIGV4_SYSTEM_PROPERTY, "true")

    val conf = new Configuration(true)
    // Configuration.set rejects null values, so only set the keys that are
    // actually present in the environment; when they are absent the S3A
    // connector is left to resolve credentials on its own.
    sys.env.get("AWS_ACCESS_KEY_ID").foreach(v => conf.set("fs.s3a.access.key", v))
    sys.env.get("AWS_SECRET_ACCESS_KEY").foreach(v => conf.set("fs.s3a.secret.key", v))
    conf.set("fs.s3a.custom.signers", "AWS4SignerType")
    conf.set("fs.s3a.multipart.size", partSize.toString)
    conf.set("fs.s3a.multipart.threshold", partSize.toString)

    sys.env.get("AWS_REGION").foreach { region =>
      conf.set("fs.s3a.region", region)
      // Otherwise it tries host-based addressing and fails
      conf.set("fs.s3a.endpoint", s"s3.${region}.amazonaws.com")
    }

    // An explicit ENDPOINT takes precedence over the region-derived endpoint.
    sys.env.get("ENDPOINT").foreach(endpoint => conf.set("fs.s3a.endpoint", endpoint))

    val uri =
      try {
        new URI(path)
      } catch {
        case e: URISyntaxException =>
          Console.err.println(s"parse URI ${path}: ${e}")
          sys.exit(1) // typed Nothing, so no placeholder value is needed
      }

    val filesystem = fs.FileSystem.get(uri, conf)
    val objectPath = new fs.Path(path)

    // Endless stream of `size`-byte chunks drawn from `r`.
    def asBytes(r: scala.util.Random, size: Int): Iterator[Array[Byte]] =
      Iterator.continually {
        val bytes = new Array[Byte](size)
        r.nextBytes(bytes)
        bytes
      }

    // Upload fileSize bytes in writeSize-byte writes.
    val up = filesystem.create(objectPath)
    try {
      asBytes(newRandom(), writeSize)
        .take(fileSize / writeSize)
        .foreach(chunk => up.write(chunk, 0, chunk.length))
    } finally {
      up.close()
    }

    // Download and compare.  Both sides are lifted to Option[Byte] and joined
    // with zipAll so that a download that is shorter or longer than fileSize
    // is reported as a difference (None on the side that ended first) instead
    // of being silently truncated by zip.
    val down = new BufferedInputStream(filesystem.open(objectPath))
    val diffs =
      try {
        val actualBytes =
          Iterator.continually(down.read()).takeWhile(_ >= 0).map(b => Option(b.toByte))
        val expectedBytes =
          asBytes(newRandom(), writeSize).flatMap(_.iterator).take(fileSize).map(b => Option(b))

        actualBytes
          .zipAll(expectedBytes, Option.empty[Byte], Option.empty[Byte])
          .zipWithIndex
          .filter { case ((a, b), _) => a != b }
          .take(10)
          .toList // force evaluation while the stream is still open
      } finally {
        down.close()
      }

    if (diffs.nonEmpty) {
      def show(b: Option[Byte]): String = b.fold("<EOF>")(_.toString)
      val diffsText = diffs.map { case ((a, b), i) => s"${show(a)} != ${show(b)} @${i}" }
      Console.err.println(s"Downloaded other bytes than uploaded; first diffs ${diffsText}")
      sys.exit(1)
    }
  }
}