github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/test/java/io/lakefs/LakeFSFileSystemServerS3Test.java (about) 1 package io.lakefs; 2 3 import org.slf4j.Logger; 4 import org.slf4j.LoggerFactory; 5 6 import io.lakefs.clients.sdk.model.*; 7 import io.lakefs.clients.sdk.model.ObjectStats.PathTypeEnum; 8 import io.lakefs.clients.sdk.ApiException; 9 import io.lakefs.utils.ObjectLocation; 10 11 import org.apache.commons.io.IOUtils; 12 import org.apache.hadoop.conf.Configuration; 13 import org.apache.hadoop.fs.FileAlreadyExistsException; 14 import org.apache.hadoop.fs.Path; 15 16 import com.amazonaws.HttpMethod; 17 import com.amazonaws.services.s3.model.*; 18 19 import org.junit.Assert; 20 import org.junit.Test; 21 import org.junit.runner.RunWith; 22 import org.junit.runners.Parameterized; 23 import org.junit.runners.Parameterized.Parameter; 24 import org.junit.runners.Parameterized.Parameters;import org.hamcrest.core.StringContains; 25 26 import org.mockserver.matchers.MatchType; 27 28 import static org.mockserver.model.HttpResponse.response; 29 import static org.mockserver.model.JsonBody.json; 30 31 import java.io.*; 32 import java.net.URL; 33 import java.util.Date; 34 import java.util.Arrays; 35 import java.util.concurrent.TimeUnit; 36 37 @RunWith(Parameterized.class) 38 public class LakeFSFileSystemServerS3Test extends S3FSTestBase { 39 static private final Logger LOG = LoggerFactory.getLogger(LakeFSFileSystemServerS3Test.class); 40 41 public static interface PhysicalAddressCreator { 42 default void initConfiguration(Configuration conf) {} 43 String createGetPhysicalAddress(S3FSTestBase o, String key); 44 StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path); 45 } 46 47 @Parameters(name="{1}") 48 public static Iterable<Object[]> data() { 49 return Arrays.asList(new Object[][]{ 50 {new SimplePhysicalAddressCreator(), "simple"}, 51 {new PresignedPhysicalAddressCreator(), "presigned"}}); 52 } 53 54 @Parameter(1) 55 public String unusedAddressCreatorType; 56 57 @Parameter(0) 58 public PhysicalAddressCreator pac; 59 60 static private class SimplePhysicalAddressCreator implements PhysicalAddressCreator { 61 public String createGetPhysicalAddress(S3FSTestBase o, String key) { 62 return o.s3Url(key); 63 } 64 65 public StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path) { 66 String fullPath = String.format("%s/%s/%s/%s/%s-object", 67 o.sessionId(), namespace, repo, branch, path); 68 return new StagingLocation().physicalAddress(o.s3Url(fullPath)); 69 } 70 } 71 72 static private class PresignedPhysicalAddressCreator implements PhysicalAddressCreator { 73 public void initConfiguration(Configuration conf) { 74 conf.set("fs.lakefs.access.mode", "presigned"); 75 } 76 77 protected Date getExpiration() { 78 return new Date(System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1)); 79 } 80 81 public String createGetPhysicalAddress(S3FSTestBase o, String key) { 82 Date expiration = getExpiration(); 83 URL presignedUrl = 84 o.s3Client.generatePresignedUrl(new GeneratePresignedUrlRequest(o.s3Bucket, key) 85 .withMethod(HttpMethod.GET) 86 .withExpiration(expiration)); 87 return presignedUrl.toString(); 88 } 89 90 public StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path) { 91 String fullPath = String.format("%s/%s/%s/%s/%s-object", 92 o.sessionId(), namespace, repo, branch, path); 93 Date expiration = getExpiration(); 94 URL presignedUrl = 95 o.s3Client.generatePresignedUrl(new GeneratePresignedUrlRequest(o.s3Bucket, fullPath) 96 .withMethod(HttpMethod.PUT) 97 .withExpiration(expiration)); 98 return new StagingLocation() 99 .physicalAddress(o.s3Url(fullPath)) 100 .presignedUrl(presignedUrl.toString()); 101 } 102 } 103 104 @Override 105 protected void moreHadoopSetup() { 106 super.moreHadoopSetup(); 107 pac.initConfiguration(conf); 108 } 109 110 // Return a location under namespace for this getPhysicalAddress call. 111 protected StagingLocation mockGetPhysicalAddress(String repo, String branch, String path, String namespace) { 112 StagingLocation stagingLocation = 113 pac.createPutStagingLocation(this, namespace, repo, branch, path); 114 mockServerClient.when(request() 115 .withMethod("GET") 116 .withPath(String.format("/repositories/%s/branches/%s/staging/backing", repo, branch)) 117 .withQueryStringParameter("path", path)) 118 .respond(response().withStatusCode(200) 119 .withBody(gson.toJson(stagingLocation))); 120 return stagingLocation; 121 } 122 123 @Test 124 public void testCreate() throws IOException { 125 String contents = "The quick brown fox jumps over the lazy dog."; 126 long contentsLength = (long) contents.getBytes().length; 127 Path path = new Path("lakefs://repo/main/sub1/sub2/create.me"); 128 129 mockDirectoryMarker(ObjectLocation.pathToObjectLocation(null, path)); 130 131 StagingLocation stagingLocation = 132 mockGetPhysicalAddress("repo", "main", "sub1/sub2/create.me", "repo-base/create"); 133 134 // nothing at path 135 mockFileDoesNotExist("repo", "main", "sub1/sub2/create.me"); 136 // sub1/sub2 was an empty directory with no marker. 137 mockStatObjectNotFound("repo", "main", "sub1/sub2/"); 138 139 ObjectStats newStats = makeObjectStats("sub1/sub2/create.me") 140 .pathType(PathTypeEnum.OBJECT) 141 .physicalAddress(stagingLocation.getPhysicalAddress()). 142 checksum(UNUSED_CHECKSUM). 143 mtime(UNUSED_MTIME). 144 sizeBytes(UNUSED_FILE_SIZE); 145 146 mockServerClient.when(request() 147 .withMethod("PUT") 148 .withPath("/repositories/repo/branches/main/staging/backing") 149 .withBody(json(gson.toJson(new StagingMetadata() 150 .staging(stagingLocation) 151 .sizeBytes(contentsLength)), 152 MatchType.ONLY_MATCHING_FIELDS))) 153 .respond(response() 154 .withStatusCode(200) 155 .withBody(gson.toJson(newStats))); 156 157 // Empty dir marker should be deleted. 158 mockDeleteObject("repo", "main", "sub1/sub2/"); 159 160 OutputStream out = fs.create(path); 161 out.write(contents.getBytes()); 162 out.close(); 163 164 // Write succeeded, verify physical file on S3. 165 assertS3Object(stagingLocation, contents); 166 } 167 168 @Test 169 public void testMkdirs() throws IOException { 170 // setup empty folder checks 171 Path path = new Path("dir1/dir2/dir3"); 172 for (Path p = new Path(path.toString()); p != null && !p.isRoot(); p = p.getParent()) { 173 mockStatObjectNotFound("repo", "main", p.toString()); 174 mockStatObjectNotFound("repo", "main", p+"/"); 175 mockListing("repo", "main", ImmutablePagination.builder().prefix(p+"/").build()); 176 } 177 178 // physical address to directory marker object 179 StagingLocation stagingLocation = 180 mockGetPhysicalAddress("repo", "main", "dir1/dir2/dir3/", "repo-base/emptyDir"); 181 182 ObjectStats newStats = makeObjectStats("dir1/dir2/dir3/") 183 .physicalAddress(pac.createGetPhysicalAddress(this, "repo-base/dir12")); 184 mockStatObject("repo", "main", "dir1/dir2/dir3/", newStats); 185 186 mockServerClient.when(request() 187 .withMethod("PUT") 188 .withPath("/repositories/repo/branches/main/staging/backing") 189 .withQueryStringParameter("path", "dir1/dir2/dir3/") 190 .withBody(json(gson.toJson(new StagingMetadata() 191 .staging(stagingLocation) 192 .sizeBytes(0L)), 193 MatchType.ONLY_MATCHING_FIELDS))) 194 .respond(response() 195 .withStatusCode(200) 196 .withBody(gson.toJson(newStats))); 197 198 // call mkdirs 199 Assert.assertTrue(fs.mkdirs(new Path("lakefs://repo/main/", path))); 200 201 // verify file exists on s3 202 assertS3Object(stagingLocation, ""); 203 } 204 205 @Test 206 public void testCreateExistingDirectory() throws IOException { 207 Path path = new Path("lakefs://repo/main/sub1/sub2/create.me"); 208 // path is a directory -- so cannot be created as a file. 209 210 mockStatObjectNotFound("repo", "main", "sub1/sub2/create.me"); 211 ObjectStats stats = makeObjectStats("sub1/sub2/create.me/") 212 .physicalAddress(pac.createGetPhysicalAddress(this, "repo-base/sub1/sub2/create.me")); 213 mockStatObject("repo", "main", "sub1/sub2/create.me/", stats); 214 215 Exception e = 216 Assert.assertThrows(FileAlreadyExistsException.class, () -> fs.create(path, false)); 217 Assert.assertThat(e.getMessage(), new StringContains("is a directory")); 218 } 219 220 @Test 221 public void testCreateExistingFile() throws IOException { 222 Path path = new Path("lakefs://repo/main/sub1/sub2/create.me"); 223 224 ObjectLocation dir = new ObjectLocation("lakefs", "repo", "main", "sub1/sub2"); 225 mockStatObject("repo", "main", "sub1/sub2/create.me", 226 makeObjectStats("sub1/sub2/create.me")); 227 Exception e = Assert.assertThrows(FileAlreadyExistsException.class, 228 () -> fs.create(path, false)); 229 Assert.assertThat(e.getMessage(), new StringContains("already exists")); 230 } 231 232 @Test 233 public void testOpen() throws IOException, ApiException { 234 String contents = "The quick brown fox jumps over the lazy dog."; 235 byte[] contentsBytes = contents.getBytes(); 236 String physicalPath = sessionId() + "/repo-base/open"; 237 String physicalKey = pac.createGetPhysicalAddress(this, physicalPath); 238 int readBufferSize = 5; 239 Path path = new Path("lakefs://repo/main/read.me"); 240 241 mockStatObject("repo", "main", "read.me", 242 makeObjectStats("read.me") 243 .physicalAddress(physicalKey) 244 .checksum(UNUSED_CHECKSUM) 245 .mtime(UNUSED_MTIME) 246 .sizeBytes((long) contentsBytes.length)); 247 248 // Write physical file to S3. 249 ObjectMetadata s3Metadata = new ObjectMetadata(); 250 s3Metadata.setContentLength(contentsBytes.length); 251 s3Client.putObject(s3Bucket, 252 physicalPath, 253 new ByteArrayInputStream(contentsBytes), 254 s3Metadata); 255 256 try (InputStream in = fs.open(path, readBufferSize)) { 257 String actual = IOUtils.toString(in); 258 Assert.assertEquals(contents, actual); 259 } catch (Exception e) { 260 String actualFiles = String.join(", ", getS3FilesByPrefix("")); 261 throw new RuntimeException("Files " + actualFiles + "; read " + path.toString() + " from " + physicalKey, e); 262 } 263 } 264 265 // TODO(ariels): Rename test to "testOpenWithNonAsciiUriChars". 266 @Test 267 public void testOpenWithInvalidUriChars() throws IOException, ApiException { 268 String contents = "The quick brown fox jumps over the lazy dog."; 269 byte[] contentsBytes = contents.getBytes(); 270 int readBufferSize = 5; 271 272 String[] suffixes = { 273 "with space/open", 274 "wi:th$cha&rs#/%op;e?n", 275 "עכשיו/בעברית/open", 276 "\uD83E\uDD2F/imoji/open", 277 }; 278 for (String suffix : suffixes) { 279 String key = "/repo-base/" + suffix; 280 281 // Write physical file to S3. 282 ObjectMetadata s3Metadata = new ObjectMetadata(); 283 s3Metadata.setContentLength(contentsBytes.length); 284 s3Client.putObject(new PutObjectRequest(s3Bucket, key, new ByteArrayInputStream(contentsBytes), s3Metadata)); 285 286 String path = String.format("lakefs://repo/main/%s-x", suffix); 287 ObjectStats stats = makeObjectStats(suffix + "-x") 288 .physicalAddress(pac.createGetPhysicalAddress(this, key)) 289 .sizeBytes((long) contentsBytes.length); 290 mockStatObject("repo", "main", suffix + "-x", stats); 291 292 try (InputStream in = fs.open(new Path(path), readBufferSize)) { 293 String actual = IOUtils.toString(in); 294 Assert.assertEquals(contents, actual); 295 } 296 } 297 } 298 299 @Test 300 public void testOpen_NotExists() throws IOException, ApiException { 301 Path path = new Path("lakefs://repo/main/doesNotExi.st"); 302 mockStatObjectNotFound("repo", "main", "doesNotExi.st"); 303 Assert.assertThrows(FileNotFoundException.class, 304 () -> fs.open(path)); 305 } 306 }