github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/test/java/io/lakefs/LakeFSFileSystemServerS3Test.java (about)

     1  package io.lakefs;
     2  
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.lakefs.clients.sdk.model.*;
import io.lakefs.clients.sdk.model.ObjectStats.PathTypeEnum;
import io.lakefs.clients.sdk.ApiException;
import io.lakefs.utils.ObjectLocation;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.Path;

import com.amazonaws.HttpMethod;
import com.amazonaws.services.s3.model.*;

import org.hamcrest.core.StringContains;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;

import org.mockserver.matchers.MatchType;

import static org.mockserver.model.HttpResponse.response;
import static org.mockserver.model.JsonBody.json;

import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Date;
import java.util.concurrent.TimeUnit;
    36  
    37  @RunWith(Parameterized.class)
    38  public class LakeFSFileSystemServerS3Test extends S3FSTestBase {
    39      static private final Logger LOG = LoggerFactory.getLogger(LakeFSFileSystemServerS3Test.class);
    40  
    41      public static interface PhysicalAddressCreator {
    42          default void initConfiguration(Configuration conf) {}
    43          String createGetPhysicalAddress(S3FSTestBase o, String key);
    44          StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path);
    45      }
    46  
    47      @Parameters(name="{1}")
    48      public static Iterable<Object[]> data() {
    49          return Arrays.asList(new Object[][]{
    50                  {new SimplePhysicalAddressCreator(), "simple"},
    51                  {new PresignedPhysicalAddressCreator(), "presigned"}});
    52      }
    53  
    54      @Parameter(1)
    55      public String unusedAddressCreatorType;
    56  
    57      @Parameter(0)
    58      public PhysicalAddressCreator pac;
    59  
    60      static private class SimplePhysicalAddressCreator implements PhysicalAddressCreator {
    61          public String createGetPhysicalAddress(S3FSTestBase o, String key) {
    62              return o.s3Url(key);
    63          }
    64  
    65          public StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path) {
    66              String fullPath = String.format("%s/%s/%s/%s/%s-object",
    67                                              o.sessionId(), namespace, repo, branch, path);
    68              return new StagingLocation().physicalAddress(o.s3Url(fullPath));
    69          }
    70      }
    71  
    72      static private class PresignedPhysicalAddressCreator implements PhysicalAddressCreator {
    73          public void initConfiguration(Configuration conf) {
    74              conf.set("fs.lakefs.access.mode", "presigned");
    75          }
    76  
    77          protected Date getExpiration() {
    78              return new Date(System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1));
    79          }
    80  
    81          public String createGetPhysicalAddress(S3FSTestBase o, String key) {
    82              Date expiration = getExpiration();
    83              URL presignedUrl =
    84                  o.s3Client.generatePresignedUrl(new GeneratePresignedUrlRequest(o.s3Bucket, key)
    85                                                .withMethod(HttpMethod.GET)
    86                                                .withExpiration(expiration));
    87              return presignedUrl.toString();
    88          }
    89  
    90          public StagingLocation createPutStagingLocation(S3FSTestBase o, String namespace, String repo, String branch, String path) {
    91              String fullPath = String.format("%s/%s/%s/%s/%s-object",
    92                                              o.sessionId(), namespace, repo, branch, path);
    93              Date expiration = getExpiration();
    94              URL presignedUrl =
    95                  o.s3Client.generatePresignedUrl(new GeneratePresignedUrlRequest(o.s3Bucket, fullPath)
    96                                                .withMethod(HttpMethod.PUT)
    97                                                .withExpiration(expiration));
    98              return new StagingLocation()
    99                  .physicalAddress(o.s3Url(fullPath))
   100                  .presignedUrl(presignedUrl.toString());
   101          }
   102      }
   103  
   104      @Override
   105      protected void moreHadoopSetup() {
   106          super.moreHadoopSetup();
   107          pac.initConfiguration(conf);
   108      }
   109  
   110      // Return a location under namespace for this getPhysicalAddress call.
   111      protected StagingLocation mockGetPhysicalAddress(String repo, String branch, String path, String namespace) {
   112          StagingLocation stagingLocation =
   113              pac.createPutStagingLocation(this, namespace, repo, branch, path);
   114          mockServerClient.when(request()
   115                                .withMethod("GET")
   116                                .withPath(String.format("/repositories/%s/branches/%s/staging/backing", repo, branch))
   117                                .withQueryStringParameter("path", path))
   118              .respond(response().withStatusCode(200)
   119                       .withBody(gson.toJson(stagingLocation)));
   120          return stagingLocation;
   121      }
   122  
   123      @Test
   124      public void testCreate() throws IOException {
   125          String contents = "The quick brown fox jumps over the lazy dog.";
   126          long contentsLength = (long) contents.getBytes().length;
   127          Path path = new Path("lakefs://repo/main/sub1/sub2/create.me");
   128  
   129          mockDirectoryMarker(ObjectLocation.pathToObjectLocation(null, path));
   130  
   131          StagingLocation stagingLocation =
   132              mockGetPhysicalAddress("repo", "main", "sub1/sub2/create.me", "repo-base/create");
   133  
   134          // nothing at path
   135          mockFileDoesNotExist("repo", "main", "sub1/sub2/create.me");
   136          // sub1/sub2 was an empty directory with no marker.
   137          mockStatObjectNotFound("repo", "main", "sub1/sub2/");
   138  
   139          ObjectStats newStats = makeObjectStats("sub1/sub2/create.me")
   140              .pathType(PathTypeEnum.OBJECT)
   141              .physicalAddress(stagingLocation.getPhysicalAddress()).
   142              checksum(UNUSED_CHECKSUM).
   143              mtime(UNUSED_MTIME).
   144              sizeBytes(UNUSED_FILE_SIZE);
   145  
   146          mockServerClient.when(request()
   147                                .withMethod("PUT")
   148                                .withPath("/repositories/repo/branches/main/staging/backing")
   149                                .withBody(json(gson.toJson(new StagingMetadata()
   150                                                      .staging(stagingLocation)
   151                                                      .sizeBytes(contentsLength)),
   152                                               MatchType.ONLY_MATCHING_FIELDS)))
   153              .respond(response()
   154                       .withStatusCode(200)
   155                       .withBody(gson.toJson(newStats)));
   156  
   157          // Empty dir marker should be deleted.
   158          mockDeleteObject("repo", "main", "sub1/sub2/");
   159  
   160          OutputStream out = fs.create(path);
   161          out.write(contents.getBytes());
   162          out.close();
   163  
   164          // Write succeeded, verify physical file on S3.
   165          assertS3Object(stagingLocation, contents);
   166      }
   167  
   168      @Test
   169      public void testMkdirs() throws IOException {
   170          // setup empty folder checks
   171          Path path = new Path("dir1/dir2/dir3");
   172          for (Path p = new Path(path.toString()); p != null && !p.isRoot(); p = p.getParent()) {
   173              mockStatObjectNotFound("repo", "main", p.toString());
   174              mockStatObjectNotFound("repo", "main", p+"/");
   175              mockListing("repo", "main", ImmutablePagination.builder().prefix(p+"/").build());
   176          }
   177  
   178          // physical address to directory marker object
   179          StagingLocation stagingLocation =
   180              mockGetPhysicalAddress("repo", "main", "dir1/dir2/dir3/", "repo-base/emptyDir");
   181  
   182          ObjectStats newStats = makeObjectStats("dir1/dir2/dir3/")
   183              .physicalAddress(pac.createGetPhysicalAddress(this, "repo-base/dir12"));
   184          mockStatObject("repo", "main", "dir1/dir2/dir3/", newStats);
   185  
   186          mockServerClient.when(request()
   187                                .withMethod("PUT")
   188                                .withPath("/repositories/repo/branches/main/staging/backing")
   189                                .withQueryStringParameter("path", "dir1/dir2/dir3/")
   190                                .withBody(json(gson.toJson(new StagingMetadata()
   191                                                           .staging(stagingLocation)
   192                                                           .sizeBytes(0L)),
   193                                               MatchType.ONLY_MATCHING_FIELDS)))
   194              .respond(response()
   195                       .withStatusCode(200)
   196                       .withBody(gson.toJson(newStats)));
   197  
   198          // call mkdirs
   199          Assert.assertTrue(fs.mkdirs(new Path("lakefs://repo/main/", path)));
   200  
   201          // verify file exists on s3
   202          assertS3Object(stagingLocation, "");
   203      }
   204  
   205      @Test
   206      public void testCreateExistingDirectory() throws IOException {
   207          Path path = new Path("lakefs://repo/main/sub1/sub2/create.me");
   208          // path is a directory -- so cannot be created as a file.
   209  
   210          mockStatObjectNotFound("repo", "main", "sub1/sub2/create.me");
   211          ObjectStats stats = makeObjectStats("sub1/sub2/create.me/")
   212              .physicalAddress(pac.createGetPhysicalAddress(this, "repo-base/sub1/sub2/create.me"));
   213          mockStatObject("repo", "main", "sub1/sub2/create.me/", stats);
   214  
   215          Exception e =
   216              Assert.assertThrows(FileAlreadyExistsException.class, () -> fs.create(path, false));
   217          Assert.assertThat(e.getMessage(), new StringContains("is a directory"));
   218      }
   219  
   220      @Test
   221      public void testCreateExistingFile() throws IOException {
   222          Path path = new Path("lakefs://repo/main/sub1/sub2/create.me");
   223  
   224          ObjectLocation dir = new ObjectLocation("lakefs", "repo", "main", "sub1/sub2");
   225          mockStatObject("repo", "main", "sub1/sub2/create.me",
   226                         makeObjectStats("sub1/sub2/create.me"));
   227          Exception e = Assert.assertThrows(FileAlreadyExistsException.class,
   228                              () -> fs.create(path, false));
   229          Assert.assertThat(e.getMessage(), new StringContains("already exists"));
   230      }
   231  
   232      @Test
   233      public void testOpen() throws IOException, ApiException {
   234          String contents = "The quick brown fox jumps over the lazy dog.";
   235          byte[] contentsBytes = contents.getBytes();
   236          String physicalPath = sessionId() + "/repo-base/open";
   237          String physicalKey = pac.createGetPhysicalAddress(this, physicalPath);
   238          int readBufferSize = 5;
   239          Path path = new Path("lakefs://repo/main/read.me");
   240  
   241          mockStatObject("repo", "main", "read.me",
   242                         makeObjectStats("read.me")
   243                         .physicalAddress(physicalKey)
   244                         .checksum(UNUSED_CHECKSUM)
   245                         .mtime(UNUSED_MTIME)
   246                         .sizeBytes((long) contentsBytes.length));
   247  
   248          // Write physical file to S3.
   249          ObjectMetadata s3Metadata = new ObjectMetadata();
   250          s3Metadata.setContentLength(contentsBytes.length);
   251          s3Client.putObject(s3Bucket,
   252                             physicalPath,
   253                             new ByteArrayInputStream(contentsBytes),
   254                             s3Metadata);
   255  
   256          try (InputStream in = fs.open(path, readBufferSize)) {
   257              String actual = IOUtils.toString(in);
   258              Assert.assertEquals(contents, actual);
   259          } catch (Exception e) {
   260              String actualFiles = String.join(", ", getS3FilesByPrefix(""));
   261              throw new RuntimeException("Files " + actualFiles + "; read " + path.toString() + " from " + physicalKey, e);
   262          }
   263      }
   264  
   265      // TODO(ariels): Rename test to "testOpenWithNonAsciiUriChars".
   266      @Test
   267      public void testOpenWithInvalidUriChars() throws IOException, ApiException {
   268          String contents = "The quick brown fox jumps over the lazy dog.";
   269          byte[] contentsBytes = contents.getBytes();
   270          int readBufferSize = 5;
   271  
   272          String[] suffixes = {
   273                  "with space/open",
   274                  "wi:th$cha&rs#/%op;e?n",
   275                  "עכשיו/בעברית/open",
   276                  "\uD83E\uDD2F/imoji/open",
   277          };
   278          for (String suffix : suffixes) {
   279              String key = "/repo-base/" + suffix;
   280  
   281              // Write physical file to S3.
   282              ObjectMetadata s3Metadata = new ObjectMetadata();
   283              s3Metadata.setContentLength(contentsBytes.length);
   284              s3Client.putObject(new PutObjectRequest(s3Bucket, key, new ByteArrayInputStream(contentsBytes), s3Metadata));
   285  
   286              String path = String.format("lakefs://repo/main/%s-x", suffix);
   287              ObjectStats stats = makeObjectStats(suffix + "-x")
   288                  .physicalAddress(pac.createGetPhysicalAddress(this, key))
   289                  .sizeBytes((long) contentsBytes.length);
   290              mockStatObject("repo", "main", suffix + "-x", stats);
   291  
   292              try (InputStream in = fs.open(new Path(path), readBufferSize)) {
   293                  String actual = IOUtils.toString(in);
   294                  Assert.assertEquals(contents, actual);
   295              }
   296          }
   297      }
   298  
   299      @Test
   300      public void testOpen_NotExists() throws IOException, ApiException {
   301          Path path = new Path("lakefs://repo/main/doesNotExi.st");
   302          mockStatObjectNotFound("repo", "main", "doesNotExi.st");
   303          Assert.assertThrows(FileNotFoundException.class,
   304                              () -> fs.open(path));
   305      }
   306  }