github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/test/java/io/lakefs/FSTestBase.java (about)

     1  package io.lakefs;
     2  
     3  import com.amazonaws.ClientConfiguration;
     4  import com.amazonaws.auth.AWSCredentials;
     5  import com.amazonaws.auth.BasicAWSCredentials;
     6  import com.amazonaws.services.s3.AmazonS3;
     7  import com.amazonaws.services.s3.AmazonS3Client;
     8  import com.amazonaws.services.s3.S3ClientOptions;
     9  import com.amazonaws.services.s3.model.*;
    10  import com.aventrix.jnanoid.jnanoid.NanoIdUtils;
    11  import com.google.common.base.Optional;
    12  import com.google.common.collect.ImmutableMap;
    13  import com.google.gson.FieldNamingPolicy;
    14  import com.google.gson.Gson;
    15  import com.google.gson.GsonBuilder;
    16  
    17  import io.lakefs.clients.sdk.ApiException;
    18  import io.lakefs.clients.sdk.model.*;
    19  import io.lakefs.clients.sdk.model.ObjectStats.PathTypeEnum;
    20  import io.lakefs.utils.ObjectLocation;
    21  
    22  import org.apache.hadoop.conf.Configuration;
    23  import org.apache.hadoop.fs.FileAlreadyExistsException;
    24  import org.apache.hadoop.fs.FileStatus;
    25  import org.apache.hadoop.fs.LocatedFileStatus;
    26  import org.apache.hadoop.fs.Path;
    27  import org.apache.http.HttpStatus;
    28  
    29  import org.immutables.value.Value;
    30  import org.junit.Before;
    31  import org.junit.Rule;
    32  import org.junit.Test;
    33  import org.junit.rules.TestName;
    34  
    35  import org.mockserver.client.MockServerClient;
    36  import org.mockserver.junit.MockServerRule;
    37  import org.mockserver.matchers.MatchType;
    38  import org.mockserver.matchers.TimeToLive;
    39  import org.mockserver.matchers.Times;
    40  import org.mockserver.model.Cookie;
    41  import org.mockserver.model.HttpRequest;
    42  import org.mockserver.model.HttpResponse;
    43  import org.mockserver.model.Parameter;
    44  
    45  import static org.apache.commons.lang3.StringUtils.removeStart;
    46  
    47  import static org.mockserver.model.HttpResponse.response;
    48  import static org.mockserver.model.JsonBody.json;
    49  
    50  import java.io.IOException;
    51  import java.net.URI;
    52  import java.net.URISyntaxException;
    53  import java.util.Arrays;
    54  import java.util.List;
    55  
    56  /**
    57   * Base for all LakeFSFilesystem tests.  Helps set common components up but
    58   * contains no tests of its own.
    59   * The visibility of this class is public as it's being used by other libraries for testing purposes
    60   *
    61   * See e.g. "Base Test Class Testing Pattern: Why and How to use",
    62   * <a href="https://eliasnogueira.com/base-test-class-testing-pattern-why-and-how-to-use/">...</a>
    63   */
    64  public abstract class FSTestBase {
    65      static protected final Long UNUSED_FILE_SIZE = 1L;
    66      static protected final Long UNUSED_MTIME = 0L;
    67      static protected final String UNUSED_CHECKSUM = "unused";
    68  
    69      static protected final Long STATUS_FILE_SIZE = 2L;
    70      static protected final Long STATUS_MTIME = 123456789L;
    71      static protected final String STATUS_CHECKSUM = "status";
    72  
    73      protected Configuration conf;
    74      protected final LakeFSFileSystem fs = new LakeFSFileSystem();
    75  
    76      protected String s3Base;
    77      protected String s3Bucket;
    78  
    79      protected static final String S3_ACCESS_KEY_ID = "AKIArootkey";
    80      protected static final String S3_SECRET_ACCESS_KEY = "secret/minio/key=";
    81  
    82      protected static final ApiException noSuchFile = new ApiException(HttpStatus.SC_NOT_FOUND, "no such file");
    83  
    84      protected final Gson gson = new GsonBuilder()
    85          .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES)
    86          .create();
    87  
    88      @Value.Immutable static public interface Pagination {
    89          @Value.Parameter Optional<Integer> amount();
    90          @Value.Parameter Optional<String> after();
    91          @Value.Parameter Optional<String> prefix();
    92      }
    93  
    94      @Rule
    95      public MockServerRule mockServerRule = new MockServerRule(this);
    96      protected MockServerClient mockServerClient;
    97  
    98      @Rule
    99      public TestName name = new TestName();
   100  
   101      protected String sessionId() {
   102          return name.getMethodName();
   103      }
   104  
   105      protected HttpRequest request() {
   106          return HttpRequest.request().withCookie(new Cookie("sessionId", sessionId()));
   107      }
   108  
   109      protected static String makeS3BucketName() {
   110          String slug = NanoIdUtils.randomNanoId(NanoIdUtils.DEFAULT_NUMBER_GENERATOR,
   111                                                 "abcdefghijklmnopqrstuvwxyz-0123456789".toCharArray(), 14);
   112          return String.format("bucket-%s-x", slug);
   113      }
   114  
   115      /** @return "s3://..." URL to use for s3Path (which does not start with a slash) on bucket */
   116      protected String s3Url(String s3Path) {
   117          return s3Base + s3Path;
   118      }
   119  
   120      protected String getS3Key(StagingLocation stagingLocation) {
   121          return removeStart(stagingLocation.getPhysicalAddress(), s3Base);
   122      }
   123  
   124      /**
   125       * Override to add to Hadoop configuration.
   126       */
   127      protected void addHadoopConfiguration(Configuration conf) {
   128      }
   129  
   130      @Before
   131      public void hadoopSetup() throws IOException, URISyntaxException {
   132          s3Base = "s3a://UNUSED/"; // Overridden if S3 will be used!
   133  
   134          conf = new Configuration(false);
   135  
   136          addHadoopConfiguration(conf);
   137  
   138          conf.set("fs.lakefs.impl", "io.lakefs.LakeFSFileSystem");
   139  
   140          conf.set("fs.lakefs.access.key", "unused-but-checked");
   141          conf.set("fs.lakefs.secret.key", "unused-but-checked");
   142          conf.set("fs.lakefs.endpoint", String.format("http://localhost:%d/", mockServerClient.getPort()));
   143          conf.set("fs.lakefs.session_id", sessionId());
   144  
   145          System.setProperty("hadoop.home.dir", "/");
   146  
   147          // lakeFSFS initialization requires a blockstore.
   148          mockServerClient.when(request()
   149                                .withMethod("GET")
   150                                .withPath("/config/storage"),
   151                                Times.once())
   152              .respond(response()
   153                       .withStatusCode(200)
   154                       .withBody(gson.toJson(new StorageConfig()
   155                                             .blockstoreType("s3")
   156                                             .blockstoreNamespaceExample("/not/really")
   157                                             .blockstoreNamespaceValidityRegex(".*")
   158                                             // TODO(ariels): Change for presigned?
   159                                             .preSignSupport(false)
   160                                             .preSignSupportUi(false)
   161                                             .importSupport(false)
   162                                             .importValidityRegex(".*"))));
   163  
   164          // Always allow repo "repo" to be found, it's used in all tests.
   165          mockServerClient.when(request()
   166                                .withMethod("GET")
   167                                .withPath("/repositories/repo"))
   168              .respond(response().withStatusCode(200)
   169                       .withBody(gson.toJson(new Repository().id("repo")
   170                                             .creationDate(1234L)
   171                                             .defaultBranch("main")
   172                                             // Not really needed, just put something that works.
   173                                             .storageNamespace("s3a://FIX/ME?"))));
   174  
   175          // Don't return 404s for unknown paths - they will be emitted for
   176          // many bad requests or mocks, and make our life difficult.  Instead
   177          // fail using a unique error code.  This has very low priority.
   178          mockServerClient.when(request(), Times.unlimited(), TimeToLive.unlimited(), -10000)
   179              .respond(response().withStatusCode(418));
   180          // TODO(ariels): No tests mock "get underlying filesystem", so this
   181          //     also catches its "get repo" call.  Nothing bad happens, but
   182          //     this response does show up in logs.
   183  
   184          moreHadoopSetup();
   185  
   186          fs.initialize(new URI("lakefs://repo/main/file.txt"), conf);
   187      }
   188  
   189      protected void moreHadoopSetup() {}
   190  
   191      protected ObjectStats makeObjectStats(String path) {
   192          return new ObjectStats()
   193              .pathType(PathTypeEnum.OBJECT)
   194              .path(path)
   195              .checksum(UNUSED_CHECKSUM)
   196              .physicalAddress("physical://unused/" + path)
   197              .mtime(UNUSED_MTIME);
   198      }
   199  
   200      // Mock this statObject call not to be found
   201      protected void mockStatObjectNotFound(String repo, String ref, String path) {
   202          mockServerClient.when(request()
   203                                .withMethod("GET")
   204                                .withPath(String.format("/repositories/%s/refs/%s/objects/stat", repo, ref))
   205                                .withQueryStringParameter("path", path))
   206              .respond(response().withStatusCode(404)
   207                       .withBody(String.format("{message: \"%s/%s/%s not found\"}",
   208                                               repo, ref, path, sessionId())));
   209      }
   210  
   211      protected void mockStatObject(String repo, String ref, String path, ObjectStats stats) {
   212          mockServerClient.when(request()
   213                                .withMethod("GET")
   214                                .withPath(String.format("/repositories/%s/refs/%s/objects/stat", repo, ref))
   215                                .withQueryStringParameter("path", path))
   216              .respond(response().withStatusCode(200)
   217                       .withBody(gson.toJson(stats)));
   218      }
   219  
   220      // Mock this lakeFSFS path not to exist.  You may still need to
   221      // mockListing for the directory that will not contain this path.
   222      protected void mockFileDoesNotExist(String repo, String ref, String path) {
   223          mockStatObjectNotFound(repo, ref, path);
   224          mockStatObjectNotFound(repo, ref, path + Constants.SEPARATOR);
   225      }
   226  
   227      protected void mockFilesInDir(String repo, String main, String dir, String... files) {
   228          ObjectStats[] allStats;
   229          if (files.length == 0) {
   230              // Fake a directory marker
   231              Path dirPath = new Path(String.format("lakefs://%s/%s/%s", repo, main, dir));
   232              ObjectLocation dirLoc = ObjectLocation.pathToObjectLocation(dirPath);
   233              ObjectStats dirStats = mockDirectoryMarker(dirLoc);
   234              allStats = new ObjectStats[1];
   235              allStats[0] = dirStats;
   236          } else {
   237              mockStatObjectNotFound(repo, main, dir);
   238              mockStatObjectNotFound(repo, main, dir + Constants.SEPARATOR);
   239  
   240              allStats = new ObjectStats[files.length];
   241              for (int i = 0; i < files.length; i++) {
   242                  allStats[i] = makeObjectStats(dir + Constants.SEPARATOR + files[i]);
   243              }
   244          }
   245  
   246          // Directory can be listed!
   247          mockListing("repo", "main",
   248                      ImmutablePagination.builder().prefix(dir + Constants.SEPARATOR).build(),
   249                      allStats);
   250      }
   251  
   252      protected void mockUploadObject(String repo, String branch, String path) {
   253          ObjectStats uploadedStats = makeObjectStats(path)
   254              .physicalAddress(s3Url(String.format("repo-base/dir-marker/%s/%s/%s/%s",
   255                                                   sessionId(), repo, branch, path)));
   256          mockServerClient.when(request()
   257                                .withMethod("POST")
   258                                .withPath(String.format("/repositories/%s/branches/%s/objects", repo, branch))
   259                                .withQueryStringParameter("path", path))
   260              .respond(response().withStatusCode(200)
   261                       .withBody(gson.toJson(uploadedStats)));
   262      }
   263  
   264      protected void mockGetBranch(String repo, String branch) {
   265          mockServerClient.when(request()
   266                                .withMethod("GET")
   267                                .withPath(String.format("/repositories/%s/branches/%s", repo, branch)))
   268              .respond(response().withStatusCode(200)
   269                       .withBody(gson.toJson(new Ref().id("123").commitId("456"))));
   270      }
   271  
   272      protected void mockDeleteObject(String repo, String branch, String path) {
   273          mockServerClient.when(request()
   274                                .withMethod("DELETE")
   275                                .withPath(String.format("/repositories/%s/branches/%s/objects", repo, branch))
   276                                .withQueryStringParameter("path", path))
   277              .respond(response().withStatusCode(204));
   278      }
   279  
   280      protected void mockDeleteObjectNotFound(String repo, String branch, String path) {
   281          mockServerClient.when(request()
   282                                .withMethod("DELETE")
   283                                .withPath(String.format("/repositories/%s/branches/%s/objects", repo, branch))
   284                                .withQueryStringParameter("path", path))
   285              .respond(response().withStatusCode(404));
   286      }
   287  
   288      // Mocks a single deleteObjects call to succeed, returning list of failures.
   289      protected void mockDeleteObjects(String repo, String branch, String path, ObjectError... errors) {
   290          PathList pathList = new PathList().addPathsItem(path);
   291          mockDeleteObjects(repo, branch, pathList, errors);
   292      }
   293  
   294      // Mocks a single deleteObjects call to succeed, returning list of failures.
   295      protected void mockDeleteObjects(String repo, String branch, PathList pathList, ObjectError... errors) {
   296          mockServerClient.when(request()
   297                                .withMethod("POST")
   298                                .withPath(String.format("/repositories/%s/branches/%s/objects/delete", repo, branch))
   299                                .withBody(gson.toJson(pathList)),
   300                                Times.once())
   301              .respond(response().withStatusCode(200)
   302                       .withBody(gson.toJson(new ObjectErrorList()
   303                                             .errors(Arrays.asList(errors)))));
   304      }
   305  
   306      protected ObjectStats mockDirectoryMarker(ObjectLocation objectLoc) {
   307          // Mock parent directory to show the directory marker exists.
   308          ObjectStats markerStats = makeObjectStats(objectLoc.getPath())
   309              .pathType(PathTypeEnum.OBJECT);
   310          mockServerClient.when(request()
   311                                .withMethod("GET")
   312                                .withPath(String.format("/repositories/%s/refs/%s/objects/stat", objectLoc.getRepository(), objectLoc.getRef()))
   313                                .withQueryStringParameter("path", objectLoc.getPath()))
   314              .respond(response().withStatusCode(200)
   315                       .withBody(gson.toJson(markerStats)));
   316          return markerStats;
   317      }
   318  
   319      // Mock this listing and return these stats.
   320      protected void mockListing(String repo, String ref, ImmutablePagination pagination, ObjectStats... stats) {
   321          mockListingWithHasMore(repo, ref, pagination, false, stats);
   322      }
   323  
   324      protected void mockListingWithHasMore(String repo, String ref, ImmutablePagination pagination, boolean hasMore, ObjectStats... stats) {
   325          HttpRequest req = request()
   326              .withMethod("GET")
   327              .withPath(String.format("/repositories/%s/refs/%s/objects/ls", repo, ref));
   328          // Validate elements of pagination only if present.
   329          if (pagination.after().isPresent()) {
   330              req = req.withQueryStringParameter("after", pagination.after().or(""));
   331          }
   332          if (pagination.amount().isPresent()) {
   333              req = req.withQueryStringParameter("amount", pagination.amount().get().toString());
   334          }
   335          if (pagination.prefix().isPresent()) {
   336              req = req.withQueryStringParameter("prefix", pagination.prefix().or(""));
   337          }
   338          ObjectStatsList resp = new ObjectStatsList()
   339              .results(Arrays.asList(stats))
   340              .pagination(new io.lakefs.clients.sdk.model.Pagination()
   341                          .hasMore(hasMore).maxPerPage(10000).results(stats.length).nextOffset("zz"));
   342          mockServerClient.when(req)
   343              .respond(response()
   344                       .withStatusCode(200)
   345                       .withBody(gson.toJson(resp)));
   346      }
   347  }