github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/main/java/io/lakefs/storage/MetadataClient.java (about)

     1  package io.lakefs.storage;
     2  
     3  import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
     4  import com.amazonaws.services.s3.model.ObjectMetadata;
     5  import org.apache.hadoop.fs.FileStatus;
     6  import org.apache.hadoop.fs.FileSystem;
     7  import org.apache.hadoop.fs.Path;
     8  import org.slf4j.Logger;
     9  import org.slf4j.LoggerFactory;
    10  
    11  import java.io.IOException;
    12  import java.lang.reflect.InvocationTargetException;
    13  import java.lang.reflect.Method;
    14  import java.net.URI;
    15  
    16  /**
    17   * MetadataClient used to extract ObjectMetadata with content size and etag information from the underlying filesystem.
    18   */
    19  public class MetadataClient {
    20      public static final Logger LOG = LoggerFactory.getLogger(MetadataClient.class);
    21      private final FileSystem fs;
    22  
    23      public MetadataClient(FileSystem fs) {
    24          if (fs == null) {
    25              throw new java.lang.IllegalArgumentException();
    26          }
    27          this.fs = fs;
    28      }
    29  
    30      /**
    31       * Get object metadata by physical address. First it will try to extract the information from the FileSystem's FileStatus.
    32       * Fallback by extracting s3 client and call getObjectMetadata.
    33       * @param physicalUri physical uri of object
    34       * @return ObjectMetadata filled with Etag and content length
    35       * @throws IOException case etag can't be extracted by s3 or file status
    36       */
    37      ObjectMetadata getObjectMetadata(URI physicalUri) throws IOException {
    38          String bucket = physicalUri.getHost();
    39          String key = physicalUri.getPath().substring(1);
    40  
    41          // use underlying filesystem to get the file status and extract
    42          // content length and etag (using reflection)
    43          Path physicalPath = new Path(physicalUri.getPath());
    44          FileStatus fileStatus = this.fs.getFileStatus(physicalPath);
    45          try {
    46              Method getETagMethod = fileStatus.getClass().getMethod("getETag");
    47              String etag = (String) getETagMethod.invoke(fileStatus);
    48              // return the specific properties over object metadata for easy fallback
    49              ObjectMetadata objectMetadata = new ObjectMetadata();
    50              objectMetadata.setContentLength(fileStatus.getLen());
    51              objectMetadata.setHeader("ETag", etag);
    52              return objectMetadata;
    53          } catch (InvocationTargetException | IllegalAccessException e) {
    54              LOG.debug("failed to get etag from file status", e);
    55          } catch (NoSuchMethodException ignored) {
    56          }
    57  
    58          // fallback - get the underlying s3 client and request object metadata
    59          try {
    60              Method amazonS3ClientGetter = fs.getClass().getDeclaredMethod("getAmazonS3Client");
    61              amazonS3ClientGetter.setAccessible(true);
    62              Object s3Client = amazonS3ClientGetter.invoke(fs);
    63              Method getObjectMetadataMethod = s3Client.getClass().getDeclaredMethod("getObjectMetadata", GetObjectMetadataRequest.class);
    64              GetObjectMetadataRequest metadataRequest = new GetObjectMetadataRequest(bucket, key);
    65              return (ObjectMetadata) getObjectMetadataMethod.invoke(s3Client, metadataRequest);
    66          } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
    67              LOG.debug("failed to get object metadata using underlying s3 client", e);
    68          }
    69          // fallback - get the underlying s3 client from the databricks wrapper and request object metadata
    70          try {
    71              Method fsGetter = fs.getClass().getDeclaredMethod("getWrappedFs");
    72              Object s3fs = fsGetter.invoke(fs);
    73              Method amazonS3ClientGetter = s3fs.getClass().getDeclaredMethod("getAmazonS3Client");
    74              amazonS3ClientGetter.setAccessible(true);
    75              Object s3Client = amazonS3ClientGetter.invoke(s3fs);
    76              Method getObjectMetadataMethod = s3Client.getClass().getDeclaredMethod("getObjectMetadata", GetObjectMetadataRequest.class);
    77              GetObjectMetadataRequest metadataRequest = new GetObjectMetadataRequest(bucket, key);
    78              return (ObjectMetadata) getObjectMetadataMethod.invoke(s3Client, metadataRequest);
    79          } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
    80              LOG.warn("failed to get object metadata using underlying wrapped s3 client", e);
    81              throw new IOException("get object metadata using underlying wrapped s3 client", e);
    82          }
    83      }
    84  }