github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/main/java/io/lakefs/storage/MetadataClient.java (about) 1 package io.lakefs.storage; 2 3 import com.amazonaws.services.s3.model.GetObjectMetadataRequest; 4 import com.amazonaws.services.s3.model.ObjectMetadata; 5 import org.apache.hadoop.fs.FileStatus; 6 import org.apache.hadoop.fs.FileSystem; 7 import org.apache.hadoop.fs.Path; 8 import org.slf4j.Logger; 9 import org.slf4j.LoggerFactory; 10 11 import java.io.IOException; 12 import java.lang.reflect.InvocationTargetException; 13 import java.lang.reflect.Method; 14 import java.net.URI; 15 16 /** 17 * MetadataClient used to extract ObjectMetadata with content size and etag information from the underlying filesystem. 18 */ 19 public class MetadataClient { 20 public static final Logger LOG = LoggerFactory.getLogger(MetadataClient.class); 21 private final FileSystem fs; 22 23 public MetadataClient(FileSystem fs) { 24 if (fs == null) { 25 throw new java.lang.IllegalArgumentException(); 26 } 27 this.fs = fs; 28 } 29 30 /** 31 * Get object metadata by physical address. First it will try to extract the information from the FileSystem's FileStatus. 32 * Fallback by extracting s3 client and call getObjectMetadata. 33 * @param physicalUri physical uri of object 34 * @return ObjectMetadata filled with Etag and content length 35 * @throws IOException case etag can't be extracted by s3 or file status 36 */ 37 ObjectMetadata getObjectMetadata(URI physicalUri) throws IOException { 38 String bucket = physicalUri.getHost(); 39 String key = physicalUri.getPath().substring(1); 40 41 // use underlying filesystem to get the file status and extract 42 // content length and etag (using reflection) 43 Path physicalPath = new Path(physicalUri.getPath()); 44 FileStatus fileStatus = this.fs.getFileStatus(physicalPath); 45 try { 46 Method getETagMethod = fileStatus.getClass().getMethod("getETag"); 47 String etag = (String) getETagMethod.invoke(fileStatus); 48 // return the specific properties over object metadata for easy fallback 49 ObjectMetadata objectMetadata = new ObjectMetadata(); 50 objectMetadata.setContentLength(fileStatus.getLen()); 51 objectMetadata.setHeader("ETag", etag); 52 return objectMetadata; 53 } catch (InvocationTargetException | IllegalAccessException e) { 54 LOG.debug("failed to get etag from file status", e); 55 } catch (NoSuchMethodException ignored) { 56 } 57 58 // fallback - get the underlying s3 client and request object metadata 59 try { 60 Method amazonS3ClientGetter = fs.getClass().getDeclaredMethod("getAmazonS3Client"); 61 amazonS3ClientGetter.setAccessible(true); 62 Object s3Client = amazonS3ClientGetter.invoke(fs); 63 Method getObjectMetadataMethod = s3Client.getClass().getDeclaredMethod("getObjectMetadata", GetObjectMetadataRequest.class); 64 GetObjectMetadataRequest metadataRequest = new GetObjectMetadataRequest(bucket, key); 65 return (ObjectMetadata) getObjectMetadataMethod.invoke(s3Client, metadataRequest); 66 } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { 67 LOG.debug("failed to get object metadata using underlying s3 client", e); 68 } 69 // fallback - get the underlying s3 client from the databricks wrapper and request object metadata 70 try { 71 Method fsGetter = fs.getClass().getDeclaredMethod("getWrappedFs"); 72 Object s3fs = fsGetter.invoke(fs); 73 Method amazonS3ClientGetter = s3fs.getClass().getDeclaredMethod("getAmazonS3Client"); 74 amazonS3ClientGetter.setAccessible(true); 75 Object s3Client = amazonS3ClientGetter.invoke(s3fs); 76 Method getObjectMetadataMethod = s3Client.getClass().getDeclaredMethod("getObjectMetadata", GetObjectMetadataRequest.class); 77 GetObjectMetadataRequest metadataRequest = new GetObjectMetadataRequest(bucket, key); 78 return (ObjectMetadata) getObjectMetadataMethod.invoke(s3Client, metadataRequest); 79 } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { 80 LOG.warn("failed to get object metadata using underlying wrapped s3 client", e); 81 throw new IOException("get object metadata using underlying wrapped s3 client", e); 82 } 83 } 84 }