github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/main/java/io/lakefs/storage/PhysicalAddressTranslator.java (about)

     1  package io.lakefs.storage;
     2  
     3  import org.apache.hadoop.fs.Path;
     4  
     5  import java.net.URI;
     6  import java.net.URISyntaxException;
     7  import java.util.regex.Pattern;
     8  
     9  public class PhysicalAddressTranslator {
    10      private String blockstoreType;
    11      private String validityRegex;
    12  
    13      public PhysicalAddressTranslator(String blockstoreType, String validityRegex) {
    14          this.blockstoreType = blockstoreType;
    15          this.validityRegex = validityRegex;
    16      }
    17  
    18      // translate a physical address in lakeFS storage namespace syntax into a Hadoop FileSystem Path
    19      public Path translate(String address) throws URISyntaxException {
    20          if(!Pattern.compile(validityRegex).matcher(address).find()) {
    21              throw new RuntimeException(String.format("address %s does not match blockstore namespace regex %s", address,
    22              validityRegex));
    23          }
    24          
    25          // Going through Path.toUri to avoid encoding bugs. See: https://github.com/treeverse/lakeFS/issues/5827
    26          URI uri = new Path(address).toUri();
    27          switch (blockstoreType) {
    28              case "s3":
    29                  return new Path(new URI("s3a", uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), uri.getQuery(), uri.getFragment()));
    30              case "azure":
    31                  // TODO(johnnyaug) - translate https:// style to abfs://
    32              default:
    33                  throw new RuntimeException(String.format("lakeFS blockstore type %s unsupported by this FileSystem", blockstoreType));
    34          }
    35      }
    36  }