github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/main/java/io/lakefs/storage/HttpRangeInputStream.java (about)

     1  package io.lakefs.storage;
     2  
     3  import java.io.EOFException;
     4  import java.io.IOException;
     5  import java.io.InputStream;
     6  import java.net.HttpURLConnection;
     7  import java.net.MalformedURLException;
     8  import java.net.URL;
     9  
    10  import org.apache.commons.io.IOUtils;
    11  import org.apache.hadoop.fs.FSExceptionMessages;
    12  import org.apache.hadoop.fs.FSInputStream;
    13  
    14  public class HttpRangeInputStream extends FSInputStream {
    15      private static final int DEFAULT_BUFFER_SIZE_BYTES = 1024 * 1024;
    16      private final String url;
    17      private final int bufferSize;
    18  
    19      private long start = Long.MAX_VALUE;
    20      private long pos;
    21      private long len = 0;
    22      private byte[] rangeContent;
    23  
    24      private boolean closed;
    25      
    26      public HttpRangeInputStream(String url) throws IOException {
    27          this(url, DEFAULT_BUFFER_SIZE_BYTES);
    28      }
    29  
    30      public HttpRangeInputStream(String url, int bufferSize) throws IOException {
    31          this.url = url;
    32          this.bufferSize = bufferSize;
    33          HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
    34          connection.setRequestMethod("GET");
    35          connection.setRequestProperty("Range", "bytes=0-0");
    36          String contentRangeHeader = connection.getHeaderField("Content-Range");
    37          if (contentRangeHeader == null || !contentRangeHeader.startsWith("bytes 0-0/")) {
    38              // empty file
    39              return;
    40          }
    41          len = Long.parseLong(contentRangeHeader.substring("bytes 0-0/".length()));
    42      }
    43  
    44      private void updateInputStream(long targetPos) throws MalformedURLException, IOException {
    45          if (targetPos >= start && targetPos < start + bufferSize) {
    46              // no need to update the stream
    47              return;
    48          }
    49          HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
    50          connection.setRequestMethod("GET");
    51          long rangeEnd = Math.min(targetPos + bufferSize, len);
    52          connection.setRequestProperty("Range", "bytes=" + targetPos + "-" + rangeEnd);
    53          rangeContent = new byte[(int) (rangeEnd - targetPos)];
    54          try (InputStream inputStream = connection.getInputStream()) {
    55              IOUtils.readFully(inputStream, rangeContent);
    56          }
    57          start = targetPos;
    58      }
    59  
    60      @Override
    61      public synchronized void seek(long targetPos) throws IOException {
    62          if (closed) {
    63              throw new IOException("Stream closed");
    64          }
    65          if (targetPos < 0) {
    66              throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK
    67                      + " " + targetPos);
    68          }
    69          this.pos = targetPos;
    70      }
    71  
    72      @Override
    73      public synchronized long getPos() throws IOException {
    74          return pos;
    75      }
    76  
    77      public synchronized int available() throws IOException {
    78          if (closed) {
    79              throw new IOException("Stream closed");
    80          }
    81          if (len - pos > Integer.MAX_VALUE) {
    82              return Integer.MAX_VALUE;
    83          }
    84          return (int) Math.max(len - pos, 0);
    85      }
    86  
    87      @Override
    88      public synchronized boolean seekToNewSource(long targetPos) throws IOException {
    89          return false;
    90      }
    91  
    92      @Override
    93      public synchronized int read() throws IOException {
    94          if (closed) {
    95              throw new IOException("Stream closed");
    96          }
    97          if (pos >= len) {
    98              return -1;
    99          }
   100          updateInputStream(pos);
   101          int res = rangeContent[(int) (pos - start)] & 0xff;
   102          pos++;
   103          return res;
   104      }
   105  
   106      @Override
   107      public synchronized void close() throws IOException {
   108          if (closed) {
   109              return;
   110          }
   111          closed = true;
   112      }
   113  }