github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/hadoopfs/src/main/java/io/lakefs/BulkDeleter.java (about)

     1  package io.lakefs;
     2  
     3  import java.io.Closeable;
     4  import java.io.IOException;
     5  import java.util.ArrayDeque;
     6  import java.util.Queue;
     7  import java.util.concurrent.Callable;
     8  import java.util.concurrent.ExecutionException;
     9  import java.util.concurrent.ExecutorService;
    10  import java.util.concurrent.Future;
    11  
    12  import io.lakefs.clients.sdk.ApiException;
    13  import io.lakefs.clients.sdk.model.ObjectErrorList;
    14  import io.lakefs.clients.sdk.model.PathList;
    15  
    16  class BulkDeleter implements Closeable {
    17      private static final int defaultBulkSize = 1000;
    18  
    19      private final ExecutorService executor;
    20      private final Callback callback;
    21      private final String repository;
    22      private final String branch;
    23      private final int bulkSize;
    24  
    25      private PathList pathList;
    26      // TODO(ariels): Configure this!
    27      private final int concurrency = 1;
    28      private Queue<Future<ObjectErrorList>> deletions = new ArrayDeque<>();
    29  
    30      public static interface Callback {
    31          ObjectErrorList apply(String repository, String branch, PathList pathList) throws ApiException;
    32      }
    33  
    34      public static class DeleteFailuresException extends IOException {
    35          public DeleteFailuresException(ObjectErrorList errorList) {
    36              super("failed to delete: " + errorList.toString());
    37          }
    38      }
    39  
    40      /**
    41       * Construct a BulkDeleter to bulk-delete objects on branch in repository,
    42       * using callback on executor.
    43       */
    44      BulkDeleter(ExecutorService executor, Callback callback, String repository, String branch, int bulkSize) {
    45          this.executor = executor;
    46          this.callback = callback;
    47          this.repository = repository;
    48          this.branch = branch;
    49          if (bulkSize <= 0) {
    50              bulkSize = defaultBulkSize;
    51          }
    52          this.bulkSize = bulkSize;
    53      }
    54  
    55      BulkDeleter(ExecutorService executor, Callback callback, String repository, String branch) {
    56          this(executor, callback, repository, branch, defaultBulkSize);
    57      }
    58  
    59      /**
    60       * Add another key to be deleted.  If a bulk is ready, delete it.  Any
    61       * errors thrown may be related to previously-added keys.
    62       */
    63      public synchronized void add(String key) throws IOException, DeleteFailuresException {
    64          if (pathList == null) {
    65              pathList = new PathList();
    66          }
    67          pathList.addPathsItem(key);
    68          if (pathList.getPaths().size() >= bulkSize) {
    69              startDeletingUnlocked();
    70          }
    71      }
    72  
    73      /**
    74       * Close this BulkDeleter, possibly performing one last deletion.
    75       *
    76       * @throws DeleteFailuresException if last deletion did not (entirely) succeed.
    77       */
    78      @Override
    79      public synchronized void close() throws IOException, DeleteFailuresException {
    80          if (pathList != null && !pathList.getPaths().isEmpty()) {
    81              startDeletingUnlocked();
    82          }
    83          drainDeletionsUnlocked();
    84      }
    85  
    86      /**
    87       * Start deleting everything in pathList and empty it.  Must call locked.
    88       */
    89      private void startDeletingUnlocked() throws IOException, DeleteFailuresException {
    90          maybeWaitForDeletionUnlocked();
    91          PathList toDelete = pathList;
    92          pathList = null;
    93          deletions.add(executor.submit(new Callable() {
    94                  @Override
    95                  public ObjectErrorList call() throws ApiException, InterruptedException, DeleteFailuresException {
    96                      ObjectErrorList ret = callback.apply(repository, branch, toDelete);
    97                      return ret;
    98                  }
    99              }));
   100      }
   101  
   102      /**
   103       * Wait for deletion callbacks to end until deletions has space.  Must
   104       * call locked.
   105       *
   106       * @throws DeleteFailuresException if deletion did not (entirely) succeed.
   107       */
   108      private void maybeWaitForDeletionUnlocked() throws DeleteFailuresException, IOException {
   109          while (deletions.size() >= concurrency) {
   110              waitForOneDeletionUnlocked();
   111          }
   112      }
   113  
   114      /**
   115       * Wait for deletion callbacks to end until deletions has space.  Must
   116       * call locked.
   117       *
   118       * @throws DeleteFailuresException if deletion did not (entirely) succeed.
   119       */
   120      private void drainDeletionsUnlocked() throws DeleteFailuresException, IOException {
   121          while (!deletions.isEmpty()) {
   122              waitForOneDeletionUnlocked();
   123          }
   124      }
   125  
   126      private void waitForOneDeletionUnlocked() throws DeleteFailuresException, IOException {
   127          try {
   128              Future<ObjectErrorList> deletion = deletions.poll();
   129              if (deletion == null) return;
   130  
   131              ObjectErrorList errors = deletion.get();
   132              if (errors != null && errors.getErrors() != null && !errors.getErrors().isEmpty()) {
   133                  throw new DeleteFailuresException(errors);
   134              }
   135          } catch (ExecutionException e) {
   136              // Unwrap and re-throw e (usually)
   137              Throwable cause = e.getCause();
   138              if (cause instanceof IOException) {
   139                  throw (IOException)cause;
   140              } else if (cause instanceof Error) {
   141                  // Don't wrap serious errors.
   142                  throw (Error)cause;
   143              } else {
   144                  throw new IOException("failed to wait for bulk delete", cause);
   145              }
   146          } catch (InterruptedException ie) {
   147              throw new IOException("wait for deletion", ie);
   148          }
   149      }
   150  }