github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/webui/src/pages/repositories/repository/fileRenderers/duckdb.tsx (about)

     1  import * as duckdb from '@duckdb/duckdb-wasm';
     2  import * as arrow from 'apache-arrow';
     3  import {AsyncDuckDB, AsyncDuckDBConnection, DuckDBDataProtocol} from '@duckdb/duckdb-wasm';
     4  import duckdb_wasm from '@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm?url';
     5  import mvp_worker from '@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js?url';
     6  import duckdb_wasm_eh from '@duckdb/duckdb-wasm/dist/duckdb-eh.wasm?url';
     7  import eh_worker from '@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js?url';
     8  
     9  
    10  
    11  const MANUAL_BUNDLES: duckdb.DuckDBBundles = {
    12      mvp: {
    13          mainModule: duckdb_wasm,
    14          mainWorker: mvp_worker,
    15      },
    16      eh: {
    17          mainModule: duckdb_wasm_eh,
    18          mainWorker: eh_worker,
    19      },
    20  };
    21  
    22  let _db: AsyncDuckDB | null = null;
    23  
    24  async function getDuckDB(): Promise<duckdb.AsyncDuckDB> {
    25      if (_db !== null) {
    26          return _db
    27      }
    28      const bundle = await duckdb.selectBundle(MANUAL_BUNDLES)
    29      if (!bundle.mainWorker) {
    30          throw Error("could not initialize DuckDB")
    31      }
    32      const worker = new Worker(bundle.mainWorker)
    33      const logger = new duckdb.VoidLogger()
    34      const db = new duckdb.AsyncDuckDB(logger, worker)
    35      await db.instantiate(bundle.mainModule, bundle.pthreadWorker)
    36      const conn = await db.connect()
    37      await conn.close()
    38      _db = db
    39      return _db
    40  }
    41  
    42  
    43  // taken from @duckdb/duckdb-wasm/dist/types/src/bindings/tokens.d.ts
    44  // which, unfortunately, we cannot import.
    45  const DUCKDB_STRING_CONSTANT = 2;
    46  const LAKEFS_URI_PATTERN = /^(['"]?)(lakefs:\/\/(.*))(['"])\s*$/;
    47  
    48  // returns a mapping of `lakefs://..` URIs to their `s3://...` equivalent
    49  async function extractFiles(conn: AsyncDuckDBConnection, sql: string): Promise<{ [name: string]: string }> {
    50      const tokenized = await conn.bindings.tokenize(sql)
    51      const r = Math.random(); // random number to make sure the S3 gateway picks up the request
    52      let prev = 0;
    53      const fileMap: { [name: string]: string } = {};
    54      tokenized.offsets.forEach((offset, i) => {
    55          let currentToken = sql.length;
    56          if (i < tokenized.offsets.length - 1) {
    57              currentToken = tokenized.offsets[i+1];
    58          }
    59          const part = sql.substring(prev, currentToken);
    60          prev = currentToken;
    61          if (tokenized.types[i] === DUCKDB_STRING_CONSTANT) {
    62              const matches = part.match(LAKEFS_URI_PATTERN)
    63              if (matches !== null) {
    64                  fileMap[matches[2]] = `s3://${matches[3]}?r=${r}`;
    65              }
    66          }
    67      })
    68      return fileMap
    69  }
    70  
    71  /* eslint-disable  @typescript-eslint/no-explicit-any */
    72  export async function runDuckDBQuery(sql: string):  Promise<arrow.Table<any>> {
    73      const db = await getDuckDB()
    74      /* eslint-disable  @typescript-eslint/no-explicit-any */
    75      let result: arrow.Table<any>
    76      const conn  = await db.connect()
    77      try {
    78          // TODO (ozk): read this from the server's configuration?
    79          await conn.query(`SET s3_region='us-east-1';`)
    80          // set the example values (used to make sure the S3 gateway picks up the request)
    81          // real authentication is done using the existing swagger cookie or token
    82          await conn.query(`SET s3_access_key_id='use_swagger_credentials';`)
    83          await conn.query(`SET s3_secret_access_key='these_are_meaningless_but_must_be_set';`)
    84          await conn.query(`SET s3_endpoint='${document.location.protocol}//${document.location.host}'`)
    85  
    86          // register lakefs uri-ed files as s3 files
    87          const fileMap = await extractFiles(conn, sql)
    88          const fileNames = Object.getOwnPropertyNames(fileMap)
    89          await Promise.all(fileNames.map(
    90              fileName => db.registerFileURL(fileName, fileMap[fileName], DuckDBDataProtocol.S3, true)
    91          ))
    92          // execute the query
    93          result = await conn.query(sql)
    94  
    95          // remove registrations
    96          await Promise.all(fileNames.map(fileName => db.dropFile(fileName)))
    97      } finally {
    98          await conn.close()
    99      }
   100      return result
   101  }