github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/results-processor/wptscreenshot.py (about)

     1  # Copyright 2019 The WPT Dashboard Project. All rights reserved.
     2  # Use of this source code is governed by a BSD-style license that can be
     3  # found in the LICENSE file.
     4  
     5  import base64
     6  import binascii
     7  import gzip
     8  import logging
     9  import multiprocessing
    10  import os
    11  import time
    12  from typing import List, IO, Optional, Tuple, TypeVar
    13  
    14  import requests
    15  
    16  import config
    17  import wptreport
    18  
# Every valid line in the screenshots database starts with this prefix; the
# remainder of the line is the base64-encoded PNG payload.
DATA_URI_PNG_PREFIX = 'data:image/png;base64,'

# Logger for the main process only (see the note on worker functions below).
_log = logging.getLogger(__name__)
    22  
    23  
############################
# Start of worker functions
# These functions run in worker processes. DO NOT use _log.

# Global variables to be initialized in workers:
# The values below are placeholders; _initialize() overwrites all three with
# the real values when it is run as the pool initializer.
_api = 'API URL to be initialized'  # Upload endpoint URL.
_auth = ('username', 'password')  # Passed to requests as `auth=`.
_run_info: wptreport.RunInfo = {}  # Source of the browser/OS form fields.
    32  
    33  
    34  def _initialize(api: str,
    35                  auth: Tuple[str, str],
    36                  run_info: wptreport.RunInfo) -> None:
    37      global _api
    38      global _auth
    39      global _run_info
    40      _api = api
    41      _auth = auth
    42      _run_info = run_info
    43  
    44  
    45  def _upload(images: List[str]) -> None:
    46      files = []
    47      for i in range(len(images)):
    48          files.append((
    49              'screenshot', ('%d.png' % i, images[i], 'image/png')))
    50  
    51      data = {'browser': _run_info.get('product'),
    52              'browser_version': _run_info.get('browser_version'),
    53              'os': _run_info.get('os'),
    54              'os_version': _run_info.get('os_version')}
    55      r = requests.post(_api, auth=_auth, data=data, files=files)
    56      if r.status_code != 201:
    57          time.sleep(1)
    58          requests.post(_api, auth=_auth, data=data, files=files)
    59  
    60  
    61  # End of worker functions
    62  ############################
    63  
# Self type for WPTScreenshot.__enter__, so that subclasses entering the
# context manager are typed as themselves rather than as the base class.
T = TypeVar('T', bound='WPTScreenshot')
    65  
    66  
    67  class WPTScreenshot(object):
    68      """A class to parse screenshots.db and upload screenshots.
    69  
    70      screenshots.db is a simple line-based format with one Data URI each line.
    71      """
    72      MAXIMUM_BATCH_SIZE = 100
    73  
    74      def __init__(self, filename: str,
    75                   run_info: Optional[wptreport.RunInfo] = None,
    76                   api: Optional[str] = None,
    77                   auth: Optional[Tuple[str, str]] = None,
    78                   processes: Optional[int] = None):
    79          """Creates a WPTScreenshot context manager.
    80  
    81          Usage:
    82              with WPTScreenshot(...) as s:
    83                  s.process()
    84  
    85          Args:
    86              filename: Filename of the screenshots database (the file can be
    87                  gzipped if the extension is ".gz").
    88              run_info: A finalized WPTReport.run_info dict (important fields:
    89                  product, browser_version, os, os_version) (optional).
    90              api: The URL of the API (optional).
    91              auth: A (username, password) tuple for HTTP basic auth (optional).
    92              processes: The number of worker processes (defaults to cpu*2).
    93          """
    94          self._filename: str = filename
    95          self._run_info: wptreport.RunInfo = run_info or {}
    96          self._api: str = (api or
    97                            config.project_baseurl() + '/api/screenshots/upload')
    98          self._auth = auth
    99          if processes is None:
   100              processes = (os.cpu_count() or 2) * 2
   101          self._processes: int = processes
   102  
   103          self._f: Optional[IO[str]] = None
   104          self._pool: Optional[multiprocessing.pool.Pool] = None
   105  
   106      def __enter__(self: T) -> T:
   107          """Starts and initializes all workers."""
   108          assert self._pool is None
   109          assert self._f is None
   110          self._pool = multiprocessing.Pool(
   111              self._processes, _initialize,
   112              (self._api, self._auth, self._run_info))
   113          if self._filename.endswith('.gz'):
   114              self._f = gzip.open(self._filename, 'rt', encoding='ascii')
   115          else:
   116              self._f = open(self._filename, 'rt', encoding='ascii')
   117          return self
   118  
   119      def __exit__(self, *args):
   120          """Waits for work to finish and frees all resources."""
   121          if self._pool is not None:
   122              self._pool.close()
   123              self._pool.join()
   124          if self._f is not None:
   125              self._f.close()
   126  
   127      def process(self):
   128          batch = []
   129          for line in self._f:
   130              line = line.rstrip()
   131              if not line.startswith(DATA_URI_PNG_PREFIX):
   132                  _log.error('Invalid data URI: %s', line)
   133                  continue
   134              try:
   135                  data = base64.b64decode(line[len(DATA_URI_PNG_PREFIX):])
   136              except binascii.Error:
   137                  _log.error('Invalid base64: %s', line)
   138                  continue
   139              batch.append(data)
   140              if len(batch) == self.MAXIMUM_BATCH_SIZE:
   141                  self._pool.apply_async(_upload, [batch])
   142                  batch = []
   143          if len(batch) > 0:
   144              self._pool.apply_async(_upload, [batch])