# Source: github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/results-processor/wptscreenshot.py
# Copyright 2019 The WPT Dashboard Project. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import base64
import binascii
import gzip
import logging
import multiprocessing
import os
import time
from typing import List, IO, Optional, Tuple, TypeVar

import requests

import config
import wptreport

DATA_URI_PNG_PREFIX = 'data:image/png;base64,'

_log = logging.getLogger(__name__)


############################
# Start of worker functions
# These functions run in worker processes. DO NOT use _log.

# Global variables to be initialized in workers:
_api = 'API URL to be initialized'
_auth: Optional[Tuple[str, str]] = ('username', 'password')
_run_info: wptreport.RunInfo = {}


def _initialize(api: str,
                auth: Optional[Tuple[str, str]],
                run_info: wptreport.RunInfo) -> None:
    """Stores the upload configuration in this worker's module globals.

    Passed to multiprocessing.Pool as the initializer, so it is called once
    in every worker process before any task is executed.

    Args:
        api: The URL that _upload posts to.
        auth: A (username, password) tuple for HTTP basic auth, or None.
        run_info: The run_info dict whose fields are sent with each upload.
    """
    global _api
    global _auth
    global _run_info
    _api = api
    _auth = auth
    _run_info = run_info


def _upload(images: List[bytes]) -> None:
    """Posts one batch of PNG images to the API as a multipart request.

    Args:
        images: Raw (already base64-decoded) PNG payloads. Each one is sent
            as a separate 'screenshot' part named '<index>.png'.
    """
    files = [('screenshot', ('%d.png' % i, image, 'image/png'))
             for i, image in enumerate(images)]

    data = {'browser': _run_info.get('product'),
            'browser_version': _run_info.get('browser_version'),
            'os': _run_info.get('os'),
            'os_version': _run_info.get('os_version')}
    r = requests.post(_api, auth=_auth, data=data, files=files)
    if r.status_code != 201:
        # Best effort: retry exactly once after a short pause. The outcome
        # of the retry is deliberately not inspected.
        time.sleep(1)
        requests.post(_api, auth=_auth, data=data, files=files)


# End of worker functions
############################

T = TypeVar('T', bound='WPTScreenshot')


def _log_upload_error(error: BaseException) -> None:
    """Logs an exception raised by _upload in a worker.

    Used as the error_callback of apply_async; the pool invokes callbacks in
    the parent process, so using _log here is safe.
    """
    _log.error('Failed to upload a batch of screenshots: %r', error)


class WPTScreenshot(object):
    """A class to parse screenshots.db and upload screenshots.

    screenshots.db is a simple line-based format with one Data URI each line.
    """
    MAXIMUM_BATCH_SIZE = 100

    def __init__(self, filename: str,
                 run_info: Optional[wptreport.RunInfo] = None,
                 api: Optional[str] = None,
                 auth: Optional[Tuple[str, str]] = None,
                 processes: Optional[int] = None):
        """Creates a WPTScreenshot context manager.

        Usage:
            with WPTScreenshot(...) as s:
                s.process()

        Args:
            filename: Filename of the screenshots database (the file can be
                gzipped if the extension is ".gz").
            run_info: A finalized WPTReport.run_info dict (important fields:
                product, browser_version, os, os_version) (optional).
            api: The URL of the API (optional).
            auth: A (username, password) tuple for HTTP basic auth (optional).
            processes: The number of worker processes (defaults to cpu*2).
        """
        self._filename: str = filename
        self._run_info: wptreport.RunInfo = run_info or {}
        self._api: str = (api or
                          config.project_baseurl() + '/api/screenshots/upload')
        self._auth: Optional[Tuple[str, str]] = auth
        if processes is None:
            processes = (os.cpu_count() or 2) * 2
        self._processes: int = processes

        self._f: Optional[IO[str]] = None
        self._pool: Optional[multiprocessing.pool.Pool] = None

    def __enter__(self: T) -> T:
        """Starts and initializes all workers, then opens the database.

        If the database cannot be opened, the already started workers are
        shut down before the error propagates (__exit__ is not invoked when
        __enter__ raises, so they would otherwise be leaked).
        """
        assert self._pool is None
        assert self._f is None
        self._pool = multiprocessing.Pool(
            self._processes, _initialize,
            (self._api, self._auth, self._run_info))
        try:
            if self._filename.endswith('.gz'):
                self._f = gzip.open(self._filename, 'rt', encoding='ascii')
            else:
                self._f = open(self._filename, 'rt', encoding='ascii')
        except BaseException:
            # No work has been queued yet, so terminating is safe.
            self._pool.terminate()
            self._pool.join()
            self._pool = None
            raise
        return self

    def __exit__(self, *args):
        """Waits for work to finish and frees all resources.

        The file is closed even if shutting down the pool fails, and the
        instance is reset so that it can be entered again.
        """
        pool, self._pool = self._pool, None
        f, self._f = self._f, None
        try:
            if pool is not None:
                pool.close()
                pool.join()
        finally:
            if f is not None:
                f.close()

    def _submit(self, batch: List[bytes]) -> None:
        """Queues one batch of decoded images for upload by a worker."""
        assert self._pool is not None
        self._pool.apply_async(
            _upload, [batch], error_callback=_log_upload_error)

    def process(self) -> None:
        """Reads the database and queues its screenshots for upload.

        Each line must be a PNG data URI. Lines with a different prefix or
        an invalid base64 payload are logged and skipped. Decoded images are
        handed to the worker pool in batches of at most MAXIMUM_BATCH_SIZE;
        uploads happen asynchronously and are awaited in __exit__.

        Raises:
            RuntimeError: If called outside of the "with" block.
        """
        if self._f is None or self._pool is None:
            raise RuntimeError(
                'WPTScreenshot.process() must be called inside a with block')

        batch: List[bytes] = []
        for line in self._f:
            line = line.rstrip()
            if not line.startswith(DATA_URI_PNG_PREFIX):
                _log.error('Invalid data URI: %s', line)
                continue
            try:
                data = base64.b64decode(line[len(DATA_URI_PNG_PREFIX):])
            except binascii.Error:
                _log.error('Invalid base64: %s', line)
                continue
            batch.append(data)
            if len(batch) == self.MAXIMUM_BATCH_SIZE:
                self._submit(batch)
                batch = []
        if batch:
            # Flush the final, partially filled batch.
            self._submit(batch)