github.com/web-platform-tests/wpt.fyi@v0.0.0-20240530210107-70cf978996f1/results-processor/main.py (about)

     1  #!/usr/bin/env python3
     2  import functools
     3  import logging
     4  import os
     5  import tempfile
     6  import time
     7  from http import HTTPStatus
     8  
     9  import filelock
    10  import flask
    11  
    12  import processor
    13  
    14  
# The file will be flock()'ed if a report is being processed.
LOCK_FILE = '/tmp/results-processor.lock'
# If the file above is locked, this timestamp file contains the UNIX timestamp
# (a float in seconds) of when the current task started. A separate file is
# used because attempts to acquire a file lock invoke open() in truncate mode.
TIMESTAMP_FILE = '/tmp/results-processor.last'
# If the processing takes more than this timeout (in seconds), the instance is
# considered unhealthy and will be restarted by AppEngine. We set it to be
# smaller than the 60-minute timeout of AppEngine to give a safe margin.
TIMEOUT = 3500

# Hack to work around the bad logging setup of google.cloud.*:
# https://github.com/googleapis/google-cloud-python/issues/6742
# Any handlers already attached to the root logger are dropped first so that
# the basicConfig() call below actually installs its own handler.
logging.getLogger().handlers = []
logging.basicConfig(level=logging.INFO)
# Suppress the lock acquire/release logs from filelock.
logging.getLogger('filelock').setLevel(logging.WARNING)
# The Flask application that the route handlers below are registered on.
app = flask.Flask(__name__)
    33  
    34  
    35  def _atomic_write(path, content):
    36      # Do not auto-delete the file because we will move it after closing it.
    37      temp = tempfile.NamedTemporaryFile(mode='wt', delete=False)
    38      temp.write(content)
    39      temp.close()
    40      # Atomic on POSIX: https://docs.python.org/3/library/os.html#os.replace
    41      os.replace(temp.name, path)
    42  
    43  
    44  def _serial_task(func):
    45      lock = filelock.FileLock(LOCK_FILE)
    46  
    47      # It is important to use wraps() to preserve the original name & docstring.
    48      @functools.wraps(func)
    49      def decorated_func(*args, **kwargs):
    50          try:
    51              with lock.acquire(timeout=1):
    52                  return func(*args, **kwargs)
    53          except filelock.Timeout:
    54              app.logger.info('%s unable to acquire lock.', func.__name__)
    55              return ('A result is currently being processed.',
    56                      HTTPStatus.SERVICE_UNAVAILABLE)
    57  
    58      return decorated_func
    59  
    60  
    61  def _internal_only(func):
    62      @functools.wraps(func)
    63      def decorated_func(*args, **kwargs):
    64          if (not app.debug and
    65                  # This header cannot be set by external requests.
    66                  # https://cloud.google.com/tasks/docs/creating-appengine-handlers?hl=en#reading_app_engine_task_request_headers
    67                  not flask.request.headers.get('X-AppEngine-QueueName')):
    68              return ('External requests not allowed', HTTPStatus.FORBIDDEN)
    69          return func(*args, **kwargs)
    70  
    71      return decorated_func
    72  
    73  
    74  @app.route('/_ah/liveness_check')
    75  def liveness_check():
    76      lock = filelock.FileLock(LOCK_FILE)
    77      try:
    78          lock.acquire(timeout=0.1)
    79          lock.release()
    80      except filelock.Timeout:
    81          try:
    82              with open(TIMESTAMP_FILE, 'rt') as f:
    83                  last_locked = float(f.readline().strip())
    84              assert time.time() - last_locked <= TIMEOUT
    85          # Respectively: file not found, invalid content, old timestamp.
    86          except (IOError, ValueError, AssertionError):
    87              app.logger.warning('Liveness check failed.')
    88              return ('The current task has taken too long.',
    89                      HTTPStatus.INTERNAL_SERVER_ERROR)
    90      return 'Service alive'
    91  
    92  
    93  @app.route('/_ah/readiness_check')
    94  def readiness_check():
    95      lock = filelock.FileLock(LOCK_FILE)
    96      try:
    97          lock.acquire(timeout=0.1)
    98          lock.release()
    99      except filelock.Timeout:
   100          return ('A result is currently being processed.',
   101                  HTTPStatus.SERVICE_UNAVAILABLE)
   102      return 'Service alive'
   103  
   104  
   105  # Check request origins before acquiring the lock.
   106  @app.route('/api/results/process', methods=['POST'])
   107  @_internal_only
   108  @_serial_task
   109  def task_handler():
   110      _atomic_write(TIMESTAMP_FILE, str(time.time()))
   111  
   112      task_id = flask.request.headers.get('X-AppEngine-TaskName')
   113      app.logger.info('Processing task %s', task_id)
   114      resp = processor.process_report(task_id, flask.request.form)
   115      status = HTTPStatus.CREATED if resp else HTTPStatus.NO_CONTENT
   116      if resp:
   117          app.logger.info(resp)
   118  
   119      return (resp, status)
   120  
   121  
   122  # Run the script directly locally to start Flask dev server.
   123  if __name__ == '__main__':
   124      logging.basicConfig(level=logging.DEBUG)
   125      app.run(debug=False)