github.com/letsencrypt/boulder@v0.20251208.0/test/startservers.py (about)

     1  import atexit
     2  import collections
     3  import os
     4  import signal
     5  import socket
     6  import subprocess
     7  
     8  from helpers import config_dir, waithealth, waitport
     9  
    10  Service = collections.namedtuple('Service', ('name', 'debug_port', 'grpc_port', 'host_override', 'cmd', 'deps'))
    11  
# One entry per long-running process in the test environment, started by
# start() in dependency order (see _service_toposort).
# Keep these ports in sync with consul/config.hcl
SERVICES = (
    Service('remoteva-a',
        8011, 9397, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-a.json'), '--addr', ':9397', '--debug-addr', ':8011'),
        None),
    Service('remoteva-b',
        8012, 9498, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-b.json'), '--addr', ':9498', '--debug-addr', ':8012'),
        None),
    Service('remoteva-c',
        8023, 9499, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-c.json'), '--addr', ':9499', '--debug-addr', ':8023'),
        None),
    Service('boulder-sa-1',
        8003, 9395, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9395', '--debug-addr', ':8003'),
        None),
    Service('boulder-sa-2',
        8103, 9495, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9495', '--debug-addr', ':8103'),
        None),
    Service('aia-test-srv',
        4502, None, None,
        ('./bin/aia-test-srv', '--addr', ':4502', '--hierarchy', 'test/certs/webpki/'), None),
    Service('ct-test-srv',
        4600, None, None,
        ('./bin/ct-test-srv', '--config', 'test/ct-test-srv/ct-test-srv.json'), None),
    Service('boulder-publisher-1',
        8009, 9391, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9391', '--debug-addr', ':8009'),
        None),
    Service('boulder-publisher-2',
        8109, 9491, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9491', '--debug-addr', ':8109'),
        None),
    Service('boulder-va-1',
        8004, 9392, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9392', '--debug-addr', ':8004'),
        ('remoteva-a', 'remoteva-b')),
    Service('boulder-va-2',
        8104, 9492, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9492', '--debug-addr', ':8104'),
        ('remoteva-a', 'remoteva-b')),
    Service('boulder-ca-1',
        8001, 9393, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9393', '--debug-addr', ':8001'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ra-sct-provider-1', 'boulder-ra-sct-provider-2')),
    Service('boulder-ca-2',
        8101, 9493, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9493', '--debug-addr', ':8101'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ra-sct-provider-1', 'boulder-ra-sct-provider-2')),
    Service('s3-test-srv',
        4501, None, None,
        ('./bin/s3-test-srv', '--listen', ':4501'),
        None),
    Service('crl-storer',
        # grpc_port is None even though the command binds :9309, so
        # readiness only waits on the debug port — presumably intentional
        # (no health service on :9309); confirm before relying on it.
        9667, None, None,
        ('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
        ('s3-test-srv',)),
    Service('boulder-ra-1',
        8002, 9394, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('boulder-ra-2',
        8102, 9494, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9494', '--debug-addr', ':8102'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    # We run a separate instance of the RA for use as the SCTProvider service called by the CA.
    # This solves a small problem of startup order: if a client (the CA in this case) starts
    # up before its backends, gRPC will try to connect immediately (due to health checks),
    # get a connection refused, and enter a backoff state. That backoff state can cause
    # subsequent requests to fail. This issue only exists for the CA-RA pair because they
    # have a circular relationship - the RA calls CA.IssueCertificate, and the CA calls
    # SCTProvider.GetSCTs (offered by the RA).
    Service('boulder-ra-sct-provider-1',
        8118, 9594, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9594', '--debug-addr', ':8118'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('boulder-ra-sct-provider-2',
        8119, 9694, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9694', '--debug-addr', ':8119'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('bad-key-revoker',
        8020, None, None,
        ('./bin/boulder', 'bad-key-revoker', '--config', os.path.join(config_dir, 'bad-key-revoker.json'), '--debug-addr', ':8020'),
        ('boulder-ra-1', 'boulder-ra-2')),
    # Note: the nonce-service instances bind to specific interfaces, not all
    # interfaces, because they use their explicit host:port pair to calculate
    # the nonce prefix, which is used by WFEs when deciding where to redeem
    # nonces. The `taro` and `zinc` instances simulate nonce services in two
    # different datacenters. The WFE is configured to get nonces from one of
    # these services, and potentially redeem from either service (though in
    # practice it will only redeem from the one that is configured for getting
    # nonces). Accordingly, grpc_port for these entries is a full host:port
    # string rather than a bare port number.
    Service('nonce-service-taro-1',
        8111, '10.77.77.77:9301', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9301', '--debug-addr', ':8111',),
        None),
    Service('nonce-service-taro-2',
        8113, '10.77.77.77:9501', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9501', '--debug-addr', ':8113',),
        None),
    Service('nonce-service-zinc-1',
        8112, '10.77.77.77:9401', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-b.json'), '--addr', '10.77.77.77:9401', '--debug-addr', ':8112',),
        None),
    Service('pardot-test-srv',
        # Uses port 9601 to mock Salesforce OAuth2 token API and 9602 to mock
        # the Pardot API.
        9601, None, None,
        ('./bin/pardot-test-srv', '--config', os.path.join(config_dir, 'pardot-test-srv.json'),),
        None),
    Service('email-exporter',
        8114, None, None,
        ('./bin/boulder', 'email-exporter', '--config', os.path.join(config_dir, 'email-exporter.json'), '--addr', ':9603', '--debug-addr', ':8114'),
        ('pardot-test-srv',)),
    Service('boulder-wfe2',
        # debug_port here is the WFE's public HTTP port (:4001), not its
        # --debug-addr (:8013); with grpc_port None, startup readiness is
        # checked against the HTTP listener.
        4001, None, None,
        ('./bin/boulder', 'boulder-wfe2', '--config', os.path.join(config_dir, 'wfe2.json'), '--addr', ':4001', '--tls-addr', ':4431', '--debug-addr', ':8013'),
        ('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'nonce-service-taro-1', 'nonce-service-taro-2', 'nonce-service-zinc-1', 'email-exporter')),
    Service('zendesk-test-srv',
        9701, None, None,
        ('./bin/zendesk-test-srv', '--config', os.path.join(config_dir, 'zendesk-test-srv.json'),),
        None),
    Service('sfe',
        # Uses port 4003 for HTTP.
        4003, None, None,
        ('./bin/boulder', 'sfe', '--config', os.path.join(config_dir, 'sfe.json'), '--debug-addr', ':8015'),
        ('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'zendesk-test-srv')),
    Service('log-validator',
        8016, None, None,
        ('./bin/boulder', 'log-validator', '--config', os.path.join(config_dir, 'log-validator.json'), '--debug-addr', ':8016'),
        None),
)
   147  
   148  def _service_toposort(services):
   149      """Yields Service objects in topologically sorted order.
   150  
   151      No service will be yielded until every service listed in its deps value
   152      has been yielded.
   153      """
   154      ready = set([s for s in services if not s.deps])
   155      blocked = set(services) - ready
   156      done = set()
   157      while ready:
   158          service = ready.pop()
   159          yield service
   160          done.add(service.name)
   161          new = set([s for s in blocked if all([d in done for d in s.deps])])
   162          ready |= new
   163          blocked -= new
   164      if blocked:
   165          print("WARNING: services with unsatisfied dependencies:")
   166          for s in blocked:
   167              print(s.name, ":", s.deps)
   168          raise(Exception("Unable to satisfy service dependencies"))
   169  
# Global list of subprocesses started by start(). stop() terminates them in
# reverse order, and check() prunes any that have already exited.
processes = []

# NOTE(@cpu): We manage the challSrvProcess separately from the other global
# processes because we want integration tests to be able to stop/start it (e.g.
# to run the load-generator). None when no chall-test-srv is running.
challSrvProcess = None
   176  
   177  def install(race_detection, coverage=False):
   178      # Pass empty BUILD_TIME and BUILD_ID flags to avoid constantly invalidating the
   179      # build cache with new BUILD_TIMEs, or invalidating it on merges with a new
   180      # BUILD_ID.
   181      go_build_flags=''
   182      if race_detection:
   183          go_build_flags += ' -race'
   184  
   185      if coverage:
   186          go_build_flags += ' -cover' # https://go.dev/blog/integration-test-coverage
   187  
   188      return subprocess.call(["/usr/bin/make", "GO_BUILD_FLAGS=%s" % go_build_flags]) == 0
   189  
   190  def run(cmd, coverage_dir=None):
   191      e = os.environ.copy()
   192      e.setdefault("GORACE", "halt_on_error=1")
   193      if coverage_dir:
   194          abs_coverage_dir = os.path.abspath(coverage_dir)
   195          e.setdefault("GOCOVERDIR", abs_coverage_dir)
   196          e.setdefault("GOCOVERMODE", "atomic")
   197      p = subprocess.Popen(cmd, env=e)
   198      p.cmd = cmd
   199      return p
   200  
   201  def start(coverage_dir=None):
   202      """Return True if everything builds and starts.
   203  
   204      Give up and return False if anything fails to build, or dies at
   205      startup. Anything that did start before this point can be cleaned
   206      up explicitly by calling stop(), or automatically atexit.
   207      """
   208      signal.signal(signal.SIGTERM, lambda _, __: stop())
   209      signal.signal(signal.SIGINT, lambda _, __: stop())
   210  
   211      # Check that we can resolve the service names before we try to start any
   212      # services. This prevents a confusing error (timed out health check).
   213      try:
   214          socket.getaddrinfo('publisher.service.consul', None)
   215      except Exception as e:
   216          print("Error querying DNS. Is consul running? `docker compose ps bconsul`. %s" % (e))
   217          return False
   218  
   219      # Start the chall-test-srv first so it can be used to resolve DNS for
   220      # gRPC.
   221      startChallSrv()
   222  
   223      # Processes are in order of dependency: Each process should be started
   224      # before any services that intend to send it RPCs. On shutdown they will be
   225      # killed in reverse order.
   226      for service in _service_toposort(SERVICES):
   227          print("Starting service", service.name)
   228          try:
   229              global processes
   230              p = run(service.cmd, coverage_dir)
   231              processes.append(p)
   232              if service.grpc_port is not None:
   233                  waithealth(' '.join(p.args), service.grpc_port, service.host_override)
   234              else:
   235                  if not waitport(service.debug_port, ' '.join(p.args), perTickCheck=check):
   236                      return False
   237          except Exception as e:
   238              print("Error starting service %s: %s" % (service.name, e))
   239              return False
   240  
   241      print("All servers running. Hit ^C to kill.")
   242      return True
   243  
   244  def check():
   245      """Return true if all started processes are still alive.
   246  
   247      Log about anything that died. The chall-test-srv is not considered when
   248      checking processes.
   249      """
   250      global processes
   251      busted = []
   252      stillok = []
   253      for p in processes:
   254          if p.poll() is None:
   255              stillok.append(p)
   256          else:
   257              busted.append(p)
   258      if busted:
   259          print("\n\nThese processes exited early (check above for their output):")
   260          for p in busted:
   261              print("\t'%s' with pid %d exited %d" % (p.cmd, p.pid, p.returncode))
   262      processes = stillok
   263      return not busted
   264  
def startChallSrv():
    """
    Start the chall-test-srv and wait for it to become available. See also
    stopChallSrv.

    Raises if a chall-test-srv is already running. NOTE(review): the
    `return False` on a failed management-port wait is effectively unused —
    start() calls this without checking the result.
    """
    global challSrvProcess
    if challSrvProcess is not None:
        raise(Exception("startChallSrv called more than once"))

    # NOTE(@cpu): We specify explicit bind addresses for -https01 and
    # --tlsalpn01 here to allow HTTPS HTTP-01 responses on 443 on one
    # interface and TLS-ALPN-01 responses on 443 on another interface. The
    # choice of which is used is controlled by mock DNS data added by the
    # relevant integration tests.
    # NOTE(review): single- and double-dash flags are mixed below;
    # presumably the Go flag parser treats them identically — confirm.
    # NOTE(review): assumes FAKE_DNS is set in the environment; Popen would
    # reject a None argument — confirm callers always export it.
    challSrvProcess = run([
        './bin/chall-test-srv',
        '--defaultIPv4', os.environ.get("FAKE_DNS"),
        '-defaultIPv6', '',
        '--dns01', ':8053,:8054',
        '--doh', ':8343,:8443',
        '--doh-cert', 'test/certs/ipki/10.77.77.77/cert.pem',
        '--doh-cert-key', 'test/certs/ipki/10.77.77.77/key.pem',
        '--management', ':8055',
        '--http01', '64.112.117.122:80',
        '-https01', '64.112.117.122:443',
        '--tlsalpn01', '64.112.117.134:443'])
    # Wait for the chall-test-srv management port.
    if not waitport(8055, ' '.join(challSrvProcess.args)):
        return False
   294  
   295  def stopChallSrv():
   296      """
   297      Stop the running chall-test-srv (if any) and wait for it to terminate.
   298      See also startChallSrv.
   299      """
   300      global challSrvProcess
   301      if challSrvProcess is None:
   302          return
   303      if challSrvProcess.poll() is None:
   304          challSrvProcess.send_signal(signal.SIGTERM)
   305          challSrvProcess.wait()
   306      challSrvProcess = None
   307  
   308  @atexit.register
   309  def stop():
   310      # When we are about to exit, send SIGTERM to each subprocess and wait for
   311      # them to nicely die. This reflects the restart process in prod and allows
   312      # us to exercise the graceful shutdown code paths.
   313      global processes
   314      for p in reversed(processes):
   315          if p.poll() is None:
   316              p.send_signal(signal.SIGTERM)
   317              p.wait()
   318      processes = []
   319  
   320      # Also stop the challenge test server
   321      stopChallSrv()