import atexit
import collections
import os
import signal
import socket
import subprocess

from helpers import config_dir, waithealth, waitport

# A service to be started by start(): its name, the port its debug/metrics
# HTTP server listens on, the port (or host:port) its gRPC server listens on
# (None for non-gRPC services), the TLS hostname to expect on its gRPC health
# checks (None for non-gRPC services), the command line to run it, and the
# names of the services that must be healthy before it is started (or None).
Service = collections.namedtuple('Service', ('name', 'debug_port', 'grpc_port', 'host_override', 'cmd', 'deps'))

# Keep these ports in sync with consul/config.hcl
SERVICES = (
    Service('remoteva-a',
        8011, 9397, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-a.json'), '--addr', ':9397', '--debug-addr', ':8011'),
        None),
    Service('remoteva-b',
        8012, 9498, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-b.json'), '--addr', ':9498', '--debug-addr', ':8012'),
        None),
    Service('remoteva-c',
        8023, 9499, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-c.json'), '--addr', ':9499', '--debug-addr', ':8023'),
        None),
    Service('boulder-sa-1',
        8003, 9395, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9395', '--debug-addr', ':8003'),
        None),
    Service('boulder-sa-2',
        8103, 9495, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9495', '--debug-addr', ':8103'),
        None),
    Service('aia-test-srv',
        4502, None, None,
        ('./bin/aia-test-srv', '--addr', ':4502', '--hierarchy', 'test/certs/webpki/'), None),
    Service('ct-test-srv',
        4600, None, None,
        ('./bin/ct-test-srv', '--config', 'test/ct-test-srv/ct-test-srv.json'), None),
    Service('boulder-publisher-1',
        8009, 9391, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9391', '--debug-addr', ':8009'),
        None),
    Service('boulder-publisher-2',
        8109, 9491, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9491', '--debug-addr', ':8109'),
        None),
    Service('boulder-va-1',
        8004, 9392, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9392', '--debug-addr', ':8004'),
        ('remoteva-a', 'remoteva-b')),
    Service('boulder-va-2',
        8104, 9492, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9492', '--debug-addr', ':8104'),
        ('remoteva-a', 'remoteva-b')),
    Service('boulder-ca-1',
        8001, 9393, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9393', '--debug-addr', ':8001'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ra-sct-provider-1', 'boulder-ra-sct-provider-2')),
    Service('boulder-ca-2',
        8101, 9493, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9493', '--debug-addr', ':8101'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ra-sct-provider-1', 'boulder-ra-sct-provider-2')),
    Service('s3-test-srv',
        4501, None, None,
        ('./bin/s3-test-srv', '--listen', ':4501'),
        None),
    Service('crl-storer',
        9667, None, None,
        ('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
        ('s3-test-srv',)),
    Service('boulder-ra-1',
        8002, 9394, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('boulder-ra-2',
        8102, 9494, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9494', '--debug-addr', ':8102'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    # We run a separate instance of the RA for use as the SCTProvider service called by the CA.
    # This solves a small problem of startup order: if a client (the CA in this case) starts
    # up before its backends, gRPC will try to connect immediately (due to health checks),
    # get a connection refused, and enter a backoff state. That backoff state can cause
    # subsequent requests to fail. This issue only exists for the CA-RA pair because they
    # have a circular relationship - the RA calls CA.IssueCertificate, and the CA calls
    # SCTProvider.GetSCTs (offered by the RA).
    Service('boulder-ra-sct-provider-1',
        8118, 9594, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9594', '--debug-addr', ':8118'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('boulder-ra-sct-provider-2',
        8119, 9694, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9694', '--debug-addr', ':8119'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('bad-key-revoker',
        8020, None, None,
        ('./bin/boulder', 'bad-key-revoker', '--config', os.path.join(config_dir, 'bad-key-revoker.json'), '--debug-addr', ':8020'),
        ('boulder-ra-1', 'boulder-ra-2')),
    # Note: the nonce-service instances bind to specific interfaces, not all
    # interfaces, because they use their explicit host:port pair to calculate
    # the nonce prefix, which is used by WFEs when deciding where to redeem
    # nonces. The `taro` and `zinc` instances simulate nonce services in two
    # different datacenters. The WFE is configured to get nonces from one of
    # these services, and potentially redeem from either service (though in
    # practice it will only redeem from the one that is configured for getting
    # nonces).
    Service('nonce-service-taro-1',
        8111, '10.77.77.77:9301', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9301', '--debug-addr', ':8111',),
        None),
    Service('nonce-service-taro-2',
        8113, '10.77.77.77:9501', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9501', '--debug-addr', ':8113',),
        None),
    Service('nonce-service-zinc-1',
        8112, '10.77.77.77:9401', 'nonce.boulder',
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-b.json'), '--addr', '10.77.77.77:9401', '--debug-addr', ':8112',),
        None),
    Service('pardot-test-srv',
        # Uses port 9601 to mock Salesforce OAuth2 token API and 9602 to mock
        # the Pardot API.
        9601, None, None,
        ('./bin/pardot-test-srv', '--config', os.path.join(config_dir, 'pardot-test-srv.json'),),
        None),
    Service('email-exporter',
        8114, None, None,
        ('./bin/boulder', 'email-exporter', '--config', os.path.join(config_dir, 'email-exporter.json'), '--addr', ':9603', '--debug-addr', ':8114'),
        ('pardot-test-srv',)),
    Service('boulder-wfe2',
        4001, None, None,
        ('./bin/boulder', 'boulder-wfe2', '--config', os.path.join(config_dir, 'wfe2.json'), '--addr', ':4001', '--tls-addr', ':4431', '--debug-addr', ':8013'),
        ('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'nonce-service-taro-1', 'nonce-service-taro-2', 'nonce-service-zinc-1', 'email-exporter')),
    Service('zendesk-test-srv',
        9701, None, None,
        ('./bin/zendesk-test-srv', '--config', os.path.join(config_dir, 'zendesk-test-srv.json'),),
        None),
    Service('sfe',
        # Uses port 4003 for HTTP.
        4003, None, None,
        ('./bin/boulder', 'sfe', '--config', os.path.join(config_dir, 'sfe.json'), '--debug-addr', ':8015'),
        ('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'zendesk-test-srv')),
    Service('log-validator',
        8016, None, None,
        ('./bin/boulder', 'log-validator', '--config', os.path.join(config_dir, 'log-validator.json'), '--debug-addr', ':8016'),
        None),
)

def _service_toposort(services):
    """Yields Service objects in topologically sorted order.

    No service will be yielded until every service listed in its deps value
    has been yielded.

    Raises:
        Exception: if some services' deps can never be satisfied (a cycle,
            or a dep naming a service not present in `services`).
    """
    ready = {s for s in services if not s.deps}
    blocked = set(services) - ready
    done = set()
    while ready:
        service = ready.pop()
        yield service
        done.add(service.name)
        # Promote any blocked service whose deps have now all been yielded.
        new = {s for s in blocked if all(d in done for d in s.deps)}
        ready |= new
        blocked -= new
    if blocked:
        print("WARNING: services with unsatisfied dependencies:")
        for s in blocked:
            print(s.name, ":", s.deps)
        # `raise` is a statement; the original wrapped it in parens as if it
        # were a function call.
        raise Exception("Unable to satisfy service dependencies")

# Processes started by start(), in startup order. stop() kills them in
# reverse order.
processes = []

# NOTE(@cpu): We manage the challSrvProcess separately from the other global
# processes because we want integration tests to be able to stop/start it (e.g.
# to run the load-generator).
challSrvProcess = None

def install(race_detection, coverage=False):
    """Build all boulder binaries via make.

    Returns True if the build succeeded.
    """
    # Pass empty BUILD_TIME and BUILD_ID flags to avoid constantly invalidating the
    # build cache with new BUILD_TIMEs, or invalidating it on merges with a new
    # BUILD_ID.
    go_build_flags = ''
    if race_detection:
        go_build_flags += ' -race'

    if coverage:
        go_build_flags += ' -cover'  # https://go.dev/blog/integration-test-coverage

    return subprocess.call(["/usr/bin/make", "GO_BUILD_FLAGS=%s" % go_build_flags]) == 0

def run(cmd, coverage_dir=None):
    """Start `cmd` as a subprocess and return its Popen object.

    The race detector is configured to halt on error, and when coverage_dir
    is given the Go coverage runtime is pointed at it. The command line is
    stashed on the returned object as `p.cmd` for later log messages.
    """
    e = os.environ.copy()
    e.setdefault("GORACE", "halt_on_error=1")
    if coverage_dir:
        abs_coverage_dir = os.path.abspath(coverage_dir)
        e.setdefault("GOCOVERDIR", abs_coverage_dir)
        e.setdefault("GOCOVERMODE", "atomic")
    p = subprocess.Popen(cmd, env=e)
    p.cmd = cmd
    return p

def start(coverage_dir=None):
    """Return True if everything builds and starts.

    Give up and return False if anything fails to build, or dies at
    startup. Anything that did start before this point can be cleaned
    up explicitly by calling stop(), or automatically atexit.
    """
    signal.signal(signal.SIGTERM, lambda _, __: stop())
    signal.signal(signal.SIGINT, lambda _, __: stop())

    # Check that we can resolve the service names before we try to start any
    # services. This prevents a confusing error (timed out health check).
    try:
        socket.getaddrinfo('publisher.service.consul', None)
    except Exception as e:
        print("Error querying DNS. Is consul running? `docker compose ps bconsul`. %s" % (e))
        return False

    # Start the chall-test-srv first so it can be used to resolve DNS for
    # gRPC. Bail out if it never becomes available; previously this failure
    # was silently ignored and services would start against a dead resolver.
    if not startChallSrv():
        return False

    # Processes are in order of dependency: Each process should be started
    # before any services that intend to send it RPCs. On shutdown they will be
    # killed in reverse order.
    for service in _service_toposort(SERVICES):
        print("Starting service", service.name)
        try:
            p = run(service.cmd, coverage_dir)
            processes.append(p)
            if service.grpc_port is not None:
                waithealth(' '.join(p.args), service.grpc_port, service.host_override)
            else:
                if not waitport(service.debug_port, ' '.join(p.args), perTickCheck=check):
                    return False
        except Exception as e:
            print("Error starting service %s: %s" % (service.name, e))
            return False

    print("All servers running. Hit ^C to kill.")
    return True

def check():
    """Return true if all started processes are still alive.

    Log about anything that died. The chall-test-srv is not considered when
    checking processes.
    """
    global processes
    busted = []
    stillok = []
    for p in processes:
        if p.poll() is None:
            stillok.append(p)
        else:
            busted.append(p)
    if busted:
        print("\n\nThese processes exited early (check above for their output):")
        for p in busted:
            print("\t'%s' with pid %d exited %d" % (p.cmd, p.pid, p.returncode))
    processes = stillok
    return not busted

def startChallSrv():
    """
    Start the chall-test-srv and wait for it to become available. See also
    stopChallSrv.

    Returns True once the management port is reachable, False if it never
    became reachable. Raises if called while a chall-test-srv is already
    running.
    """
    global challSrvProcess
    if challSrvProcess is not None:
        raise Exception("startChallSrv called more than once")

    # NOTE(@cpu): We specify explicit bind addresses for -https01 and
    # --tlsalpn01 here to allow HTTP-01 responses on 443 on one interface
    # and TLS-ALPN-01 responses on 443 for another interface. The choice of
    # which is used is controlled by mock DNS data added by the relevant
    # integration tests.
    # NOTE(review): os.environ.get("FAKE_DNS") is None if the variable is
    # unset, which would make Popen fail — presumably FAKE_DNS is always set
    # in the test environment; confirm against docker-compose config.
    challSrvProcess = run([
        './bin/chall-test-srv',
        '--defaultIPv4', os.environ.get("FAKE_DNS"),
        '-defaultIPv6', '',
        '--dns01', ':8053,:8054',
        '--doh', ':8343,:8443',
        '--doh-cert', 'test/certs/ipki/10.77.77.77/cert.pem',
        '--doh-cert-key', 'test/certs/ipki/10.77.77.77/key.pem',
        '--management', ':8055',
        '--http01', '64.112.117.122:80',
        '-https01', '64.112.117.122:443',
        '--tlsalpn01', '64.112.117.134:443'])
    # Wait for the chall-test-srv management port.
    if not waitport(8055, ' '.join(challSrvProcess.args)):
        return False
    return True

def stopChallSrv():
    """
    Stop the running chall-test-srv (if any) and wait for it to terminate.
    See also startChallSrv.
    """
    global challSrvProcess
    if challSrvProcess is None:
        return
    if challSrvProcess.poll() is None:
        challSrvProcess.send_signal(signal.SIGTERM)
        challSrvProcess.wait()
    challSrvProcess = None

@atexit.register
def stop():
    """Terminate all started services (and the chall-test-srv) gracefully."""
    # When we are about to exit, send SIGTERM to each subprocess and wait for
    # them to nicely die. This reflects the restart process in prod and allows
    # us to exercise the graceful shutdown code paths.
    global processes
    for p in reversed(processes):
        if p.poll() is None:
            p.send_signal(signal.SIGTERM)
            p.wait()
    processes = []

    # Also stop the challenge test server
    stopChallSrv()