github.com/letsencrypt/boulder@v0.20251208.0/va/va.go (about)

     1  package va
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"crypto/tls"
     7  	"errors"
     8  	"fmt"
     9  	"maps"
    10  	"math/rand/v2"
    11  	"net"
    12  	"net/netip"
    13  	"net/url"
    14  	"os"
    15  	"regexp"
    16  	"slices"
    17  	"strings"
    18  	"syscall"
    19  	"time"
    20  
    21  	"github.com/jmhodges/clock"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  	"github.com/prometheus/client_golang/prometheus/promauto"
    24  	"google.golang.org/protobuf/proto"
    25  
    26  	"github.com/letsencrypt/boulder/bdns"
    27  	"github.com/letsencrypt/boulder/core"
    28  	corepb "github.com/letsencrypt/boulder/core/proto"
    29  	berrors "github.com/letsencrypt/boulder/errors"
    30  	"github.com/letsencrypt/boulder/features"
    31  	bgrpc "github.com/letsencrypt/boulder/grpc"
    32  	"github.com/letsencrypt/boulder/identifier"
    33  	blog "github.com/letsencrypt/boulder/log"
    34  	"github.com/letsencrypt/boulder/metrics"
    35  	"github.com/letsencrypt/boulder/probs"
    36  	vapb "github.com/letsencrypt/boulder/va/proto"
    37  )
    38  
const (
	// PrimaryPerspective is the perspective name that identifies the primary
	// VA (see isPrimaryVA). allPerspectives is the "perspective" label value
	// DoDCV uses when recording the total (primary plus remote) latency.
	PrimaryPerspective = "Primary"
	allPerspectives    = "all"

	// Values for the "operation" label of the validationLatency histogram.
	// opDCV is used by DoDCV; opCAA is presumably used by the CAA counterpart,
	// which is not part of this section of the file.
	opDCV = "dcv"
	opCAA = "caa"

	// Values for the "result" label of the validationLatency histogram.
	pass = "pass"
	fail = "fail"
)
    49  
var (
	// badTLSHeader contains the bytes of the string "HTTP/", which is what
	// shows up as the TLS record header when we try to talk TLS to a server
	// that only talks HTTP.
	badTLSHeader = []byte{0x48, 0x54, 0x54, 0x50, 0x2f}
	// h2SettingsFrameErrRegex is a regex against a net/http error indicating
	// a malformed HTTP response that matches the initial SETTINGS frame of an
	// HTTP/2 connection. This happens when a server configures HTTP/2 on port
	// :80, failing HTTP-01 challenges.
	//
	// The regex first matches the error string prefix and then matches the raw
	// bytes of the 9-byte frame header of an arbitrarily sized HTTP/2 SETTINGS
	// frame:
	//   0x00 0x00 0x?? 0x04 0x00 0x00 0x00 0x00 0x00
	//
	// The first three bytes are the frame length; only the third is allowed to
	// vary. Typically this will be 0x12.
	// The 0x04 in the fourth byte indicates that the frame is SETTINGS type.
	// The fifth byte is the (empty) flags field and the final four bytes are
	// the stream identifier, which is zero.
	//
	// See:
	//   * https://tools.ietf.org/html/rfc7540#section-4.1
	//   * https://tools.ietf.org/html/rfc7540#section-6.5
	//
	// NOTE(@cpu): Using a regex is a hack but unfortunately for this case
	// http.Client.Do() will return a url.Error err that wraps
	// a errors.ErrorString instance. There isn't much else to do with one of
	// those except match the encoded byte string with a regex. :-X
	//
	// NOTE(@cpu): The first component of this regex is optional to avoid an
	// integration test flake. In some (fairly rare) conditions the malformed
	// response error will be returned simply as a http.badStringError without
	// the broken transport prefix. Most of the time the error is returned with
	// a transport connection error prefix.
	h2SettingsFrameErrRegex = regexp.MustCompile(`(?:net\/http\: HTTP\/1\.x transport connection broken: )?malformed HTTP response \"\\x00\\x00\\x[a-f0-9]{2}\\x04\\x00\\x00\\x00\\x00\\x00.*"`)
)
    83  
// RemoteClients wraps the vapb.VAClient and vapb.CAAClient interfaces to aid in
// mocking remote VAs for testing. Both interfaces are embedded, so their
// methods (e.g. DoDCV) can be called directly on a RemoteClients value.
type RemoteClients struct {
	vapb.VAClient
	vapb.CAAClient
}
    90  
// RemoteVA embeds RemoteClients and adds a field containing the address of the
// remote gRPC server since the underlying gRPC client doesn't provide a way to
// extract this metadata which is useful for debugging gRPC connection issues.
//
// Perspective and RIR are the perspective name and Regional Internet Registry
// that the remote VA is expected to report. doRemoteOperation treats a reply
// whose perspective or RIR differs from these values as an error, and
// NewValidationAuthorityImpl rejects configurations in which two remote VAs
// share a Perspective.
type RemoteVA struct {
	RemoteClients
	Address     string
	Perspective string
	RIR         string
}
   100  
// vaMetrics holds the Prometheus collectors created by initMetrics:
//   - prospectiveRemoteCAACheckFailures: "prospective_remote_caa_check_failures"
//   - tlsALPNOIDCounter: "tls_alpn_oid_usage", labelled by oid
//   - http01Fallbacks: "http01_fallbacks"
//   - http01Redirects: "http01_redirects"
//   - caaCounter: "caa_sets_processed", labelled by result
//   - ipv4FallbackCounter: "tls_alpn_ipv4_fallback"
type vaMetrics struct {
	// validationLatency is a histogram of the latency to perform validations
	// from the primary and remote VA perspectives. It's labelled by:
	//   - operation: VA.DoDCV or VA.DoCAA as [dcv|caa]
	//   - perspective: ValidationAuthorityImpl.perspective, or "all" for the
	//     combined primary and remote latency
	//   - challenge_type: core.Challenge.Type
	//   - problem_type: probs.ProblemType
	//   - result: the result of the validation as [pass|fail]
	validationLatency                 *prometheus.HistogramVec
	prospectiveRemoteCAACheckFailures prometheus.Counter
	tlsALPNOIDCounter                 *prometheus.CounterVec
	http01Fallbacks                   prometheus.Counter
	http01Redirects                   prometheus.Counter
	caaCounter                        *prometheus.CounterVec
	ipv4FallbackCounter               prometheus.Counter
}
   117  
   118  func initMetrics(stats prometheus.Registerer) *vaMetrics {
   119  	validationLatency := promauto.With(stats).NewHistogramVec(prometheus.HistogramOpts{
   120  		Name:    "validation_latency",
   121  		Help:    "Histogram of the latency to perform validations from the primary and remote VA perspectives",
   122  		Buckets: metrics.InternetFacingBuckets,
   123  	}, []string{"operation", "perspective", "challenge_type", "problem_type", "result"})
   124  	prospectiveRemoteCAACheckFailures := promauto.With(stats).NewCounter(prometheus.CounterOpts{
   125  		Name: "prospective_remote_caa_check_failures",
   126  		Help: "Number of CAA rechecks that would have failed due to remote VAs returning failure if consesus were enforced",
   127  	})
   128  	tlsALPNOIDCounter := promauto.With(stats).NewCounterVec(prometheus.CounterOpts{
   129  		Name: "tls_alpn_oid_usage",
   130  		Help: "Number of TLS ALPN validations using either of the two OIDs",
   131  	}, []string{"oid"})
   132  	http01Fallbacks := promauto.With(stats).NewCounter(prometheus.CounterOpts{
   133  		Name: "http01_fallbacks",
   134  		Help: "Number of IPv6 to IPv4 HTTP-01 fallback requests made",
   135  	})
   136  	http01Redirects := promauto.With(stats).NewCounter(prometheus.CounterOpts{
   137  		Name: "http01_redirects",
   138  		Help: "Number of HTTP-01 redirects followed",
   139  	})
   140  	caaCounter := promauto.With(stats).NewCounterVec(prometheus.CounterOpts{
   141  		Name: "caa_sets_processed",
   142  		Help: "A counter of CAA sets processed labelled by result",
   143  	}, []string{"result"})
   144  	ipv4FallbackCounter := promauto.With(stats).NewCounter(prometheus.CounterOpts{
   145  		Name: "tls_alpn_ipv4_fallback",
   146  		Help: "A counter of IPv4 fallbacks during TLS ALPN validation",
   147  	})
   148  
   149  	return &vaMetrics{
   150  		validationLatency:                 validationLatency,
   151  		prospectiveRemoteCAACheckFailures: prospectiveRemoteCAACheckFailures,
   152  		tlsALPNOIDCounter:                 tlsALPNOIDCounter,
   153  		http01Fallbacks:                   http01Fallbacks,
   154  		http01Redirects:                   http01Redirects,
   155  		caaCounter:                        caaCounter,
   156  		ipv4FallbackCounter:               ipv4FallbackCounter,
   157  	}
   158  }
   159  
// portConfig specifies what ports the VA should call to on the remote
// host when performing its checks. NewValidationAuthorityImpl copies the three
// values into the httpPort, httpsPort and tlsPort fields of the VA.
type portConfig struct {
	HTTPPort  int
	HTTPSPort int
	TLSPort   int
}
   167  
   168  // newDefaultPortConfig is a constructor which returns a portConfig with default
   169  // settings.
   170  //
   171  // CABF BRs section 1.6.1: Authorized Ports: One of the following ports: 80
   172  // (http), 443 (https), 25 (smtp), 22 (ssh).
   173  //
   174  // RFC 8555 section 8.3: Dereference the URL using an HTTP GET request. This
   175  // request MUST be sent to TCP port 80 on the HTTP server.
   176  //
   177  // RFC 8737 section 3: The ACME server initiates a TLS connection to the chosen
   178  // IP address. This connection MUST use TCP port 443.
   179  func newDefaultPortConfig() *portConfig {
   180  	return &portConfig{
   181  		HTTPPort:  80,
   182  		HTTPSPort: 443,
   183  		TLSPort:   443,
   184  	}
   185  }
   186  
// ValidationAuthorityImpl represents a VA. It is constructed by
// NewValidationAuthorityImpl.
//
// Notable fields:
//   - httpPort, httpsPort, tlsPort: taken from newDefaultPortConfig.
//   - remoteVAs, maxRemoteFailures: the remote perspectives contacted by
//     doRemoteOperation and the number of them that are allowed to fail.
//   - accountURIPrefixes: never empty; the first entry is used by DoDCV to
//     build the account URI for dns-account-01 challenges.
//   - singleDialTimeout: how long an individual DialContext operation may take
//     during HTTP-01 validation.
//   - slowRemoteTimeout: when non-zero, the extra time doRemoteOperation gives
//     the remaining remote VAs once the outcome has been decided.
//   - perspective, rir: the identity this VA reports in its results;
//     perspective also decides whether this is the primary VA.
//   - clk: the clock used to measure validation latency.
type ValidationAuthorityImpl struct {
	vapb.UnsafeVAServer
	vapb.UnsafeCAAServer
	log                blog.Logger
	dnsClient          bdns.Client
	issuerDomain       string
	httpPort           int
	httpsPort          int
	tlsPort            int
	userAgent          string
	clk                clock.Clock
	remoteVAs          []RemoteVA
	maxRemoteFailures  int
	accountURIPrefixes []string
	singleDialTimeout  time.Duration
	slowRemoteTimeout  time.Duration
	perspective        string
	rir                string
	isReservedIPFunc   func(netip.Addr) error

	metrics *vaMetrics
}
   210  
// Compile-time assertions that *ValidationAuthorityImpl satisfies both gRPC
// server interfaces.
var _ vapb.VAServer = (*ValidationAuthorityImpl)(nil)
var _ vapb.CAAServer = (*ValidationAuthorityImpl)(nil)
   213  
   214  // NewValidationAuthorityImpl constructs a new VA
   215  func NewValidationAuthorityImpl(
   216  	resolver bdns.Client,
   217  	remoteVAs []RemoteVA,
   218  	userAgent string,
   219  	issuerDomain string,
   220  	stats prometheus.Registerer,
   221  	clk clock.Clock,
   222  	logger blog.Logger,
   223  	accountURIPrefixes []string,
   224  	perspective string,
   225  	rir string,
   226  	reservedIPChecker func(netip.Addr) error,
   227  	slowRemoteTimeout time.Duration,
   228  ) (*ValidationAuthorityImpl, error) {
   229  
   230  	if len(accountURIPrefixes) == 0 {
   231  		return nil, errors.New("no account URI prefixes configured")
   232  	}
   233  
   234  	for i, va1 := range remoteVAs {
   235  		for j, va2 := range remoteVAs {
   236  			if i != j && va1.Perspective == va2.Perspective {
   237  				return nil, fmt.Errorf("duplicate remote VA perspective %q", va1.Perspective)
   238  			}
   239  		}
   240  	}
   241  
   242  	pc := newDefaultPortConfig()
   243  
   244  	va := &ValidationAuthorityImpl{
   245  		log:                logger,
   246  		dnsClient:          resolver,
   247  		issuerDomain:       issuerDomain,
   248  		httpPort:           pc.HTTPPort,
   249  		httpsPort:          pc.HTTPSPort,
   250  		tlsPort:            pc.TLSPort,
   251  		userAgent:          userAgent,
   252  		clk:                clk,
   253  		metrics:            initMetrics(stats),
   254  		remoteVAs:          remoteVAs,
   255  		maxRemoteFailures:  maxAllowedFailures(len(remoteVAs)),
   256  		accountURIPrefixes: accountURIPrefixes,
   257  		// singleDialTimeout specifies how long an individual `DialContext` operation may take
   258  		// before timing out. This timeout ignores the base RPC timeout and is strictly
   259  		// used for the DialContext operations that take place during an
   260  		// HTTP-01 challenge validation.
   261  		singleDialTimeout: 10 * time.Second,
   262  		perspective:       perspective,
   263  		rir:               rir,
   264  		isReservedIPFunc:  reservedIPChecker,
   265  	}
   266  
   267  	return va, nil
   268  }
   269  
   270  // maxAllowedFailures returns the maximum number of allowed failures
   271  // for a given number of remote perspectives, according to the "Quorum
   272  // Requirements" table in BRs Section 3.2.2.9, as follows:
   273  //
   274  //	| # of Distinct Remote Network Perspectives Used | # of Allowed non-Corroborations |
   275  //	| --- | --- |
   276  //	| 2-5 |  1  |
   277  //	| 6+  |  2  |
   278  func maxAllowedFailures(perspectiveCount int) int {
   279  	if perspectiveCount < 2 {
   280  		return 0
   281  	}
   282  	if perspectiveCount < 6 {
   283  		return 1
   284  	}
   285  	return 2
   286  }
   287  
   288  // ipError is an error type used to pass though the IP address of the remote
   289  // host when an error occurs during HTTP-01 and TLS-ALPN domain validation.
   290  type ipError struct {
   291  	ip  netip.Addr
   292  	err error
   293  }
   294  
   295  // newIPError wraps an error and the IP of the remote host in an ipError so we
   296  // can display the IP in the problem details returned to the client.
   297  func newIPError(ip netip.Addr, err error) error {
   298  	return ipError{ip: ip, err: err}
   299  }
   300  
   301  // Unwrap returns the underlying error.
   302  func (i ipError) Unwrap() error {
   303  	return i.err
   304  }
   305  
   306  // Error returns a string representation of the error.
   307  func (i ipError) Error() string {
   308  	return fmt.Sprintf("%s: %s", i.ip, i.err)
   309  }
   310  
// detailedError returns a ProblemDetails corresponding to an error
// that occurred during HTTP-01 or TLS-ALPN domain validation. Specifically it
// tries to unwrap known Go error types and present something a little more
// meaningful. It additionally handles `berrors.ConnectionFailure` errors by
// passing through the detailed message.
//
// The checks run in a fixed order and the first match wins: ipError,
// *url.Error, tls.RecordHeaderError, *net.OpError, *os.SyscallError, any
// net.Error timeout, the berrors kinds, and finally the HTTP/2 SETTINGS frame
// regex. Errors that match nothing yield a generic connection problem.
//
// err must be non-nil: the final regex check calls err.Error().
func detailedError(err error) *probs.ProblemDetails {
	var ipErr ipError
	if errors.As(err, &ipErr) {
		// Classify the wrapped error first, then decorate the result.
		detailedErr := detailedError(ipErr.err)
		if (ipErr.ip == netip.Addr{}) {
			// This should never happen.
			return detailedErr
		}
		// Prefix the error message with the IP address of the remote host.
		detailedErr.Detail = fmt.Sprintf("%s: %s", ipErr.ip, detailedErr.Detail)
		return detailedErr
	}
	// net/http wraps net.OpError in a url.Error. Unwrap them.
	var urlErr *url.Error
	if errors.As(err, &urlErr) {
		prob := detailedError(urlErr.Err)
		prob.Detail = fmt.Sprintf("Fetching %s: %s", urlErr.URL, prob.Detail)
		return prob
	}

	// A TLS record header that spells "HTTP/" means the peer answered our TLS
	// handshake with a plaintext HTTP response.
	var tlsErr tls.RecordHeaderError
	if errors.As(err, &tlsErr) && bytes.Equal(tlsErr.RecordHeader[:], badTLSHeader) {
		return probs.Malformed("Server only speaks HTTP, not TLS")
	}

	var netOpErr *net.OpError
	if errors.As(err, &netOpErr) {
		// The wrapped error's type is compared by name ("tls.alert") rather
		// than with a type assertion.
		if fmt.Sprintf("%T", netOpErr.Err) == "tls.alert" {
			// All the tls.alert error strings are reasonable to hand back to a
			// user. Confirmed against Go 1.8.
			return probs.TLS(netOpErr.Error())
		} else if netOpErr.Timeout() && netOpErr.Op == "dial" {
			return probs.Connection("Timeout during connect (likely firewall problem)")
		} else if netOpErr.Timeout() {
			return probs.Connection(fmt.Sprintf("Timeout during %s (your server may be slow or overloaded)", netOpErr.Op))
		}
		// Any other net.OpError falls through to the checks below.
	}
	var syscallErr *os.SyscallError
	if errors.As(err, &syscallErr) {
		switch syscallErr.Err {
		case syscall.ECONNREFUSED:
			return probs.Connection("Connection refused")
		case syscall.ENETUNREACH:
			return probs.Connection("Network unreachable")
		case syscall.ECONNRESET:
			return probs.Connection("Connection reset by peer")
		}
		// Other errno values fall through to the checks below.
	}
	// Catch-all for timeouts that were not reported as a net.OpError above.
	var netErr net.Error
	if errors.As(err, &netErr) && netErr.Timeout() {
		return probs.Connection("Timeout after connect (your server may be slow or overloaded)")
	}
	// Boulder error kinds map directly onto the problem type of the same
	// name, with the error message passed through as the detail.
	if errors.Is(err, berrors.ConnectionFailure) {
		return probs.Connection(err.Error())
	}
	if errors.Is(err, berrors.Unauthorized) {
		return probs.Unauthorized(err.Error())
	}
	if errors.Is(err, berrors.DNS) {
		return probs.DNS(err.Error())
	}
	if errors.Is(err, berrors.Malformed) {
		return probs.Malformed(err.Error())
	}
	if errors.Is(err, berrors.CAA) {
		return probs.CAA(err.Error())
	}

	// Last resort: recognise an HTTP/2 SETTINGS frame in the error text.
	if h2SettingsFrameErrRegex.MatchString(err.Error()) {
		return probs.Connection("Server is speaking HTTP/2 over HTTP")
	}
	return probs.Connection("Error getting validation data")
}
   389  
// isPrimaryVA returns true if the VA is the primary validation perspective,
// i.e. its configured perspective equals PrimaryPerspective. DoDCV uses this
// to decide whether to fan out to the remote VAs and to record the combined
// latency and MPIC summary.
func (va *ValidationAuthorityImpl) isPrimaryVA() bool {
	return va.perspective == PrimaryPerspective
}
   394  
   395  // validateChallenge simply passes through to the appropriate validation method
   396  // depending on the challenge type.
   397  // The accountURI parameter is required for dns-account-01 challenges to
   398  // calculate the account-specific label.
   399  func (va *ValidationAuthorityImpl) validateChallenge(
   400  	ctx context.Context,
   401  	ident identifier.ACMEIdentifier,
   402  	kind core.AcmeChallenge,
   403  	token string,
   404  	keyAuthorization string,
   405  	accountURI string,
   406  ) ([]core.ValidationRecord, error) {
   407  	switch kind {
   408  	case core.ChallengeTypeHTTP01:
   409  		return va.validateHTTP01(ctx, ident, token, keyAuthorization)
   410  	case core.ChallengeTypeDNS01:
   411  		// Strip a (potential) leading wildcard token from the identifier.
   412  		ident.Value = strings.TrimPrefix(ident.Value, "*.")
   413  		return va.validateDNS01(ctx, ident, keyAuthorization)
   414  	case core.ChallengeTypeTLSALPN01:
   415  		return va.validateTLSALPN01(ctx, ident, keyAuthorization)
   416  	case core.ChallengeTypeDNSAccount01:
   417  		if features.Get().DNSAccount01Enabled {
   418  			// Strip a (potential) leading wildcard token from the identifier.
   419  			ident.Value = strings.TrimPrefix(ident.Value, "*.")
   420  			return va.validateDNSAccount01(ctx, ident, keyAuthorization, accountURI)
   421  		}
   422  	}
   423  	return nil, berrors.MalformedError("invalid challenge type %s", kind)
   424  }
   425  
   426  // observeLatency records entries in the validationLatency histogram of the
   427  // latency to perform validations from the primary and remote VA perspectives.
   428  // The labels are:
   429  //   - operation: VA.DoDCV or VA.DoCAA as [dcv|caa]
   430  //   - perspective: [ValidationAuthorityImpl.perspective|all]
   431  //   - challenge_type: core.Challenge.Type
   432  //   - problem_type: probs.ProblemType
   433  //   - result: the result of the validation as [pass|fail]
   434  func (va *ValidationAuthorityImpl) observeLatency(op, perspective, challType, probType, result string, latency time.Duration) {
   435  	labels := prometheus.Labels{
   436  		"operation":      op,
   437  		"perspective":    perspective,
   438  		"challenge_type": challType,
   439  		"problem_type":   probType,
   440  		"result":         result,
   441  	}
   442  	va.metrics.validationLatency.With(labels).Observe(latency.Seconds())
   443  }
   444  
// remoteOperation is a func type that encapsulates the operation and request
// passed to va.doRemoteOperation. The operation must be a method on
// vapb.VAClient or vapb.CAAClient, and the request must be the corresponding
// proto.Message passed to that method.
type remoteOperation = func(context.Context, RemoteVA, proto.Message) (remoteResult, error)
   450  
// remoteResult is an interface that must be implemented by the results of a
// remoteOperation, such as *vapb.ValidationResult and *vapb.IsCAAValidResponse.
// It provides methods to access problem details, the associated perspective,
// and the RIR.
//
// doRemoteOperation counts a result as a failure when GetProblem returns
// non-nil, and checks GetPerspective and GetRir against the RemoteVA the
// request was sent to.
type remoteResult interface {
	proto.Message
	GetProblem() *corepb.ProblemDetails
	GetPerspective() string
	GetRir() string
}
   461  
const (
	// requiredRIRs is the minimum number of distinct Regional Internet
	// Registries required for MPIC-compliant validation. Per BRs Section
	// 3.2.2.9, starting March 15, 2026, the required number is 2.
	//
	// doRemoteOperation only reports success when the passing remote
	// perspectives span at least this many distinct RIRs.
	requiredRIRs = 2
)
   468  
   469  // mpicSummary is returned by doRemoteOperation and contains a summary of the
   470  // validation results for logging purposes. To ensure that the JSON output does
   471  // not contain nil slices, and to ensure deterministic output use the
   472  // summarizeMPIC function to prepare an mpicSummary.
   473  type mpicSummary struct {
   474  	// Passed are the perspectives that passed validation.
   475  	Passed []string `json:"passedPerspectives"`
   476  
   477  	// Failed are the perspectives that failed validation.
   478  	Failed []string `json:"failedPerspectives"`
   479  
   480  	// PassedRIRs are the Regional Internet Registries that the passing
   481  	// perspectives reside in.
   482  	PassedRIRs []string `json:"passedRIRs"`
   483  
   484  	// QuorumResult is the Multi-Perspective Issuance Corroboration quorum
   485  	// result, per BRs Section 5.4.1, Requirement 2.7 (i.e., "3/4" which should
   486  	// be interpreted as "Three (3) out of four (4) attempted Network
   487  	// Perspectives corroborated the determinations made by the Primary Network
   488  	// Perspective".
   489  	QuorumResult string `json:"quorumResult"`
   490  }
   491  
   492  // summarizeMPIC prepares an *mpicSummary for logging, ensuring there are no nil
   493  // slices and output is deterministic.
   494  func summarizeMPIC(passed, failed []string, passedRIRSet map[string]struct{}) *mpicSummary {
   495  	if passed == nil {
   496  		passed = []string{}
   497  	}
   498  	slices.Sort(passed)
   499  	if failed == nil {
   500  		failed = []string{}
   501  	}
   502  	slices.Sort(failed)
   503  
   504  	passedRIRs := []string{}
   505  	if passedRIRSet != nil {
   506  		for rir := range maps.Keys(passedRIRSet) {
   507  			passedRIRs = append(passedRIRs, rir)
   508  		}
   509  	}
   510  	slices.Sort(passedRIRs)
   511  
   512  	return &mpicSummary{
   513  		Passed:       passed,
   514  		Failed:       failed,
   515  		PassedRIRs:   passedRIRs,
   516  		QuorumResult: fmt.Sprintf("%d/%d", len(passed), len(passed)+len(failed)),
   517  	}
   518  }
   519  
   520  // doRemoteOperation concurrently calls the provided operation with `req` and a
   521  // RemoteVA once for each configured RemoteVA. It cancels remaining operations
   522  // and returns early if either the required number of successful results is
   523  // obtained or the number of failures exceeds va.maxRemoteFailures.
   524  //
   525  // Internal logic errors are logged. If the number of operation failures exceeds
   526  // va.maxRemoteFailures, the first encountered problem is returned as a
   527  // *probs.ProblemDetails.
   528  func (va *ValidationAuthorityImpl) doRemoteOperation(ctx context.Context, op remoteOperation, req proto.Message) (*mpicSummary, *probs.ProblemDetails) {
   529  	remoteVACount := len(va.remoteVAs)
   530  	//  - Mar 15, 2026: MUST implement using at least 3 perspectives
   531  	//  - Jun 15, 2026: MUST implement using at least 4 perspectives
   532  	//  - Dec 15, 2026: MUST implement using at least 5 perspectives
   533  	// See "Phased Implementation Timeline" in
   534  	// https://github.com/cabforum/servercert/blob/main/docs/BR.md#3229-multi-perspective-issuance-corroboration
   535  	if remoteVACount < 3 {
   536  		return nil, probs.ServerInternal("Insufficient remote perspectives: need at least 3")
   537  	}
   538  
   539  	type response struct {
   540  		addr        string
   541  		perspective string
   542  		rir         string
   543  		result      remoteResult
   544  		err         error
   545  	}
   546  
   547  	subCtx, cancel := context.WithCancel(ctx)
   548  	defer cancel()
   549  
   550  	responses := make(chan *response, remoteVACount)
   551  	for _, i := range rand.Perm(remoteVACount) {
   552  		go func(rva RemoteVA) {
   553  			res, err := op(subCtx, rva, req)
   554  			if err != nil {
   555  				responses <- &response{rva.Address, rva.Perspective, rva.RIR, res, err}
   556  				return
   557  			}
   558  			if res.GetPerspective() != rva.Perspective || res.GetRir() != rva.RIR {
   559  				err = fmt.Errorf(
   560  					"Expected perspective %q (%q) but got reply from %q (%q) - misconfiguration likely", rva.Perspective, rva.RIR, res.GetPerspective(), res.GetRir(),
   561  				)
   562  				responses <- &response{rva.Address, rva.Perspective, rva.RIR, res, err}
   563  				return
   564  			}
   565  			responses <- &response{rva.Address, rva.Perspective, rva.RIR, res, err}
   566  		}(va.remoteVAs[i])
   567  	}
   568  
   569  	required := remoteVACount - va.maxRemoteFailures
   570  	var passed []string
   571  	var failed []string
   572  	var passedRIRs = map[string]struct{}{}
   573  	var firstProb *probs.ProblemDetails
   574  
   575  	for resp := range responses {
   576  		var currProb *probs.ProblemDetails
   577  
   578  		if resp.err != nil {
   579  			// Failed to communicate with the remote VA.
   580  			failed = append(failed, resp.perspective)
   581  
   582  			if core.IsCanceled(resp.err) {
   583  				currProb = probs.ServerInternal("Secondary validation RPC canceled")
   584  			} else {
   585  				va.log.Errf("Operation on remote VA (%s) failed: %s", resp.addr, resp.err)
   586  				currProb = probs.ServerInternal("Secondary validation RPC failed")
   587  			}
   588  		} else if resp.result.GetProblem() != nil {
   589  			// The remote VA returned a problem.
   590  			failed = append(failed, resp.perspective)
   591  
   592  			var err error
   593  			currProb, err = bgrpc.PBToProblemDetails(resp.result.GetProblem())
   594  			if err != nil {
   595  				va.log.Errf("Operation on Remote VA (%s) returned malformed problem: %s", resp.addr, err)
   596  				currProb = probs.ServerInternal("Secondary validation RPC returned malformed result")
   597  			}
   598  		} else {
   599  			// The remote VA returned a successful result.
   600  			passed = append(passed, resp.perspective)
   601  			passedRIRs[resp.rir] = struct{}{}
   602  		}
   603  
   604  		if firstProb == nil && currProb != nil {
   605  			// A problem was encountered for the first time.
   606  			firstProb = currProb
   607  		}
   608  
   609  		if va.slowRemoteTimeout != 0 {
   610  			// If enough perspectives have passed, or enough perspectives have
   611  			// failed, set a tighter deadline for the remaining perspectives.
   612  			if (len(passed) >= required && len(passedRIRs) >= requiredRIRs) ||
   613  				(len(failed) > remoteVACount-required) {
   614  				timer := time.AfterFunc(va.slowRemoteTimeout, cancel)
   615  				defer timer.Stop()
   616  			}
   617  		}
   618  
   619  		// Once all the VAs have returned a result, break the loop.
   620  		if len(passed)+len(failed) >= remoteVACount {
   621  			break
   622  		}
   623  	}
   624  	if len(passed) >= required && len(passedRIRs) >= requiredRIRs {
   625  		return summarizeMPIC(passed, failed, passedRIRs), nil
   626  	}
   627  	if firstProb == nil {
   628  		// This should never happen. If we didn't meet the thresholds above we
   629  		// should have seen at least one error.
   630  		return summarizeMPIC(passed, failed, passedRIRs), probs.ServerInternal(
   631  			"During secondary validation: validation failed but the problem is unavailable")
   632  	}
   633  	firstProb.Detail = fmt.Sprintf("During secondary validation: %s", firstProb.Detail)
   634  	return summarizeMPIC(passed, failed, passedRIRs), firstProb
   635  }
   636  
// validationLogEvent is a struct that contains the information needed to log
// the results of DoCAA and DoDCV.
//
// As populated by DoDCV: Error is the string form of the problem returned to
// the caller, InternalError is the text of the underlying local validation
// error, Latency is the total validation time in seconds (rounded to the
// millisecond), and Summary is only set on the primary VA. Error,
// InternalError and Summary are omitted from the JSON output when empty.
type validationLogEvent struct {
	AuthzID       string
	Requester     int64
	Identifier    identifier.ACMEIdentifier
	Challenge     core.Challenge
	Error         string `json:",omitempty"`
	InternalError string `json:",omitempty"`
	Latency       float64
	Summary       *mpicSummary `json:",omitempty"`
}
   649  
   650  // DoDCV conducts a local Domain Control Validation (DCV) for the specified
   651  // challenge. When invoked on the primary Validation Authority (VA) and the
   652  // local validation succeeds, it also performs DCV validations using the
   653  // configured remote VAs. Failed validations are indicated by a non-nil Problems
   654  // in the returned ValidationResult. DoDCV returns error only for internal logic
   655  // errors (and the client may receive errors from gRPC in the event of a
   656  // communication problem). ValidationResult always includes a list of
   657  // ValidationRecords, even when it also contains Problems. This method
   658  // implements the DCV portion of Multi-Perspective Issuance Corroboration as
   659  // defined in BRs Sections 3.2.2.9 and 5.4.1.
   660  func (va *ValidationAuthorityImpl) DoDCV(ctx context.Context, req *vapb.PerformValidationRequest) (*vapb.ValidationResult, error) {
   661  	if core.IsAnyNilOrZero(req, req.Identifier, req.Challenge, req.Authz, req.Authz.RegID, req.ExpectedKeyAuthorization) {
   662  		return nil, berrors.InternalServerError("Incomplete validation request")
   663  	}
   664  
   665  	ident := identifier.FromProto(req.Identifier)
   666  
   667  	chall, err := bgrpc.PBToChallenge(req.Challenge)
   668  	if err != nil {
   669  		return nil, errors.New("challenge failed to deserialize")
   670  	}
   671  
   672  	err = chall.CheckPending()
   673  	if err != nil {
   674  		return nil, berrors.MalformedError("challenge failed consistency check: %s", err)
   675  	}
   676  
   677  	// Initialize variables and a deferred function to handle validation latency
   678  	// metrics, log validation errors, and log an MPIC summary. Avoid using :=
   679  	// to redeclare `prob`, `localLatency`, or `summary` below this point.
   680  	var prob *probs.ProblemDetails
   681  	var summary *mpicSummary
   682  	var localLatency time.Duration
   683  	start := va.clk.Now()
   684  	logEvent := validationLogEvent{
   685  		AuthzID:    req.Authz.Id,
   686  		Requester:  req.Authz.RegID,
   687  		Identifier: ident,
   688  		Challenge:  chall,
   689  	}
   690  	defer func() {
   691  		probType := ""
   692  		outcome := fail
   693  		if prob != nil {
   694  			probType = string(prob.Type)
   695  			logEvent.Error = prob.String()
   696  			logEvent.Challenge.Error = prob
   697  			logEvent.Challenge.Status = core.StatusInvalid
   698  		} else {
   699  			logEvent.Challenge.Status = core.StatusValid
   700  			outcome = pass
   701  		}
   702  
   703  		// Observe local validation latency (primary|remote).
   704  		va.observeLatency(opDCV, va.perspective, string(chall.Type), probType, outcome, localLatency)
   705  		if va.isPrimaryVA() {
   706  			// Observe total validation latency (primary+remote).
   707  			va.observeLatency(opDCV, allPerspectives, string(chall.Type), probType, outcome, va.clk.Since(start))
   708  			logEvent.Summary = summary
   709  		}
   710  
   711  		// Log the total validation latency.
   712  		logEvent.Latency = va.clk.Since(start).Round(time.Millisecond).Seconds()
   713  		va.log.AuditObject("Validation result", logEvent)
   714  	}()
   715  
   716  	// For dns-account-01 challenges, construct the account URI from the configured prefix
   717  	var accountURI string
   718  	if chall.Type == core.ChallengeTypeDNSAccount01 && features.Get().DNSAccount01Enabled {
   719  		accountURI = fmt.Sprintf("%s%d", va.accountURIPrefixes[0], req.Authz.RegID)
   720  	}
   721  
   722  	// Do local validation. Note that we process the result in a couple ways
   723  	// *before* checking whether it returned an error. These few checks are
   724  	// carefully written to ensure that they work whether the local validation
   725  	// was successful or not, and cannot themselves fail.
   726  	records, err := va.validateChallenge(
   727  		ctx,
   728  		ident,
   729  		chall.Type,
   730  		chall.Token,
   731  		req.ExpectedKeyAuthorization,
   732  		accountURI,
   733  	)
   734  
   735  	// Stop the clock for local validation latency.
   736  	localLatency = va.clk.Since(start)
   737  
   738  	// Check for malformed ValidationRecords
   739  	logEvent.Challenge.ValidationRecord = records
   740  	if err == nil && !logEvent.Challenge.RecordsSane() {
   741  		err = errors.New("records from local validation failed sanity check")
   742  	}
   743  
   744  	if err != nil {
   745  		logEvent.InternalError = err.Error()
   746  		prob = detailedError(err)
   747  		return bgrpc.ValidationResultToPB(records, filterProblemDetails(prob), va.perspective, va.rir)
   748  	}
   749  
   750  	if va.isPrimaryVA() {
   751  		// Do remote validation. We do this after local validation is complete
   752  		// to avoid wasting work when validation will fail anyway. This only
   753  		// returns a singular problem, because the remote VAs have already
   754  		// logged their own validationLogEvent, and it's not helpful to present
   755  		// multiple large errors to the end user.
   756  		op := func(ctx context.Context, remoteva RemoteVA, req proto.Message) (remoteResult, error) {
   757  			validationRequest, ok := req.(*vapb.PerformValidationRequest)
   758  			if !ok {
   759  				return nil, fmt.Errorf("got type %T, want *vapb.PerformValidationRequest", req)
   760  			}
   761  			return remoteva.DoDCV(ctx, validationRequest)
   762  		}
   763  		summary, prob = va.doRemoteOperation(ctx, op, req)
   764  	}
   765  
   766  	return bgrpc.ValidationResultToPB(records, filterProblemDetails(prob), va.perspective, va.rir)
   767  }