go.chromium.org/luci@v0.0.0-20250314024836-d9a61d0730e6/tokenserver/appengine/impl/certconfig/crl.go (about)

     1  // Copyright 2016 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package certconfig
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha1"
    20  	"crypto/x509/pkix"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"math/big"
    24  	"sync"
    25  	"time"
    26  
    27  	"google.golang.org/protobuf/types/known/timestamppb"
    28  
    29  	"go.chromium.org/luci/common/data/caching/lazyslot"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/common/retry/transient"
    33  	ds "go.chromium.org/luci/gae/service/datastore"
    34  
    35  	"go.chromium.org/luci/tokenserver/api/admin/v1"
    36  	"go.chromium.org/luci/tokenserver/appengine/impl/utils"
    37  	"go.chromium.org/luci/tokenserver/appengine/impl/utils/shards"
    38  )
    39  
// CRLShardCount is the number of shards used for storing a CRL in the
// datastore.
//
// Each shard can hold ~2 MB of data (taking into account zlib compression),
// so 16 shards ~= 32 MB. Good enough for the foreseeable future.
//
// The shard count is embedded into the ID of every shard entity (see
// shardEntityID), so changing this value requires rerunning the
// Admin.FetchCRL RPC to rebuild the entities.
const CRLShardCount = 16
    48  
// CRL represents a parsed Certificate Revocation List of some CA.
//
// It holds only bookkeeping information about the CRL (versions, timestamps,
// ETag, number of entries). The revoked serial numbers themselves are stored
// separately in sharded form, see CRLShardHeader and CRLShardBody.
//
// ID is always "crl", the parent entity is the corresponding CA.
type CRL struct {
	// _id fixes the entity ID to "crl" through the struct tag.
	_id string `gae:"$id,crl"`

	// Parent points to the parent CA entity.
	Parent *ds.Key `gae:"$parent"`

	// EntityVersion is used for simple concurrency control.
	//
	// It is increased on each update of this entity.
	EntityVersion int `gae:",noindex"`

	// LastUpdateTime is extracted from the corresponding field of the CRL.
	//
	// It indicates the time when the CRL was generated by the CA.
	LastUpdateTime time.Time `gae:",noindex"`

	// LastFetchTime is when this CRL was fetched the last time.
	//
	// Updated only when a newer CRL version is fetched.
	LastFetchTime time.Time `gae:",noindex"`

	// LastFetchETag is the ETag header of the last downloaded CRL file.
	//
	// If the CRL's ETag doesn't change, reparsing of the CRL can be skipped.
	LastFetchETag string `gae:",noindex"`

	// RevokedCertsCount is the number of revoked certificates in the CRL.
	// FYI only.
	RevokedCertsCount int `gae:",noindex"`
}
    81  
    82  // GetStatusProto returns populated CRLStatus proto message.
    83  func (crl *CRL) GetStatusProto() *admin.CRLStatus {
    84  	return &admin.CRLStatus{
    85  		LastUpdateTime:    timestamppb.New(crl.LastUpdateTime),
    86  		LastFetchTime:     timestamppb.New(crl.LastFetchTime),
    87  		LastFetchEtag:     crl.LastFetchETag,
    88  		RevokedCertsCount: int64(crl.RevokedCertsCount),
    89  	}
    90  }
    91  
    92  ////////////////////////////////////////////////////////////////////////////////
    93  
// CRLShardHeader represents a hash of a shard of a CRL sharded set.
//
// We split the CRL into a bunch of shards to avoid hitting datastore entity
// size limits. Each shard lives in its own entity group, where the root entity
// (CRLShardHeader) contains a hash of the shard data (CRLShardBody).
//
// It is used to skip fetches of fat shard entities if we already have the same
// data locally (based on a matching hash).
//
// ID is "<cn name>|<total number of shards>|<shard index>" (see shardEntityID).
type CRLShardHeader struct {
	// ID is the entity ID, as produced by shardEntityID.
	ID   string `gae:"$id"`
	SHA1 string `gae:",noindex"` // hex-encoded SHA1 of serialized shard data (before compression)
}
   108  
// CRLShardBody is a fat entity that contains a serialized CRL shard.
//
// It is written together with its CRLShardHeader by updateCRLShard and read
// back by CRLChecker. See CRLShardHeader for more info.
//
// Parent entity is CRLShardHeader. ID is always "1".
type CRLShardBody struct {
	// _id fixes the entity ID to "1" through the struct tag.
	_id string `gae:"$id,1"`

	Parent     *ds.Key `gae:"$parent"`  // key of the owning CRLShardHeader
	SHA1       string  `gae:",noindex"` // hex-encoded SHA1 of serialized shard data (before compression)
	ZippedData []byte  `gae:",noindex"` // zlib-compressed serialized shards.Shard
}
   121  
   122  // UpdateCRLSet splits a set of revoked certificate serial numbers into shards,
   123  // storing each shard in a separate entity (CRLShardBody).
   124  //
   125  // It effectively overwrites the entire set.
   126  func UpdateCRLSet(c context.Context, cn string, shardCount int, crl *pkix.CertificateList) error {
   127  	// Split CRL into shards.
   128  	set := make(shards.Set, shardCount)
   129  	for _, cert := range crl.TBSCertList.RevokedCertificates {
   130  		sn, err := utils.SerializeSN(cert.SerialNumber)
   131  		if err != nil {
   132  			return err
   133  		}
   134  		set.Insert(sn)
   135  	}
   136  	// Update shards in parallel via a bunch of independent transactions.
   137  	wg := sync.WaitGroup{}
   138  	er := errors.NewLazyMultiError(len(set))
   139  	for idx, shard := range set {
   140  		wg.Add(1)
   141  		go func(idx int, shard shards.Shard) {
   142  			defer wg.Done()
   143  			er.Assign(idx, updateCRLShard(c, cn, shard, shardCount, idx))
   144  		}(idx, shard)
   145  	}
   146  	wg.Wait()
   147  	return er.Get()
   148  }
   149  
   150  // updateCRLShard updates entities that holds a single shard of a CRL set.
   151  func updateCRLShard(c context.Context, cn string, shard shards.Shard, count, idx int) error {
   152  	blob := shard.Serialize()
   153  	hash := sha1.Sum(blob)
   154  	digest := hex.EncodeToString(hash[:])
   155  
   156  	// Have it already?
   157  	header := CRLShardHeader{ID: shardEntityID(cn, count, idx)}
   158  	switch err := ds.Get(c, &header); {
   159  	case err != nil && err != ds.ErrNoSuchEntity:
   160  		return err
   161  	case err == nil && header.SHA1 == digest:
   162  		logging.Infof(c, "CRL for %q: shard %d/%d is up-to-date", cn, idx, count)
   163  		return nil
   164  	}
   165  
   166  	// Zip before uploading.
   167  	zipped, err := utils.ZlibCompress(blob)
   168  	if err != nil {
   169  		return err
   170  	}
   171  	logging.Infof(
   172  		c, "CRL for %q: shard %d/%d updated (%d bytes zipped, %d%% compression)",
   173  		cn, idx, count, len(zipped), 100*len(zipped)/len(blob))
   174  
   175  	// Upload, updating the header and the body at once.
   176  	return ds.RunInTransaction(c, func(c context.Context) error {
   177  		header.SHA1 = digest
   178  		body := CRLShardBody{
   179  			Parent:     ds.KeyForObj(c, &header),
   180  			SHA1:       digest,
   181  			ZippedData: zipped,
   182  		}
   183  		return ds.Put(c, &header, &body)
   184  	}, nil)
   185  }
   186  
   187  // shardEntityID returns an ID of CRLShardHeader entity for given shard.
   188  //
   189  // 'cn' is Common Name of the CRL. 'total' is total number of shards expected,
   190  // and 'index' is an index of some particular shard.
   191  func shardEntityID(cn string, total, index int) string {
   192  	return fmt.Sprintf("%s|%d|%d", cn, total, index)
   193  }
   194  
   195  ////////////////////////////////////////////////////////////////////////////////
   196  
// CRLChecker knows how to check the presence of a certificate serial number in
// a CRL.
//
// Uses entities prepared by UpdateCRLSet.
//
// It is a stateful object that caches CRL shards in memory (occasionally
// refetching them from the datastore), thus providing an eventually consistent
// view of the CRL set.
//
// Safe for concurrent use. Should be reused between requests.
//
// Construct it with NewCRLChecker, which allocates the per-shard slots.
type CRLChecker struct {
	cn            string          // name of the CA to check the CRL of
	shardCount    int             // the total number of shards
	shards        []lazyslot.Slot // per-shard local state, len(shards) == shardCount
	cacheDuration time.Duration   // how often to refetch shards from the datastore
}
   212  
// shardCache is the value kept inside the 'shards' slots of CRLChecker.
//
// It pairs the deserialized shard with the hash it was stored under, so that
// a refetch can be skipped when the hash in the datastore has not changed (see
// refetchShard).
type shardCache struct {
	shard shards.Shard // deserialized shard, looked up by serialized serial number
	sha1  string       // shard hash, to skip unnecessary refetches
}
   218  
   219  // NewCRLChecker initializes new CRLChecker that knows how to examine CRL of
   220  // a CA (identifies by its Common Name).
   221  //
   222  // It must know number of shards in advance. Usually is it just CRLShardCount.
   223  //
   224  // It will cache shards in local memory, refetching them if necessary after
   225  // 'cacheDuration' interval.
   226  func NewCRLChecker(cn string, shardCount int, cacheDuration time.Duration) *CRLChecker {
   227  	return &CRLChecker{
   228  		cn:            cn,
   229  		shardCount:    shardCount,
   230  		shards:        make([]lazyslot.Slot, shardCount),
   231  		cacheDuration: cacheDuration,
   232  	}
   233  }
   234  
   235  // IsRevokedSN returns true if given serial number is in the CRL.
   236  func (ch *CRLChecker) IsRevokedSN(c context.Context, sn *big.Int) (bool, error) {
   237  	snBlob, err := utils.SerializeSN(sn)
   238  	if err != nil {
   239  		return false, err
   240  	}
   241  	shard, err := ch.shard(c, shards.ShardIndex(snBlob, ch.shardCount))
   242  	if err != nil {
   243  		return false, err
   244  	}
   245  	_, revoked := shard[string(snBlob)]
   246  	return revoked, nil
   247  }
   248  
   249  // shard returns a shard given its index.
   250  func (ch *CRLChecker) shard(c context.Context, idx int) (shards.Shard, error) {
   251  	val, err := ch.shards[idx].Get(c, func(prev any) (any, time.Duration, error) {
   252  		prevState, _ := prev.(shardCache)
   253  		newState, err := ch.refetchShard(c, idx, prevState)
   254  		return newState, ch.cacheDuration, err
   255  	})
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  	// lazyslot.Get always returns non-nil val on success. It is safe to cast it
   260  	// to whatever we returned in the callback (which is always shardCache, see
   261  	// refetchShard).
   262  	return val.(shardCache).shard, nil
   263  }
   264  
   265  // refetchShard is called by 'shard' to fetch a new version of a shard.
   266  func (ch *CRLChecker) refetchShard(c context.Context, idx int, prevState shardCache) (newState shardCache, err error) {
   267  	// Have something locally already? Quickly fetch CRLShardHeader to check
   268  	// whether we need to pull a heavy CRLShardBody.
   269  	hdr := CRLShardHeader{ID: shardEntityID(ch.cn, ch.shardCount, idx)}
   270  	if prevState.sha1 != "" {
   271  		switch err = ds.Get(c, &hdr); {
   272  		case err == ds.ErrNoSuchEntity:
   273  			err = fmt.Errorf("shard header %q is missing", hdr.ID)
   274  			return
   275  		case err != nil:
   276  			err = transient.Tag.Apply(err)
   277  			return
   278  		}
   279  		// The currently cached copy is still good enough?
   280  		if hdr.SHA1 == prevState.sha1 {
   281  			newState = prevState
   282  			return
   283  		}
   284  	}
   285  
   286  	// Nothing is cached, or the datastore copy is fresher than what we have in
   287  	// the cache. Need to fetch a new copy, unzip and deserialize it. This entity
   288  	// is prepared by updateCRLShard.
   289  	body := CRLShardBody{Parent: ds.KeyForObj(c, &hdr)}
   290  	switch err = ds.Get(c, &body); {
   291  	case err == ds.ErrNoSuchEntity:
   292  		err = fmt.Errorf("shard body %q is missing", hdr.ID)
   293  		return
   294  	case err != nil:
   295  		err = transient.Tag.Apply(err)
   296  		return
   297  	}
   298  
   299  	// Unzip and deserialize.
   300  	blob, err := utils.ZlibDecompress(body.ZippedData)
   301  	if err != nil {
   302  		return
   303  	}
   304  	shard, err := shards.ParseShard(blob)
   305  	if err != nil {
   306  		return
   307  	}
   308  
   309  	newState = shardCache{shard: shard, sha1: body.SHA1}
   310  	return
   311  }