// Source: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/appengine/gaeauth/server/internal/authdbimpl/authdb.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package authdbimpl

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
	"time"

	ds "go.chromium.org/luci/gae/service/datastore"

	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/server/auth/service"
	"go.chromium.org/luci/server/auth/service/protocol"
)

// maxShardSize is a limit on a blob size to store in a single entity.
//
// Deflated AuthDB blobs of this size or larger are split into SnapshotShard
// entities instead of being stored inline in the Snapshot entity.
const maxShardSize = 1020 * 1024 // 1020 KiB

// SnapshotInfo identifies some concrete AuthDB snapshot.
//
// Singleton entity. Serves as a pointer to a blob with corresponding AuthDB
// proto message (stored in separate Snapshot entity).
type SnapshotInfo struct {
	// AuthServiceURL is the URL of the auth service the snapshot comes from.
	// It becomes a part of the Snapshot entity ID and so must not contain ','.
	AuthServiceURL string `gae:",noindex"`
	// Rev is the AuthDB revision number the snapshot corresponds to.
	Rev int64 `gae:",noindex"`

	// Datastore meta fields: a fixed entity kind and a constant ID of 1, which
	// is what makes this entity a singleton.
	_kind string `gae:"$kind,gaeauth.SnapshotInfo"`
	_id   int64  `gae:"$id,1"`
}

// GetSnapshotID returns datastore ID of the corresponding Snapshot entity.
52 func (si *SnapshotInfo) GetSnapshotID() string { 53 if strings.IndexByte(si.AuthServiceURL, ',') != -1 { 54 panic(fmt.Errorf("forbidden symbol ',' in URL %q", si.AuthServiceURL)) 55 } 56 return fmt.Sprintf("v1,%s,%d", si.AuthServiceURL, si.Rev) 57 } 58 59 // Snapshot is serialized deflated AuthDB blob with some minimal metadata. 60 // 61 // Root entity. Immutable. Key has the form "v1,<AuthServiceURL>,<Revision>", 62 // it's generated by SnapshotInfo.GetSnapshotID(). It is globally unique 63 // version identifier, since it includes URL of an auth service. AuthServiceURL 64 // should be not very long (~< 250 chars) for this too work. 65 // 66 // Currently does not get garbage collected. 67 type Snapshot struct { 68 ID string `gae:"$id"` 69 70 // AuthDBDeflated is zlib-compressed serialized AuthDB protobuf message. 71 // 72 // If it is too big, it is stored in a bunch of SnapshotShard entities 73 // referenced by ShardIDs field below. 74 // 75 // Note: if the old version of this code tries to load a new Snapshot entity 76 // with ShardIDs field populated, it would abort with an error because old 77 // code doesn't know about ShardIDs field (it is not in the old Snapshot 78 // entity struct). This is desirable: the new sharded data structure is not 79 // (and can't be made) compatible with old code, so it is good that it breaks 80 // as soon as possible. 81 AuthDBDeflated []byte `gae:",noindex"` 82 83 // ShardIDs is a list of IDs of SnapshotShard entities to fetch. 84 ShardIDs []string `gae:",noindex"` 85 86 CreatedAt time.Time // when it was created on Auth service 87 FetchedAt time.Time // when it was fetched and put into the datastore 88 89 _kind string `gae:"$kind,gaeauth.Snapshot"` 90 } 91 92 // SnapshotShard holds a shard of a deflated AuthDB. 93 type SnapshotShard struct { 94 // ID is "<Snapshot ID>:<shard hash>". 95 ID string `gae:"$id"` 96 // Shard is the actual data. 
97 Shard []byte `gae:",noindex"` 98 99 _kind string `gae:"$kind,gaeauth.SnapshotShard"` 100 } 101 102 // GetLatestSnapshotInfo fetches SnapshotInfo singleton entity. 103 // 104 // If no such entity is stored, returns (nil, nil). 105 func GetLatestSnapshotInfo(ctx context.Context) (*SnapshotInfo, error) { 106 report := durationReporter(ctx, latestSnapshotInfoDuration) 107 logging.Debugf(ctx, "Fetching AuthDB snapshot info from the datastore") 108 ctx = ds.WithoutTransaction(defaultNS(ctx)) 109 info := SnapshotInfo{} 110 switch err := ds.Get(ctx, &info); { 111 case err == ds.ErrNoSuchEntity: 112 report("SUCCESS") 113 return nil, nil 114 case err != nil: 115 report("ERROR_TRANSIENT") 116 return nil, transient.Tag.Apply(err) 117 default: 118 report("SUCCESS") 119 return &info, nil 120 } 121 } 122 123 // deleteSnapshotInfo removes SnapshotInfo entity from the datastore. 124 // 125 // Used to detach the service from auth_service. 126 func deleteSnapshotInfo(ctx context.Context) error { 127 ctx = ds.WithoutTransaction(ctx) 128 return ds.Delete(ctx, ds.KeyForObj(ctx, &SnapshotInfo{})) 129 } 130 131 // GetAuthDBSnapshot fetches, inflates and deserializes AuthDB snapshot. 132 func GetAuthDBSnapshot(ctx context.Context, id string) (*protocol.AuthDB, error) { 133 report := durationReporter(ctx, getSnapshotDuration) 134 logging.Debugf(ctx, "Fetching AuthDB snapshot from the datastore") 135 defer logging.Debugf(ctx, "AuthDB snapshot fetched") 136 137 blob, code, err := fetchDeflated(ctx, id) 138 if err != nil { 139 report(code) 140 return nil, err 141 } 142 143 db, err := service.InflateAuthDB(blob) 144 if err != nil { 145 report("ERROR_INFLATION") 146 return nil, err 147 } 148 149 report("SUCCESS") 150 return db, nil 151 } 152 153 // fetchDeflated fetches a deflated AuthDB from datastore, perhaps reassembling 154 // it from shards. 155 // 156 // See also storeDeflated. 
157 func fetchDeflated(ctx context.Context, id string) (blob []byte, code string, err error) { 158 ctx = ds.WithoutTransaction(defaultNS(ctx)) 159 160 snap := Snapshot{ID: id} 161 162 switch err = ds.Get(ctx, &snap); { 163 case err == ds.ErrNoSuchEntity: 164 return nil, "ERROR_NO_SNAPSHOT", err // not transient 165 case err != nil: 166 return nil, "ERROR_TRANSIENT", transient.Tag.Apply(err) 167 } 168 169 if len(snap.ShardIDs) != 0 { 170 logging.Infof(ctx, "Reconstructing from %d shards", len(snap.ShardIDs)) 171 switch snap.AuthDBDeflated, err = unshardAuthDB(ctx, snap.ShardIDs); { 172 case transient.Tag.In(err): 173 return nil, "ERROR_SHARDS_TRANSIENT", err 174 case err != nil: 175 // We apply the transient tag here to return Internal code 176 // instead of Unauthenticated code. The Unauthenticated code 177 // is misleading when we encountered an error in unshardAuthDB. 178 // https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/server/auth/auth.go;l=272 179 return nil, "ERROR_SHARDS_MISSING", transient.Tag.Apply(err) 180 } 181 } 182 183 return snap.AuthDBDeflated, "SUCCESS", nil 184 } 185 186 // ConfigureAuthService makes initial fetch of AuthDB snapshot from the auth 187 // service and sets up PubSub subscription. 188 // 189 // `baseURL` is root URL of currently running service, will be used to derive 190 // PubSub push endpoint URL. 191 // 192 // If `authServiceURL` is blank, disables the fetching. 193 func ConfigureAuthService(ctx context.Context, baseURL, authServiceURL string) error { 194 logging.Infof(ctx, "Reconfiguring AuthDB to be fetched from %q", authServiceURL) 195 ctx = defaultNS(ctx) 196 197 // If switching auth services, need to grab URL of a currently configured 198 // auth service to unsubscribe from its PubSub stream. 
199 prevAuthServiceURL := "" 200 switch existing, err := GetLatestSnapshotInfo(ctx); { 201 case err != nil: 202 return err 203 case existing != nil: 204 prevAuthServiceURL = existing.AuthServiceURL 205 } 206 207 // Stopping synchronization completely? 208 if authServiceURL == "" { 209 if prevAuthServiceURL != "" { 210 if err := killPubSub(ctx, prevAuthServiceURL); err != nil { 211 return err 212 } 213 } 214 return deleteSnapshotInfo(ctx) 215 } 216 217 // Fetch latest AuthDB snapshot and store it in the datastore, thus verifying 218 // authServiceURL works end-to-end. 219 srv := getAuthService(ctx, authServiceURL) 220 latestRev, err := srv.GetLatestSnapshotRevision(ctx) 221 if err != nil { 222 return err 223 } 224 info := &SnapshotInfo{ 225 AuthServiceURL: authServiceURL, 226 Rev: latestRev, 227 } 228 if err := fetchSnapshot(ctx, info); err != nil { 229 logging.Errorf(ctx, "Failed to fetch latest snapshot from %s - %s", authServiceURL, err) 230 return err 231 } 232 233 // Configure PubSub subscription to receive future updates. 234 if err := setupPubSub(ctx, baseURL, authServiceURL); err != nil { 235 logging.Errorf(ctx, "Failed to configure pubsub subscription - %s", err) 236 return err 237 } 238 239 // All is configured. Switch SnapshotInfo entity to point to new snapshot. 240 // It makes syncAuthDB fetch changes from `authServiceURL`, thus promoting 241 // `authServiceURL` to the status of main auth service. 242 if err := ds.Put(ds.WithoutTransaction(ctx), info); err != nil { 243 return transient.Tag.Apply(err) 244 } 245 246 // Stop getting notifications from previously used auth service. 247 if prevAuthServiceURL != "" && prevAuthServiceURL != authServiceURL { 248 return killPubSub(ctx, prevAuthServiceURL) 249 } 250 251 return nil 252 } 253 254 // fetchSnapshot fetches AuthDB snapshot specified by `info` and puts it into 255 // the datastore. 256 // 257 // Idempotent. Doesn't touch SnapshotInfo entity itself, and thus always safe 258 // to call. 
259 func fetchSnapshot(ctx context.Context, info *SnapshotInfo) error { 260 srv := getAuthService(ctx, info.AuthServiceURL) 261 snap, err := srv.GetSnapshot(ctx, info.Rev) 262 if err != nil { 263 return err 264 } 265 blob, err := service.DeflateAuthDB(snap.AuthDB) 266 if err != nil { 267 return err 268 } 269 if err := storeDeflated(ctx, info.GetSnapshotID(), blob, snap.Created, maxShardSize); err != nil { 270 return err 271 } 272 logging.Infof(ctx, "Lag: %s", clock.Now(ctx).Sub(snap.Created)) 273 return nil 274 } 275 276 // storeDeflated stores a deflated AuthDB into datastore, perhaps splitting it 277 // into shards. 278 // 279 // See also fetchDeflated. 280 func storeDeflated(ctx context.Context, id string, blob []byte, created time.Time, maxShardSize int) error { 281 ctx = ds.WithoutTransaction(defaultNS(ctx)) 282 283 snapshot := Snapshot{ 284 ID: id, 285 CreatedAt: created.UTC(), 286 FetchedAt: clock.Now(ctx).UTC(), 287 } 288 289 // If we are able to store AuthDB inline in the Snapshot, do it. That way 290 // older versions of this code can still successfully read it. If it doesn't 291 // fit, there's nothing we can do other than to store it separately in shards. 292 // The old code will see unrecognized ShardIDs field and will fail. 293 if len(blob) < maxShardSize { 294 snapshot.AuthDBDeflated = blob 295 } else { 296 var err error 297 if snapshot.ShardIDs, err = shardAuthDB(ctx, id, blob, maxShardSize); err != nil { 298 return err 299 } 300 logging.Infof(ctx, "Split into %d shards", len(snapshot.ShardIDs)) 301 } 302 303 return transient.Tag.Apply(ds.Put(ctx, &snapshot)) 304 } 305 306 // syncAuthDB fetches latest AuthDB snapshot from the configured auth service, 307 // puts it into the datastore and updates SnapshotInfo entity to point to it. 308 // 309 // Expects authenticating transport to be in the context. Called when receiving 310 // PubSub notifications. 311 // 312 // Returns SnapshotInfo of the most recent snapshot. 
313 func syncAuthDB(ctx context.Context) (*SnapshotInfo, error) { 314 report := durationReporter(ctx, syncAuthDBDuration) 315 316 // `info` is what we have in the datastore now. 317 info, err := GetLatestSnapshotInfo(ctx) 318 if err != nil { 319 report("ERROR_GET_LATEST_INFO") 320 return nil, err 321 } 322 if info == nil { 323 report("ERROR_NOT_CONFIGURED") 324 return nil, errors.New("auth_service URL is not configured") 325 } 326 327 // Grab revision number of the latest snapshot on the server. 328 srv := getAuthService(ctx, info.AuthServiceURL) 329 latestRev, err := srv.GetLatestSnapshotRevision(ctx) 330 if err != nil { 331 report("ERROR_GET_LATEST_REVISION") 332 return nil, err 333 } 334 335 // Nothing new? 336 if info.Rev == latestRev { 337 logging.Infof(ctx, "AuthDB is up-to-date at revision %d", latestRev) 338 report("SUCCESS_UP_TO_DATE") 339 return info, nil 340 } 341 342 // Auth service traveled back in time? 343 if info.Rev > latestRev { 344 logging.Errorf( 345 ctx, "Latest AuthDB revision on server is %d, we have %d. It should not happen", 346 latestRev, info.Rev) 347 report("SUCCESS_NEWER_ALREADY") 348 return info, nil 349 } 350 351 // Fetch the actual snapshot from the server and put it into the datastore. 352 info.Rev = latestRev 353 if err = fetchSnapshot(ctx, info); err != nil { 354 logging.Errorf(ctx, "Failed to fetch snapshot %d from %q - %s", info.Rev, info.AuthServiceURL, err) 355 report("ERROR_FETCHING") 356 return nil, err 357 } 358 359 // Move pointer to the latest snapshot only if it is more recent than what is 360 // already in the datastore. 
361 var latest *SnapshotInfo 362 err = ds.RunInTransaction(ds.WithoutTransaction(ctx), func(ctx context.Context) error { 363 latest = &SnapshotInfo{} 364 switch err := ds.Get(ctx, latest); { 365 case err == ds.ErrNoSuchEntity: 366 logging.Warningf(ctx, "No longer need to fetch AuthDB, not configured anymore") 367 return nil 368 case err != nil: 369 return err 370 case latest.AuthServiceURL != info.AuthServiceURL: 371 logging.Warningf( 372 ctx, "No longer need to fetch AuthDB from %q, %q is primary now", 373 info.AuthServiceURL, latest.AuthServiceURL) 374 return nil 375 case latest.Rev >= info.Rev: 376 logging.Warningf(ctx, "Already have rev %d", info.Rev) 377 return nil 378 } 379 latest = info 380 return ds.Put(ctx, info) 381 }, nil) 382 383 if err != nil { 384 report("ERROR_COMMITTING") 385 return nil, transient.Tag.Apply(err) 386 } 387 388 report("SUCCESS_UPDATED") 389 return latest, nil 390 } 391 392 // shardAuthDB splits an AuthDB blob into multiple SnapshotShard entities. 393 func shardAuthDB(ctx context.Context, id string, blob []byte, maxSize int) ([]string, error) { 394 var ids []string 395 396 var shard []byte 397 for len(blob) != 0 { 398 shardSize := maxSize 399 if shardSize > len(blob) { 400 shardSize = len(blob) 401 } 402 shard, blob = blob[:shardSize], blob[shardSize:] 403 404 digest := sha256.Sum256(shard) 405 shardID := fmt.Sprintf("%s:%s", id, hex.EncodeToString(digest[:])) 406 ids = append(ids, shardID) 407 408 // Store shards sequentially to avoid allocating RAM to store full `blob` in 409 // RPC buffers. There's no requirement for this code to be performant, it 410 // executes in a background job. 411 err := ds.Put(ctx, &SnapshotShard{ID: shardID, Shard: shard}) 412 if err != nil { 413 return nil, transient.Tag.Apply(err) 414 } 415 } 416 417 return ids, nil 418 } 419 420 // unshardAuthDB fetches SnapshotShard entities and reassembles the AuthDB blob. 
421 func unshardAuthDB(ctx context.Context, shardIDs []string) ([]byte, error) { 422 shards := make([]SnapshotShard, len(shardIDs)) 423 for idx, id := range shardIDs { 424 shards[idx].ID = id 425 } 426 427 if err := ds.Get(ctx, shards); err != nil { 428 if merr, ok := err.(errors.MultiError); ok { 429 for _, inner := range merr { 430 if inner == ds.ErrNoSuchEntity { 431 return nil, err // fatal 432 } 433 } 434 return nil, transient.Tag.Apply(err) 435 } else { 436 // Overall RPC error. 437 return nil, transient.Tag.Apply(err) 438 } 439 } 440 441 slices := make([][]byte, len(shards)) 442 for idx, shard := range shards { 443 slices[idx] = shard.Shard 444 } 445 return bytes.Join(slices, nil), nil 446 }