go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/auth_service/impl/model/importer.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "archive/tar" 19 "compress/gzip" 20 "context" 21 "fmt" 22 "io" 23 "regexp" 24 "sort" 25 "strings" 26 "time" 27 28 "google.golang.org/protobuf/encoding/prototext" 29 30 "go.chromium.org/luci/auth/identity" 31 "go.chromium.org/luci/common/clock" 32 "go.chromium.org/luci/common/data/stringset" 33 "go.chromium.org/luci/common/errors" 34 "go.chromium.org/luci/common/logging" 35 "go.chromium.org/luci/gae/service/datastore" 36 "go.chromium.org/luci/server/auth" 37 38 "go.chromium.org/luci/auth_service/api/configspb" 39 ) 40 41 // Imports groups from some external tar.gz bundle or plain text list. 42 // External URL should serve *.tar.gz file with the following file structure: 43 // <external group system name>/<group name>: 44 // userid 45 // userid 46 // ... 47 48 // For example ldap.tar.gz may look like: 49 // ldap/trusted-users: 50 // jane 51 // joe 52 // ... 53 // ldap/all: 54 // jane 55 // joe 56 // ... 57 58 // Each tarball may have groups from multiple external systems, but groups from 59 // some external system must not be split between multiple tarballs. 
// When importer sees <external group system name>/* in a tarball, it modifies
// the group list from that system on the server to match the group list in the
// tarball _exactly_, including removal of groups that are on the server, but no
// longer present in the tarball.

// Plain list format should have one userid per line and can only describe a single
// group in a single system. Such groups will be added to 'external/*' groups
// namespace. Removing such group from importer config will remove it from
// service too.

// The service can also be configured to accept tarball uploads (instead of
// fetching them). Fetched and uploaded tarballs are handled in the exact same way,
// in particular all caveats related to external group system names apply.

// GroupImporterConfig is a singleton entity that contains the contents of the imports.cfg file.
//
// The `gae` tags pin the datastore kind to "GroupImporterConfig" and the
// entity ID to "config".
type GroupImporterConfig struct {
	Kind string `gae:"$kind,GroupImporterConfig"`
	ID   string `gae:"$id,config"`

	// ConfigProto is the plaintext copy of the config found at imports.cfg.
	// ToProto parses it as a text-format configspb.GroupImporterConfig.
	ConfigProto string `gae:"config_proto"`

	// ConfigRevision is revision version of the config found at imports.cfg.
	ConfigRevision []byte `gae:"config_revision"`

	// ModifiedBy is the email of the user who modified the cfg.
	ModifiedBy string `gae:"modified_by"`

	// ModifiedTS is the time when this entity was last modified.
	ModifiedTS time.Time `gae:"modified_ts"`
}

// GroupNameRe matches valid group names: an optional "<prefix>/" where the
// prefix consists of lowercase ASCII letters and '-', followed by 1 to 100
// characters drawn from digits, lowercase ASCII letters, '_', '-', '.' and '@'.
var GroupNameRe = regexp.MustCompile(`^([a-z\-]+/)?[0-9a-z_\-\.@]{1,100}$`)

// GroupBundle is a map where k: groupName, v: list of identities belonging to group k.
type GroupBundle = map[string][]identity.Identity

// GetGroupImporterConfig fetches the GroupImporterConfig entity from the datastore.
//
// Returns GroupImporterConfig entity if present.
// Returns datastore.ErrNoSuchEntity if the entity is not present.
// Returns annotated error for all other errors.
102 func GetGroupImporterConfig(ctx context.Context) (*GroupImporterConfig, error) { 103 groupsCfg := &GroupImporterConfig{ 104 Kind: "GroupImporterConfig", 105 ID: "config", 106 } 107 108 switch err := datastore.Get(ctx, groupsCfg); { 109 case err == nil: 110 return groupsCfg, nil 111 case errors.Is(err, datastore.ErrNoSuchEntity): 112 return nil, err 113 default: 114 return nil, errors.Annotate(err, "error getting GroupImporterConfig").Err() 115 } 116 } 117 118 // IngestTarball handles upload of tarball's specified in 'tarball_upload' config entries. 119 // expected to be called in an auth context of the upload PUT request. 120 // 121 // returns 122 // 123 // []string - list of modified groups 124 // int64 - authDBRevision 125 // error 126 // proto translation error 127 // entry is nil 128 // entry not found in tarball upload config 129 // unauthorized uploader 130 // bad tarball structure 131 func IngestTarball(ctx context.Context, name string, content io.Reader) ([]string, int64, error) { 132 g, err := GetGroupImporterConfig(ctx) 133 if err != nil { 134 return nil, 0, err 135 } 136 gConfigProto, err := g.ToProto() 137 if err != nil { 138 return nil, 0, errors.Annotate(err, "issue getting proto from config entity").Err() 139 } 140 caller := auth.CurrentIdentity(ctx) 141 var entry *configspb.GroupImporterConfig_TarballUploadEntry 142 143 // make sure that tarball_upload entry we're looking for is specified in config 144 for _, tbu := range gConfigProto.GetTarballUpload() { 145 if tbu.Name == name { 146 entry = tbu 147 break 148 } 149 } 150 151 if entry == nil { 152 return nil, 0, errors.New("entry is nil") 153 } 154 155 if entry.Name == "" { 156 return nil, 0, errors.New("entry not found in tarball upload names") 157 } 158 if !contains(caller.Email(), entry.AuthorizedUploader) { 159 return nil, 0, errors.New(fmt.Sprintf("%q is not an authorized uploader", caller.Email())) 160 } 161 162 bundles, err := loadTarball(ctx, content, entry.GetDomain(), 
entry.GetSystems(), entry.GetGroups()) 163 if err != nil { 164 return nil, 0, errors.Annotate(err, "bad tarball").Err() 165 } 166 167 return importBundles(ctx, bundles, caller, nil) 168 } 169 170 // loadTarball unzips tarball with groups and deserializes them. 171 func loadTarball(ctx context.Context, content io.Reader, domain string, systems, groups []string) (map[string]GroupBundle, error) { 172 // map looks like: K: system, V: { K: groupName, V: []identities } 173 bundles := make(map[string]GroupBundle) 174 entries, err := extractTarArchive(content) 175 if err != nil { 176 return nil, err 177 } 178 179 // verify system/groupname and then parse blob if valid 180 for filename, fileobj := range entries { 181 chunks := strings.Split(filename, "/") 182 if len(chunks) != 2 || !GroupNameRe.MatchString(chunks[1]) { 183 logging.Warningf(ctx, "Skipping file %s, not a valid name", filename) 184 continue 185 } 186 if groups != nil && !contains(filename, groups) { 187 continue 188 } 189 system := chunks[0] 190 if !contains(system, systems) { 191 logging.Warningf(ctx, "Skipping file %s, not allowed", filename) 192 continue 193 } 194 identities, err := loadGroupFile(string(fileobj), domain) 195 if err != nil { 196 return nil, err 197 } 198 if _, ok := bundles[system]; !ok { 199 bundles[system] = make(GroupBundle) 200 } 201 bundles[system][filename] = identities 202 } 203 return bundles, nil 204 } 205 206 func loadGroupFile(identities string, domain string) ([]identity.Identity, error) { 207 members := make(map[identity.Identity]bool) 208 memsSplit := strings.Split(identities, "\n") 209 for _, uid := range memsSplit { 210 uid = strings.TrimSpace(uid) 211 if uid == "" { 212 continue 213 } 214 var ident string 215 if domain == "" { 216 ident = fmt.Sprintf("user:%s", uid) 217 } else { 218 ident = fmt.Sprintf("user:%s@%s", uid, domain) 219 } 220 emailIdent, err := identity.MakeIdentity(ident) 221 if err != nil { 222 return nil, err 223 } 224 members[emailIdent] = true 225 } 226 227 
membersSorted := make([]identity.Identity, 0, len(members)) 228 for mem := range members { 229 membersSorted = append(membersSorted, mem) 230 } 231 sort.Slice(membersSorted, func(i, j int) bool { 232 return membersSorted[i].Value() < membersSorted[j].Value() 233 }) 234 235 return membersSorted, nil 236 } 237 238 // importBundles imports given set of bundles all at once. 239 // A bundle is a map with groups that is the result of a processing of some tarball. 240 // A bundle specifies the desired state of all groups under some system, e.g. 241 // importBundles({'ldap': {}}, ...) will REMOVE all existing 'ldap/*' groups. 242 // 243 // Group names in the bundle are specified in their full prefixed form (with 244 // system name prefix). An example of expected 'bundles': 245 // 246 // { 247 // 'ldap': { 248 // 'ldap/group': [Identity(...), Identity(...)], 249 // }, 250 // } 251 // 252 // Args: 253 // 254 // bundles: map system name -> GroupBundle 255 // providedBy: auth.Identity to put in modifiedBy or createdBy fields. 256 // 257 // Returns: 258 // 259 // (list of modified groups, 260 // new AuthDB revision number or 0 if no changes, 261 // error if issue with writing entities). 262 func importBundles(ctx context.Context, bundles map[string]GroupBundle, providedBy identity.Identity, testHook func()) ([]string, int64, error) { 263 // Nothing to process. 264 if len(bundles) == 0 { 265 return []string{}, 0, nil 266 } 267 268 getAuthDBRevision := func(ctx context.Context) (int64, error) { 269 state, err := GetReplicationState(ctx) 270 switch { 271 case errors.Is(err, datastore.ErrNoSuchEntity): 272 return 0, nil 273 case err != nil: 274 return -1, err 275 default: 276 return state.AuthDBRev, nil 277 } 278 } 279 280 // Fetches all existing groups and AuthDB revision number. 
281 groupsSnapshot := func(ctx context.Context) (gMap map[string]*AuthGroup, rev int64, err error) { 282 err = datastore.RunInTransaction(ctx, func(ctx context.Context) error { 283 groups, err := GetAllAuthGroups(ctx) 284 if err != nil { 285 return err 286 } 287 gMap = make(map[string]*AuthGroup, len(groups)) 288 for _, g := range groups { 289 gMap[g.ID] = g 290 } 291 rev, err = getAuthDBRevision(ctx) 292 if err != nil { 293 return errors.Annotate(err, "couldn't get AuthDBRev").Err() 294 } 295 return nil 296 }, nil) 297 return gMap, rev, err 298 } 299 300 // Transactionally puts and deletes a bunch of entities. 301 applyImport := func(expectedRevision int64, entitiesToPut, entitiesToDelete []*AuthGroup, ts time.Time) error { 302 // Runs in transaction. 303 return runAuthDBChange(ctx, "Imported from group bundles", func(ctx context.Context, cae commitAuthEntity) error { 304 rev, err := getAuthDBRevision(ctx) 305 if err != nil { 306 return err 307 } 308 309 // DB changed between transactions try again. 310 if rev != expectedRevision { 311 return errors.New("revision numbers don't match") 312 } 313 for _, e := range entitiesToPut { 314 if err := cae(e, ts, providedBy, false); err != nil { 315 return err 316 } 317 } 318 319 for _, e := range entitiesToDelete { 320 if err := cae(e, ts, providedBy, true); err != nil { 321 return err 322 } 323 } 324 return nil 325 }) 326 } 327 328 updatedGroups := stringset.New(0) 329 revision := int64(0) 330 loopCount := 0 331 var groups map[string]*AuthGroup 332 var err error 333 334 // Try to apply the change in batches until it lands completely or deadline 335 // happens. Split each batch update into two transactions (assuming AuthDB 336 // changes infrequently) to avoid reading and writing too much stuff from 337 // within a single transaction (and to avoid keeping the transaction open while 338 // calculating the diff). 
339 for { 340 // Use same timestamp everywhere to reflect that groups were imported 341 // atomically within a single transaction. 342 ts := clock.Now(ctx).UTC() 343 loopCount += 1 344 groups, revision, err = groupsSnapshot(ctx) 345 if err != nil { 346 return nil, revision, err 347 } 348 // For testing purposes only. 349 if testHook != nil && loopCount == 2 { 350 testHook() 351 } 352 entitiesToPut := []*AuthGroup{} 353 entitiesToDel := []*AuthGroup{} 354 for sys := range bundles { 355 iGroups := bundles[sys] 356 toPut, toDel := prepareImport(ctx, sys, groups, iGroups) 357 entitiesToPut = append(entitiesToPut, toPut...) 358 entitiesToDel = append(entitiesToDel, toDel...) 359 } 360 361 if len(entitiesToPut) == 0 && len(entitiesToDel) == 0 { 362 logging.Infof(ctx, "nothing to do") 363 break 364 } 365 366 // An `applyImport` transaction can touch at most 500 entities. Cap the 367 // number of entities we create/delete by 200 each since we attach a historical 368 // entity to each entity. The rest will be updated on the next cycle of the loop. 369 // This is safe to do since: 370 // * Imported groups are "leaf" groups (have no subgroups) and can be added 371 // in arbitrary order without worrying about referential integrity. 372 // * Deleted groups are guaranteed to be unreferenced by `prepareImport` 373 // and can be deleted in arbitrary order as well. 374 truncated := false 375 376 // Both these operations happen in the same transaction so we have 377 // to trim it to make sure the total is <= 200. 378 if len(entitiesToPut) > 200 { 379 entitiesToPut = entitiesToPut[:200] 380 entitiesToDel = nil 381 truncated = true 382 } else if len(entitiesToPut)+len(entitiesToDel) > 200 { 383 entitiesToDel = entitiesToDel[:200-len(entitiesToPut)] 384 truncated = true 385 } 386 387 // Log what we are about to do to help debugging transaction errors. 
388 logging.Infof(ctx, "Preparing AuthDB rev %d with %d puts and %d deletes:", revision+1, len(entitiesToPut), len(entitiesToDel)) 389 for _, e := range entitiesToPut { 390 logging.Infof(ctx, "U %s", e.ID) 391 updatedGroups.Add(e.ID) 392 } 393 for _, e := range entitiesToDel { 394 logging.Infof(ctx, "D %s", e.ID) 395 updatedGroups.Add(e.ID) 396 } 397 398 // Land the change iff the current AuthDB revision is still == `revision`. 399 err := applyImport(revision, entitiesToPut, entitiesToDel, ts) 400 if err != nil && strings.Contains(err.Error(), "revision numbers don't match") { 401 logging.Warningf(ctx, "authdb changed between transactions, retrying...") 402 continue 403 } else if err != nil { 404 logging.Errorf(ctx, "couldn't apply changes to datastore entities %s", err.Error()) 405 return nil, revision, err 406 } 407 408 // The new revision has landed 409 revision += 1 410 411 if truncated { 412 logging.Infof(ctx, "going for another round to push the rest of the groups") 413 clock.Sleep(ctx, 5*time.Second) 414 continue 415 } 416 417 logging.Infof(ctx, "Done") 418 break 419 } 420 421 if len(updatedGroups) > 0 { 422 return updatedGroups.ToSortedSlice(), int64(revision), nil 423 } 424 425 return nil, 0, nil 426 } 427 428 // prepareImport compares the bundle given to the what is currently present in datastore 429 // to get the operations for all the groups. 430 func prepareImport(ctx context.Context, systemName string, existingGroups map[string]*AuthGroup, iGroups GroupBundle) (toPut []*AuthGroup, toDel []*AuthGroup) { 431 systemGroups := []string{} 432 iGroupsSet := stringset.New(len(iGroups)) 433 for gID := range existingGroups { 434 if strings.HasPrefix(gID, fmt.Sprintf("%s/", systemName)) { 435 systemGroups = append(systemGroups, gID) 436 } 437 } 438 439 for groupName := range iGroups { 440 iGroupsSet.Add(groupName) 441 } 442 443 sysGroupsSet := stringset.NewFromSlice(systemGroups...) 
444 445 toCreate := iGroupsSet.Difference(sysGroupsSet).ToSlice() 446 for _, g := range toCreate { 447 group := makeAuthGroup(ctx, g) 448 group.Members = identitiesToStrings(iGroups[g]) 449 toPut = append(toPut, group) 450 } 451 452 toUpdate := sysGroupsSet.Intersect(iGroupsSet).ToSlice() 453 for _, g := range toUpdate { 454 importGMems := stringset.NewFromSlice(identitiesToStrings(iGroups[g])...) 455 existMems := existingGroups[g].Members 456 if !(len(importGMems) == len(existMems) && importGMems.HasAll(existMems...)) { 457 group := makeAuthGroup(ctx, g) 458 group.Members = importGMems.ToSlice() 459 toPut = append(toPut, group) 460 } 461 } 462 463 toDelete := sysGroupsSet.Difference(iGroupsSet).ToSlice() 464 for _, g := range toDelete { 465 group := makeAuthGroup(ctx, g) 466 toDel = append(toDel, group) 467 } 468 469 return toPut, toDel 470 } 471 472 func identitiesToStrings(idents []identity.Identity) []string { 473 res := make([]string, len(idents)) 474 for i, id := range idents { 475 res[i] = string(id) 476 } 477 return res 478 } 479 480 // extractTarArchive unpacks a tar archive and returns a map 481 // of filename -> fileobj pairs. 482 func extractTarArchive(r io.Reader) (map[string][]byte, error) { 483 entries := make(map[string][]byte) 484 gzr, err := gzip.NewReader(r) 485 if err != nil { 486 return nil, err 487 } 488 489 tr := tar.NewReader(gzr) 490 for { 491 header, err := tr.Next() 492 if errors.Is(err, io.EOF) { 493 break 494 } 495 if err != nil { 496 return nil, err 497 } 498 fileContents, err := io.ReadAll(tr) 499 if err != nil { 500 return nil, err 501 } 502 entries[header.Name] = fileContents 503 } 504 505 if err := gzr.Close(); err != nil { 506 return nil, err 507 } 508 return entries, nil 509 } 510 511 // TODO(cjacomet): replace with slices.Contains when 512 // slices package isn't experimental. 
513 func contains(key string, search []string) bool { 514 for _, val := range search { 515 if val == key { 516 return true 517 } 518 } 519 return false 520 } 521 522 // ToProto converts the GroupImporterConfig entity to the proto equivalent. 523 func (g *GroupImporterConfig) ToProto() (*configspb.GroupImporterConfig, error) { 524 gConfig := &configspb.GroupImporterConfig{} 525 if err := prototext.Unmarshal([]byte(g.ConfigProto), gConfig); err != nil { 526 return nil, err 527 } 528 return gConfig, nil 529 }