go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/auth_service/impl/model/importer.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"archive/tar"
    19  	"compress/gzip"
    20  	"context"
    21  	"fmt"
    22  	"io"
    23  	"regexp"
    24  	"sort"
    25  	"strings"
    26  	"time"
    27  
    28  	"google.golang.org/protobuf/encoding/prototext"
    29  
    30  	"go.chromium.org/luci/auth/identity"
    31  	"go.chromium.org/luci/common/clock"
    32  	"go.chromium.org/luci/common/data/stringset"
    33  	"go.chromium.org/luci/common/errors"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/gae/service/datastore"
    36  	"go.chromium.org/luci/server/auth"
    37  
    38  	"go.chromium.org/luci/auth_service/api/configspb"
    39  )
    40  
    41  // Imports groups from some external tar.gz bundle or plain text list.
    42  // External URL should serve *.tar.gz file with the following file structure:
    43  //   <external group system name>/<group name>:
    44  //     userid
    45  //     userid
    46  //     ...
    47  
    48  // For example ldap.tar.gz may look like:
    49  //   ldap/trusted-users:
    50  //     jane
    51  //     joe
    52  //     ...
    53  //   ldap/all:
    54  //     jane
    55  //     joe
    56  //     ...
    57  
    58  // Each tarball may have groups from multiple external systems, but groups from
    59  // some external system must not be split between multiple tarballs. When importer
    60  // sees <external group system name>/* in a tarball, it modifies group list from
    61  // that system on the server to match group list in the tarball _exactly_,
    62  // including removal of groups that are on the server, but no longer present in
    63  // the tarball.
    64  
    65  // Plain list format should have one userid per line and can only describe a single
    66  // group in a single system. Such groups will be added to 'external/*' groups
    67  // namespace. Removing such group from importer config will remove it from
    68  // service too.
    69  
    70  // The service can also be configured to accept tarball uploads (instead of
    71  // fetching them). Fetched and uploaded tarballs are handled in the exact same way,
    72  // in particular all caveats related to external group system names apply.
    73  
// GroupImporterConfig is a singleton entity that contains the contents of the
// imports.cfg file.
type GroupImporterConfig struct {
	// Kind and ID carry the datastore kind ("GroupImporterConfig") and entity
	// ID ("config") named in their `gae` tags.
	Kind string `gae:"$kind,GroupImporterConfig"`
	ID   string `gae:"$id,config"`

	// ConfigProto is the plaintext copy of the config found at imports.cfg.
	// ToProto parses it as a text-format configspb.GroupImporterConfig.
	ConfigProto string `gae:"config_proto"`

	// ConfigRevision is the revision of the config found at imports.cfg.
	ConfigRevision []byte `gae:"config_revision"`

	// ModifiedBy is the email of the user who modified the cfg.
	ModifiedBy string `gae:"modified_by"`

	// ModifiedTS is the time when this entity was last modified.
	ModifiedTS time.Time `gae:"modified_ts"`
}
    91  
// GroupNameRe matches valid group names: an optional "<prefix>/" made of
// lowercase letters and hyphens, followed by 1 to 100 characters drawn from
// digits, lowercase letters, '_', '-', '.' and '@'.
var GroupNameRe = regexp.MustCompile(`^([a-z\-]+/)?[0-9a-z_\-\.@]{1,100}$`)
    93  
// GroupBundle maps a group name to the list of identities that belong to that
// group.
type GroupBundle = map[string][]identity.Identity
    96  
    97  // GetGroupImporterConfig fetches the GroupImporterConfig entity from the datastore.
    98  //
    99  //	Returns GroupImporterConfig entity if present.
   100  //	Returns datastore.ErrNoSuchEntity if the entity is not present.
   101  //	Returns annotated error for all other errors.
   102  func GetGroupImporterConfig(ctx context.Context) (*GroupImporterConfig, error) {
   103  	groupsCfg := &GroupImporterConfig{
   104  		Kind: "GroupImporterConfig",
   105  		ID:   "config",
   106  	}
   107  
   108  	switch err := datastore.Get(ctx, groupsCfg); {
   109  	case err == nil:
   110  		return groupsCfg, nil
   111  	case errors.Is(err, datastore.ErrNoSuchEntity):
   112  		return nil, err
   113  	default:
   114  		return nil, errors.Annotate(err, "error getting GroupImporterConfig").Err()
   115  	}
   116  }
   117  
   118  // IngestTarball handles upload of tarball's specified in 'tarball_upload' config entries.
   119  // expected to be called in an auth context of the upload PUT request.
   120  //
   121  // returns
   122  //
   123  //	[]string - list of modified groups
   124  //	int64 - authDBRevision
   125  //	error
   126  //		proto translation error
   127  //		entry is nil
   128  //		entry not found in tarball upload config
   129  //		unauthorized uploader
   130  //		bad tarball structure
   131  func IngestTarball(ctx context.Context, name string, content io.Reader) ([]string, int64, error) {
   132  	g, err := GetGroupImporterConfig(ctx)
   133  	if err != nil {
   134  		return nil, 0, err
   135  	}
   136  	gConfigProto, err := g.ToProto()
   137  	if err != nil {
   138  		return nil, 0, errors.Annotate(err, "issue getting proto from config entity").Err()
   139  	}
   140  	caller := auth.CurrentIdentity(ctx)
   141  	var entry *configspb.GroupImporterConfig_TarballUploadEntry
   142  
   143  	// make sure that tarball_upload entry we're looking for is specified in config
   144  	for _, tbu := range gConfigProto.GetTarballUpload() {
   145  		if tbu.Name == name {
   146  			entry = tbu
   147  			break
   148  		}
   149  	}
   150  
   151  	if entry == nil {
   152  		return nil, 0, errors.New("entry is nil")
   153  	}
   154  
   155  	if entry.Name == "" {
   156  		return nil, 0, errors.New("entry not found in tarball upload names")
   157  	}
   158  	if !contains(caller.Email(), entry.AuthorizedUploader) {
   159  		return nil, 0, errors.New(fmt.Sprintf("%q is not an authorized uploader", caller.Email()))
   160  	}
   161  
   162  	bundles, err := loadTarball(ctx, content, entry.GetDomain(), entry.GetSystems(), entry.GetGroups())
   163  	if err != nil {
   164  		return nil, 0, errors.Annotate(err, "bad tarball").Err()
   165  	}
   166  
   167  	return importBundles(ctx, bundles, caller, nil)
   168  }
   169  
   170  // loadTarball unzips tarball with groups and deserializes them.
   171  func loadTarball(ctx context.Context, content io.Reader, domain string, systems, groups []string) (map[string]GroupBundle, error) {
   172  	// map looks like: K: system, V: { K: groupName, V: []identities }
   173  	bundles := make(map[string]GroupBundle)
   174  	entries, err := extractTarArchive(content)
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  
   179  	// verify system/groupname and then parse blob if valid
   180  	for filename, fileobj := range entries {
   181  		chunks := strings.Split(filename, "/")
   182  		if len(chunks) != 2 || !GroupNameRe.MatchString(chunks[1]) {
   183  			logging.Warningf(ctx, "Skipping file %s, not a valid name", filename)
   184  			continue
   185  		}
   186  		if groups != nil && !contains(filename, groups) {
   187  			continue
   188  		}
   189  		system := chunks[0]
   190  		if !contains(system, systems) {
   191  			logging.Warningf(ctx, "Skipping file %s, not allowed", filename)
   192  			continue
   193  		}
   194  		identities, err := loadGroupFile(string(fileobj), domain)
   195  		if err != nil {
   196  			return nil, err
   197  		}
   198  		if _, ok := bundles[system]; !ok {
   199  			bundles[system] = make(GroupBundle)
   200  		}
   201  		bundles[system][filename] = identities
   202  	}
   203  	return bundles, nil
   204  }
   205  
   206  func loadGroupFile(identities string, domain string) ([]identity.Identity, error) {
   207  	members := make(map[identity.Identity]bool)
   208  	memsSplit := strings.Split(identities, "\n")
   209  	for _, uid := range memsSplit {
   210  		uid = strings.TrimSpace(uid)
   211  		if uid == "" {
   212  			continue
   213  		}
   214  		var ident string
   215  		if domain == "" {
   216  			ident = fmt.Sprintf("user:%s", uid)
   217  		} else {
   218  			ident = fmt.Sprintf("user:%s@%s", uid, domain)
   219  		}
   220  		emailIdent, err := identity.MakeIdentity(ident)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  		members[emailIdent] = true
   225  	}
   226  
   227  	membersSorted := make([]identity.Identity, 0, len(members))
   228  	for mem := range members {
   229  		membersSorted = append(membersSorted, mem)
   230  	}
   231  	sort.Slice(membersSorted, func(i, j int) bool {
   232  		return membersSorted[i].Value() < membersSorted[j].Value()
   233  	})
   234  
   235  	return membersSorted, nil
   236  }
   237  
   238  // importBundles imports given set of bundles all at once.
   239  // A bundle is a map with groups that is the result of a processing of some tarball.
   240  // A bundle specifies the desired state of all groups under some system, e.g.
   241  // importBundles({'ldap': {}}, ...) will REMOVE all existing 'ldap/*' groups.
   242  //
   243  // Group names in the bundle are specified in their full prefixed form (with
   244  // system name prefix). An example of expected 'bundles':
   245  //
   246  //	{
   247  //	  'ldap': {
   248  //			'ldap/group': [Identity(...), Identity(...)],
   249  //	  },
   250  //	}
   251  //
   252  // Args:
   253  //
   254  //	bundles: map system name -> GroupBundle
   255  //	providedBy: auth.Identity to put in modifiedBy or createdBy fields.
   256  //
   257  // Returns:
   258  //
   259  //	(list of modified groups,
   260  //	new AuthDB revision number or 0 if no changes,
   261  //	error if issue with writing entities).
   262  func importBundles(ctx context.Context, bundles map[string]GroupBundle, providedBy identity.Identity, testHook func()) ([]string, int64, error) {
   263  	// Nothing to process.
   264  	if len(bundles) == 0 {
   265  		return []string{}, 0, nil
   266  	}
   267  
   268  	getAuthDBRevision := func(ctx context.Context) (int64, error) {
   269  		state, err := GetReplicationState(ctx)
   270  		switch {
   271  		case errors.Is(err, datastore.ErrNoSuchEntity):
   272  			return 0, nil
   273  		case err != nil:
   274  			return -1, err
   275  		default:
   276  			return state.AuthDBRev, nil
   277  		}
   278  	}
   279  
   280  	// Fetches all existing groups and AuthDB revision number.
   281  	groupsSnapshot := func(ctx context.Context) (gMap map[string]*AuthGroup, rev int64, err error) {
   282  		err = datastore.RunInTransaction(ctx, func(ctx context.Context) error {
   283  			groups, err := GetAllAuthGroups(ctx)
   284  			if err != nil {
   285  				return err
   286  			}
   287  			gMap = make(map[string]*AuthGroup, len(groups))
   288  			for _, g := range groups {
   289  				gMap[g.ID] = g
   290  			}
   291  			rev, err = getAuthDBRevision(ctx)
   292  			if err != nil {
   293  				return errors.Annotate(err, "couldn't get AuthDBRev").Err()
   294  			}
   295  			return nil
   296  		}, nil)
   297  		return gMap, rev, err
   298  	}
   299  
   300  	// Transactionally puts and deletes a bunch of entities.
   301  	applyImport := func(expectedRevision int64, entitiesToPut, entitiesToDelete []*AuthGroup, ts time.Time) error {
   302  		// Runs in transaction.
   303  		return runAuthDBChange(ctx, "Imported from group bundles", func(ctx context.Context, cae commitAuthEntity) error {
   304  			rev, err := getAuthDBRevision(ctx)
   305  			if err != nil {
   306  				return err
   307  			}
   308  
   309  			// DB changed between transactions try again.
   310  			if rev != expectedRevision {
   311  				return errors.New("revision numbers don't match")
   312  			}
   313  			for _, e := range entitiesToPut {
   314  				if err := cae(e, ts, providedBy, false); err != nil {
   315  					return err
   316  				}
   317  			}
   318  
   319  			for _, e := range entitiesToDelete {
   320  				if err := cae(e, ts, providedBy, true); err != nil {
   321  					return err
   322  				}
   323  			}
   324  			return nil
   325  		})
   326  	}
   327  
   328  	updatedGroups := stringset.New(0)
   329  	revision := int64(0)
   330  	loopCount := 0
   331  	var groups map[string]*AuthGroup
   332  	var err error
   333  
   334  	// Try to apply the change in batches until it lands completely or deadline
   335  	// happens. Split each batch update into two transactions (assuming AuthDB
   336  	// changes infrequently) to avoid reading and writing too much stuff from
   337  	// within a single transaction (and to avoid keeping the transaction open while
   338  	// calculating the diff).
   339  	for {
   340  		// Use same timestamp everywhere to reflect that groups were imported
   341  		// atomically within a single transaction.
   342  		ts := clock.Now(ctx).UTC()
   343  		loopCount += 1
   344  		groups, revision, err = groupsSnapshot(ctx)
   345  		if err != nil {
   346  			return nil, revision, err
   347  		}
   348  		// For testing purposes only.
   349  		if testHook != nil && loopCount == 2 {
   350  			testHook()
   351  		}
   352  		entitiesToPut := []*AuthGroup{}
   353  		entitiesToDel := []*AuthGroup{}
   354  		for sys := range bundles {
   355  			iGroups := bundles[sys]
   356  			toPut, toDel := prepareImport(ctx, sys, groups, iGroups)
   357  			entitiesToPut = append(entitiesToPut, toPut...)
   358  			entitiesToDel = append(entitiesToDel, toDel...)
   359  		}
   360  
   361  		if len(entitiesToPut) == 0 && len(entitiesToDel) == 0 {
   362  			logging.Infof(ctx, "nothing to do")
   363  			break
   364  		}
   365  
   366  		// An `applyImport` transaction can touch at most 500 entities. Cap the
   367  		// number of entities we create/delete by 200 each since we attach a historical
   368  		// entity to each entity. The rest will be updated on the next cycle of the loop.
   369  		// This is safe to do since:
   370  		//  * Imported groups are "leaf" groups (have no subgroups) and can be added
   371  		//    in arbitrary order without worrying about referential integrity.
   372  		//  * Deleted groups are guaranteed to be unreferenced by `prepareImport`
   373  		//    and can be deleted in arbitrary order as well.
   374  		truncated := false
   375  
   376  		// Both these operations happen in the same transaction so we have
   377  		// to trim it to make sure the total is <= 200.
   378  		if len(entitiesToPut) > 200 {
   379  			entitiesToPut = entitiesToPut[:200]
   380  			entitiesToDel = nil
   381  			truncated = true
   382  		} else if len(entitiesToPut)+len(entitiesToDel) > 200 {
   383  			entitiesToDel = entitiesToDel[:200-len(entitiesToPut)]
   384  			truncated = true
   385  		}
   386  
   387  		// Log what we are about to do to help debugging transaction errors.
   388  		logging.Infof(ctx, "Preparing AuthDB rev %d with %d puts and %d deletes:", revision+1, len(entitiesToPut), len(entitiesToDel))
   389  		for _, e := range entitiesToPut {
   390  			logging.Infof(ctx, "U %s", e.ID)
   391  			updatedGroups.Add(e.ID)
   392  		}
   393  		for _, e := range entitiesToDel {
   394  			logging.Infof(ctx, "D %s", e.ID)
   395  			updatedGroups.Add(e.ID)
   396  		}
   397  
   398  		// Land the change iff the current AuthDB revision is still == `revision`.
   399  		err := applyImport(revision, entitiesToPut, entitiesToDel, ts)
   400  		if err != nil && strings.Contains(err.Error(), "revision numbers don't match") {
   401  			logging.Warningf(ctx, "authdb changed between transactions, retrying...")
   402  			continue
   403  		} else if err != nil {
   404  			logging.Errorf(ctx, "couldn't apply changes to datastore entities %s", err.Error())
   405  			return nil, revision, err
   406  		}
   407  
   408  		// The new revision has landed
   409  		revision += 1
   410  
   411  		if truncated {
   412  			logging.Infof(ctx, "going for another round to push the rest of the groups")
   413  			clock.Sleep(ctx, 5*time.Second)
   414  			continue
   415  		}
   416  
   417  		logging.Infof(ctx, "Done")
   418  		break
   419  	}
   420  
   421  	if len(updatedGroups) > 0 {
   422  		return updatedGroups.ToSortedSlice(), int64(revision), nil
   423  	}
   424  
   425  	return nil, 0, nil
   426  }
   427  
   428  // prepareImport compares the bundle given to the what is currently present in datastore
   429  // to get the operations for all the groups.
   430  func prepareImport(ctx context.Context, systemName string, existingGroups map[string]*AuthGroup, iGroups GroupBundle) (toPut []*AuthGroup, toDel []*AuthGroup) {
   431  	systemGroups := []string{}
   432  	iGroupsSet := stringset.New(len(iGroups))
   433  	for gID := range existingGroups {
   434  		if strings.HasPrefix(gID, fmt.Sprintf("%s/", systemName)) {
   435  			systemGroups = append(systemGroups, gID)
   436  		}
   437  	}
   438  
   439  	for groupName := range iGroups {
   440  		iGroupsSet.Add(groupName)
   441  	}
   442  
   443  	sysGroupsSet := stringset.NewFromSlice(systemGroups...)
   444  
   445  	toCreate := iGroupsSet.Difference(sysGroupsSet).ToSlice()
   446  	for _, g := range toCreate {
   447  		group := makeAuthGroup(ctx, g)
   448  		group.Members = identitiesToStrings(iGroups[g])
   449  		toPut = append(toPut, group)
   450  	}
   451  
   452  	toUpdate := sysGroupsSet.Intersect(iGroupsSet).ToSlice()
   453  	for _, g := range toUpdate {
   454  		importGMems := stringset.NewFromSlice(identitiesToStrings(iGroups[g])...)
   455  		existMems := existingGroups[g].Members
   456  		if !(len(importGMems) == len(existMems) && importGMems.HasAll(existMems...)) {
   457  			group := makeAuthGroup(ctx, g)
   458  			group.Members = importGMems.ToSlice()
   459  			toPut = append(toPut, group)
   460  		}
   461  	}
   462  
   463  	toDelete := sysGroupsSet.Difference(iGroupsSet).ToSlice()
   464  	for _, g := range toDelete {
   465  		group := makeAuthGroup(ctx, g)
   466  		toDel = append(toDel, group)
   467  	}
   468  
   469  	return toPut, toDel
   470  }
   471  
   472  func identitiesToStrings(idents []identity.Identity) []string {
   473  	res := make([]string, len(idents))
   474  	for i, id := range idents {
   475  		res[i] = string(id)
   476  	}
   477  	return res
   478  }
   479  
   480  // extractTarArchive unpacks a tar archive and returns a map
   481  // of filename -> fileobj pairs.
   482  func extractTarArchive(r io.Reader) (map[string][]byte, error) {
   483  	entries := make(map[string][]byte)
   484  	gzr, err := gzip.NewReader(r)
   485  	if err != nil {
   486  		return nil, err
   487  	}
   488  
   489  	tr := tar.NewReader(gzr)
   490  	for {
   491  		header, err := tr.Next()
   492  		if errors.Is(err, io.EOF) {
   493  			break
   494  		}
   495  		if err != nil {
   496  			return nil, err
   497  		}
   498  		fileContents, err := io.ReadAll(tr)
   499  		if err != nil {
   500  			return nil, err
   501  		}
   502  		entries[header.Name] = fileContents
   503  	}
   504  
   505  	if err := gzr.Close(); err != nil {
   506  		return nil, err
   507  	}
   508  	return entries, nil
   509  }
   510  
   511  // TODO(cjacomet): replace with slices.Contains when
   512  // slices package isn't experimental.
   513  func contains(key string, search []string) bool {
   514  	for _, val := range search {
   515  		if val == key {
   516  			return true
   517  		}
   518  	}
   519  	return false
   520  }
   521  
   522  // ToProto converts the GroupImporterConfig entity to the proto equivalent.
   523  func (g *GroupImporterConfig) ToProto() (*configspb.GroupImporterConfig, error) {
   524  	gConfig := &configspb.GroupImporterConfig{}
   525  	if err := prototext.Unmarshal([]byte(g.ConfigProto), gConfig); err != nil {
   526  		return nil, err
   527  	}
   528  	return gConfig, nil
   529  }