code.gitea.io/gitea@v1.22.3/services/repository/contributors_graph.go (about)

     1  // Copyright 2023 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package repository
     5  
     6  import (
     7  	"bufio"
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"os"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"code.gitea.io/gitea/models/avatars"
    18  	repo_model "code.gitea.io/gitea/models/repo"
    19  	user_model "code.gitea.io/gitea/models/user"
    20  	"code.gitea.io/gitea/modules/cache"
    21  	"code.gitea.io/gitea/modules/git"
    22  	"code.gitea.io/gitea/modules/gitrepo"
    23  	"code.gitea.io/gitea/modules/graceful"
    24  	"code.gitea.io/gitea/modules/log"
    25  	api "code.gitea.io/gitea/modules/structs"
    26  )
    27  
const (
	// contributorStatsCacheKey is the format string for the cache key;
	// GetContributorStats fills it with the repository full name and the revision.
	contributorStatsCacheKey           = "GetContributorStats/%s/%s"
	// contributorStatsCacheTimeout is the timeout passed to cache.PutJSON when
	// storing generated stats (presumably seconds, i.e. 10 minutes - confirm
	// against the cache module).
	contributorStatsCacheTimeout int64 = 60 * 10
)
    32  
    33  var (
    34  	ErrAwaitGeneration  = errors.New("generation took longer than ")
    35  	awaitGenerationTime = time.Second * 5
    36  	generateLock        = sync.Map{}
    37  )
    38  
// WeekData holds the aggregated commit statistics of one contributor (or of
// the overall total) for a single week.
type WeekData struct {
	Week      int64 `json:"week"`      // Starting day of the week as Unix timestamp in milliseconds
	Additions int   `json:"additions"` // Number of additions in that week
	Deletions int   `json:"deletions"` // Number of deletions in that week
	Commits   int   `json:"commits"`   // Number of commits in that week
}
    45  
// ContributorData represents statistical git commit count data
// for a single contributor, or for the aggregated "total" entry.
type ContributorData struct {
	Name         string              `json:"name"`  // Display name of the contributor
	Login        string              `json:"login"` // Login name of the contributor in case it exists
	AvatarLink   string              `json:"avatar_link"` // Avatar URL; left empty for the "total" entry
	HomeLink     string              `json:"home_link"` // Profile link; only set when the email matches a known user
	TotalCommits int64               `json:"total_commits"` // Number of commits counted for this entry
	Weeks        map[int64]*WeekData `json:"weeks"` // Per-week stats keyed by the week start in Unix milliseconds
}
    55  
// ExtendedCommitStats contains information for commit stats with author data
type ExtendedCommitStats struct {
	Author *api.CommitUser  `json:"author"` // Author name, email and author date of the commit
	Stats  *api.CommitStats `json:"stats"` // Additions, deletions and their sum for the commit
}
    61  
    62  const layout = time.DateOnly
    63  
    64  func findLastSundayBeforeDate(dateStr string) (string, error) {
    65  	date, err := time.Parse(layout, dateStr)
    66  	if err != nil {
    67  		return "", err
    68  	}
    69  
    70  	weekday := date.Weekday()
    71  	daysToSubtract := int(weekday) - int(time.Sunday)
    72  	if daysToSubtract < 0 {
    73  		daysToSubtract += 7
    74  	}
    75  
    76  	lastSunday := date.AddDate(0, 0, -daysToSubtract)
    77  	return lastSunday.Format(layout), nil
    78  }
    79  
    80  // GetContributorStats returns contributors stats for git commits for given revision or default branch
    81  func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) {
    82  	// as GetContributorStats is resource intensive we cache the result
    83  	cacheKey := fmt.Sprintf(contributorStatsCacheKey, repo.FullName(), revision)
    84  	if !cache.IsExist(cacheKey) {
    85  		genReady := make(chan struct{})
    86  
    87  		// dont start multiple async generations
    88  		_, run := generateLock.Load(cacheKey)
    89  		if run {
    90  			return nil, ErrAwaitGeneration
    91  		}
    92  
    93  		generateLock.Store(cacheKey, struct{}{})
    94  		// run generation async
    95  		go generateContributorStats(genReady, cache, cacheKey, repo, revision)
    96  
    97  		select {
    98  		case <-time.After(awaitGenerationTime):
    99  			return nil, ErrAwaitGeneration
   100  		case <-genReady:
   101  			// we got generation ready before timeout
   102  			break
   103  		}
   104  	}
   105  	// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
   106  	var res map[string]*ContributorData
   107  	if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil {
   108  		return nil, fmt.Errorf("cached error: %w", cacheErr.ToError())
   109  	}
   110  	return res, nil
   111  }
   112  
   113  // getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision
   114  func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int */) ([]*ExtendedCommitStats, error) {
   115  	baseCommit, err := repo.GetCommit(revision)
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  	stdoutReader, stdoutWriter, err := os.Pipe()
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  	defer func() {
   124  		_ = stdoutReader.Close()
   125  		_ = stdoutWriter.Close()
   126  	}()
   127  
   128  	gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse")
   129  	// AddOptionFormat("--max-count=%d", limit)
   130  	gitCmd.AddDynamicArguments(baseCommit.ID.String())
   131  
   132  	var extendedCommitStats []*ExtendedCommitStats
   133  	stderr := new(strings.Builder)
   134  	err = gitCmd.Run(&git.RunOpts{
   135  		Dir:    repo.Path,
   136  		Stdout: stdoutWriter,
   137  		Stderr: stderr,
   138  		PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
   139  			_ = stdoutWriter.Close()
   140  			scanner := bufio.NewScanner(stdoutReader)
   141  
   142  			for scanner.Scan() {
   143  				line := strings.TrimSpace(scanner.Text())
   144  				if line != "---" {
   145  					continue
   146  				}
   147  				scanner.Scan()
   148  				authorName := strings.TrimSpace(scanner.Text())
   149  				scanner.Scan()
   150  				authorEmail := strings.TrimSpace(scanner.Text())
   151  				scanner.Scan()
   152  				date := strings.TrimSpace(scanner.Text())
   153  				scanner.Scan()
   154  				stats := strings.TrimSpace(scanner.Text())
   155  				if authorName == "" || authorEmail == "" || date == "" || stats == "" {
   156  					// FIXME: find a better way to parse the output so that we will handle this properly
   157  					log.Warn("Something is wrong with git log output, skipping...")
   158  					log.Warn("authorName: %s,  authorEmail: %s,  date: %s,  stats: %s", authorName, authorEmail, date, stats)
   159  					continue
   160  				}
   161  				//  1 file changed, 1 insertion(+), 1 deletion(-)
   162  				fields := strings.Split(stats, ",")
   163  
   164  				commitStats := api.CommitStats{}
   165  				for _, field := range fields[1:] {
   166  					parts := strings.Split(strings.TrimSpace(field), " ")
   167  					value, contributionType := parts[0], parts[1]
   168  					amount, _ := strconv.Atoi(value)
   169  
   170  					if strings.HasPrefix(contributionType, "insertion") {
   171  						commitStats.Additions = amount
   172  					} else {
   173  						commitStats.Deletions = amount
   174  					}
   175  				}
   176  				commitStats.Total = commitStats.Additions + commitStats.Deletions
   177  				scanner.Text() // empty line at the end
   178  
   179  				res := &ExtendedCommitStats{
   180  					Author: &api.CommitUser{
   181  						Identity: api.Identity{
   182  							Name:  authorName,
   183  							Email: authorEmail,
   184  						},
   185  						Date: date,
   186  					},
   187  					Stats: &commitStats,
   188  				}
   189  				extendedCommitStats = append(extendedCommitStats, res)
   190  			}
   191  			_ = stdoutReader.Close()
   192  			return nil
   193  		},
   194  	})
   195  	if err != nil {
   196  		return nil, fmt.Errorf("Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr)
   197  	}
   198  
   199  	return extendedCommitStats, nil
   200  }
   201  
   202  func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) {
   203  	ctx := graceful.GetManager().HammerContext()
   204  
   205  	gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
   206  	if err != nil {
   207  		_ = cache.PutJSON(cacheKey, fmt.Errorf("OpenRepository: %w", err), contributorStatsCacheTimeout)
   208  		return
   209  	}
   210  	defer closer.Close()
   211  
   212  	if len(revision) == 0 {
   213  		revision = repo.DefaultBranch
   214  	}
   215  	extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision)
   216  	if err != nil {
   217  		_ = cache.PutJSON(cacheKey, fmt.Errorf("ExtendedCommitStats: %w", err), contributorStatsCacheTimeout)
   218  		return
   219  	}
   220  	if len(extendedCommitStats) == 0 {
   221  		_ = cache.PutJSON(cacheKey, fmt.Errorf("no commit stats returned for revision '%s'", revision), contributorStatsCacheTimeout)
   222  		return
   223  	}
   224  
   225  	layout := time.DateOnly
   226  
   227  	unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0)
   228  	contributorsCommitStats := make(map[string]*ContributorData)
   229  	contributorsCommitStats["total"] = &ContributorData{
   230  		Name:  "Total",
   231  		Weeks: make(map[int64]*WeekData),
   232  	}
   233  	total := contributorsCommitStats["total"]
   234  
   235  	for _, v := range extendedCommitStats {
   236  		userEmail := v.Author.Email
   237  		if len(userEmail) == 0 {
   238  			continue
   239  		}
   240  		u, _ := user_model.GetUserByEmail(ctx, userEmail)
   241  		if u != nil {
   242  			// update userEmail with user's primary email address so
   243  			// that different mail addresses will linked to same account
   244  			userEmail = u.GetEmail()
   245  		}
   246  		// duplicated logic
   247  		if _, ok := contributorsCommitStats[userEmail]; !ok {
   248  			if u == nil {
   249  				avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
   250  				if avatarLink == "" {
   251  					avatarLink = unknownUserAvatarLink
   252  				}
   253  				contributorsCommitStats[userEmail] = &ContributorData{
   254  					Name:       v.Author.Name,
   255  					AvatarLink: avatarLink,
   256  					Weeks:      make(map[int64]*WeekData),
   257  				}
   258  			} else {
   259  				contributorsCommitStats[userEmail] = &ContributorData{
   260  					Name:       u.DisplayName(),
   261  					Login:      u.LowerName,
   262  					AvatarLink: u.AvatarLinkWithSize(ctx, 0),
   263  					HomeLink:   u.HomeLink(),
   264  					Weeks:      make(map[int64]*WeekData),
   265  				}
   266  			}
   267  		}
   268  		// Update user statistics
   269  		user := contributorsCommitStats[userEmail]
   270  		startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date)
   271  
   272  		val, _ := time.Parse(layout, startingOfWeek)
   273  		week := val.UnixMilli()
   274  
   275  		if user.Weeks[week] == nil {
   276  			user.Weeks[week] = &WeekData{
   277  				Additions: 0,
   278  				Deletions: 0,
   279  				Commits:   0,
   280  				Week:      week,
   281  			}
   282  		}
   283  		if total.Weeks[week] == nil {
   284  			total.Weeks[week] = &WeekData{
   285  				Additions: 0,
   286  				Deletions: 0,
   287  				Commits:   0,
   288  				Week:      week,
   289  			}
   290  		}
   291  		user.Weeks[week].Additions += v.Stats.Additions
   292  		user.Weeks[week].Deletions += v.Stats.Deletions
   293  		user.Weeks[week].Commits++
   294  		user.TotalCommits++
   295  
   296  		// Update overall statistics
   297  		total.Weeks[week].Additions += v.Stats.Additions
   298  		total.Weeks[week].Deletions += v.Stats.Deletions
   299  		total.Weeks[week].Commits++
   300  		total.TotalCommits++
   301  	}
   302  
   303  	_ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout)
   304  	generateLock.Delete(cacheKey)
   305  	if genDone != nil {
   306  		genDone <- struct{}{}
   307  	}
   308  }