github.com/git-chglog/git-chglog@v0.15.5-0.20240126074033-6a6993d52d69/commit_parser.go (about)

     1  package chglog
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/tsuyoshiwada/go-gitcmd"
    11  )
    12  
    13  var (
    14  	// constants
    15  	separator = "@@__CHGLOG__@@"
    16  	delimiter = "@@__CHGLOG_DELIMITER__@@"
    17  
    18  	// fields
    19  	hashField      = "HASH"
    20  	authorField    = "AUTHOR"
    21  	committerField = "COMMITTER"
    22  	subjectField   = "SUBJECT"
    23  	bodyField      = "BODY"
    24  
    25  	// formats
    26  	hashFormat      = hashField + ":%H\t%h"
    27  	authorFormat    = authorField + ":%an\t%ae\t%at"
    28  	committerFormat = committerField + ":%cn\t%ce\t%ct"
    29  	subjectFormat   = subjectField + ":%s"
    30  	bodyFormat      = bodyField + ":%b"
    31  
    32  	// log
    33  	logFormat = separator + strings.Join([]string{
    34  		hashFormat,
    35  		authorFormat,
    36  		committerFormat,
    37  		subjectFormat,
    38  		bodyFormat,
    39  	}, delimiter)
    40  )
    41  
    42  func joinAndQuoteMeta(list []string, sep string) string {
    43  	arr := make([]string, len(list))
    44  	for i, s := range list {
    45  		arr[i] = regexp.QuoteMeta(s)
    46  	}
    47  	return strings.Join(arr, sep)
    48  }
    49  
    50  type commitParser struct {
    51  	logger                 *Logger
    52  	client                 gitcmd.Client
    53  	jiraClient             JiraClient
    54  	config                 *Config
    55  	reHeader               *regexp.Regexp
    56  	reMerge                *regexp.Regexp
    57  	reRevert               *regexp.Regexp
    58  	reRef                  *regexp.Regexp
    59  	reIssue                *regexp.Regexp
    60  	reNotes                *regexp.Regexp
    61  	reMention              *regexp.Regexp
    62  	reSignOff              *regexp.Regexp
    63  	reCoAuthor             *regexp.Regexp
    64  	reJiraIssueDescription *regexp.Regexp
    65  }
    66  
    67  func newCommitParser(logger *Logger, client gitcmd.Client, jiraClient JiraClient, config *Config) *commitParser {
    68  	opts := config.Options
    69  
    70  	joinedRefActions := joinAndQuoteMeta(opts.RefActions, "|")
    71  	joinedIssuePrefix := joinAndQuoteMeta(opts.IssuePrefix, "|")
    72  	joinedNoteKeywords := joinAndQuoteMeta(opts.NoteKeywords, "|")
    73  
    74  	return &commitParser{
    75  		logger:                 logger,
    76  		client:                 client,
    77  		jiraClient:             jiraClient,
    78  		config:                 config,
    79  		reHeader:               regexp.MustCompile(opts.HeaderPattern),
    80  		reMerge:                regexp.MustCompile(opts.MergePattern),
    81  		reRevert:               regexp.MustCompile(opts.RevertPattern),
    82  		reRef:                  regexp.MustCompile("(?i)(" + joinedRefActions + ")\\s?([\\w/\\.\\-]+)?(?:" + joinedIssuePrefix + ")(\\d+)"),
    83  		reIssue:                regexp.MustCompile("(?:" + joinedIssuePrefix + ")(\\d+)"),
    84  		reNotes:                regexp.MustCompile("^(?i)\\s*(" + joinedNoteKeywords + ")[:\\s]+(.*)"),
    85  		reMention:              regexp.MustCompile(`@([\w-]+)`),
    86  		reSignOff:              regexp.MustCompile(`Signed-off-by:\s+([\p{L}\s\-\[\]]+)\s+<([\w+\-\[\].@]+)>`),
    87  		reCoAuthor:             regexp.MustCompile(`Co-authored-by:\s+([\p{L}\s\-\[\]]+)\s+<([\w+\-\[\].@]+)>`),
    88  		reJiraIssueDescription: regexp.MustCompile(opts.JiraIssueDescriptionPattern),
    89  	}
    90  }
    91  
    92  func (p *commitParser) Parse(rev string) ([]*Commit, error) {
    93  	paths := p.config.Options.Paths
    94  
    95  	args := []string{
    96  		rev,
    97  		"--no-decorate",
    98  		"--pretty=" + logFormat,
    99  	}
   100  
   101  	if len(paths) > 0 {
   102  		args = append(args, "--")
   103  		args = append(args, paths...)
   104  	}
   105  
   106  	out, err := p.client.Exec("log", args...)
   107  
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  
   112  	processor := p.config.Options.Processor
   113  	lines := strings.Split(out, separator)
   114  	lines = lines[1:]
   115  	commits := make([]*Commit, len(lines))
   116  
   117  	for i, line := range lines {
   118  		commit := p.parseCommit(line)
   119  
   120  		if processor != nil {
   121  			commit = processor.ProcessCommit(commit)
   122  			if commit == nil {
   123  				continue
   124  			}
   125  		}
   126  
   127  		commits[i] = commit
   128  	}
   129  
   130  	return commits, nil
   131  }
   132  
   133  func (p *commitParser) parseCommit(input string) *Commit {
   134  	commit := &Commit{}
   135  	tokens := strings.Split(input, delimiter)
   136  
   137  	for _, token := range tokens {
   138  		firstSep := strings.Index(token, ":")
   139  		field := token[0:firstSep]
   140  		value := strings.TrimSpace(token[firstSep+1:])
   141  
   142  		switch field {
   143  		case hashField:
   144  			commit.Hash = p.parseHash(value)
   145  		case authorField:
   146  			commit.Author = p.parseAuthor(value)
   147  		case committerField:
   148  			commit.Committer = p.parseCommitter(value)
   149  		case subjectField:
   150  			p.processHeader(commit, value)
   151  		case bodyField:
   152  			p.processBody(commit, value)
   153  		}
   154  	}
   155  
   156  	commit.Refs = p.uniqRefs(commit.Refs)
   157  	commit.Mentions = p.uniqMentions(commit.Mentions)
   158  
   159  	return commit
   160  }
   161  
   162  func (p *commitParser) parseHash(input string) *Hash {
   163  	arr := strings.Split(input, "\t")
   164  
   165  	return &Hash{
   166  		Long:  arr[0],
   167  		Short: arr[1],
   168  	}
   169  }
   170  
   171  func (p *commitParser) parseAuthor(input string) *Author {
   172  	arr := strings.Split(input, "\t")
   173  	ts, err := strconv.Atoi(arr[2])
   174  	if err != nil {
   175  		ts = 0
   176  	}
   177  
   178  	return &Author{
   179  		Name:  arr[0],
   180  		Email: arr[1],
   181  		Date:  time.Unix(int64(ts), 0),
   182  	}
   183  }
   184  
   185  func (p *commitParser) parseCommitter(input string) *Committer {
   186  	author := p.parseAuthor(input)
   187  
   188  	return &Committer{
   189  		Name:  author.Name,
   190  		Email: author.Email,
   191  		Date:  author.Date,
   192  	}
   193  }
   194  
   195  func (p *commitParser) processHeader(commit *Commit, input string) {
   196  	opts := p.config.Options
   197  
   198  	// header (raw)
   199  	commit.Header = input
   200  
   201  	var res [][]string
   202  
   203  	// Type, Scope, Subject etc ...
   204  	res = p.reHeader.FindAllStringSubmatch(input, -1)
   205  	if len(res) > 0 {
   206  		assignDynamicValues(commit, opts.HeaderPatternMaps, res[0][1:])
   207  	}
   208  
   209  	// Merge
   210  	res = p.reMerge.FindAllStringSubmatch(input, -1)
   211  	if len(res) > 0 {
   212  		merge := &Merge{}
   213  		assignDynamicValues(merge, opts.MergePatternMaps, res[0][1:])
   214  		commit.Merge = merge
   215  	}
   216  
   217  	// Revert
   218  	res = p.reRevert.FindAllStringSubmatch(input, -1)
   219  	if len(res) > 0 {
   220  		revert := &Revert{}
   221  		assignDynamicValues(revert, opts.RevertPatternMaps, res[0][1:])
   222  		commit.Revert = revert
   223  	}
   224  
   225  	// refs & mentions
   226  	commit.Refs = p.parseRefs(input)
   227  	commit.Mentions = p.parseMentions(input)
   228  
   229  	// Jira
   230  	if commit.JiraIssueID != "" {
   231  		p.processJiraIssue(commit, commit.JiraIssueID)
   232  	}
   233  }
   234  
   235  func (p *commitParser) extractLineMetadata(commit *Commit, line string) bool {
   236  	meta := false
   237  
   238  	refs := p.parseRefs(line)
   239  	if len(refs) > 0 {
   240  		meta = true
   241  		commit.Refs = append(commit.Refs, refs...)
   242  	}
   243  
   244  	mentions := p.parseMentions(line)
   245  	if len(mentions) > 0 {
   246  		meta = true
   247  		commit.Mentions = append(commit.Mentions, mentions...)
   248  	}
   249  
   250  	coAuthors := p.parseCoAuthors(line)
   251  	if len(coAuthors) > 0 {
   252  		meta = true
   253  		commit.CoAuthors = append(commit.CoAuthors, coAuthors...)
   254  	}
   255  
   256  	signers := p.parseSigners(line)
   257  	if len(signers) > 0 {
   258  		meta = true
   259  		commit.Signers = append(commit.Signers, signers...)
   260  	}
   261  
   262  	return meta
   263  }
   264  
   265  func (p *commitParser) processBody(commit *Commit, input string) {
   266  	input = convNewline(input, "\n")
   267  
   268  	// body
   269  	commit.Body = input
   270  
   271  	// notes & refs & mentions
   272  	commit.Notes = []*Note{}
   273  	inNote := false
   274  	trim := false
   275  	fenceDetector := newMdFenceDetector()
   276  	lines := strings.Split(input, "\n")
   277  
   278  	// body without notes & refs & mentions
   279  	trimmedBody := make([]string, 0, len(lines))
   280  
   281  	for _, line := range lines {
   282  		if !inNote {
   283  			trim = false
   284  		}
   285  		fenceDetector.Update(line)
   286  
   287  		if !fenceDetector.InCodeblock() && p.extractLineMetadata(commit, line) {
   288  			trim = true
   289  			inNote = false
   290  		}
   291  		// Q: should this check also only be outside of code blocks?
   292  		res := p.reNotes.FindAllStringSubmatch(line, -1)
   293  
   294  		if len(res) > 0 {
   295  			inNote = true
   296  			trim = true
   297  			for _, r := range res {
   298  				commit.Notes = append(commit.Notes, &Note{
   299  					Title: r[1],
   300  					Body:  r[2],
   301  				})
   302  			}
   303  		} else if inNote {
   304  			last := commit.Notes[len(commit.Notes)-1]
   305  			last.Body = last.Body + "\n" + line
   306  		}
   307  
   308  		if !trim {
   309  			trimmedBody = append(trimmedBody, line)
   310  		}
   311  	}
   312  
   313  	commit.TrimmedBody = strings.TrimSpace(strings.Join(trimmedBody, "\n"))
   314  	p.trimSpaceInNotes(commit)
   315  }
   316  
   317  func (*commitParser) trimSpaceInNotes(commit *Commit) {
   318  	for _, note := range commit.Notes {
   319  		note.Body = strings.TrimSpace(note.Body)
   320  	}
   321  }
   322  
   323  func (p *commitParser) parseRefs(input string) []*Ref {
   324  	refs := []*Ref{}
   325  
   326  	// references
   327  	res := p.reRef.FindAllStringSubmatch(input, -1)
   328  
   329  	for _, r := range res {
   330  		refs = append(refs, &Ref{
   331  			Action: r[1],
   332  			Source: r[2],
   333  			Ref:    r[3],
   334  		})
   335  	}
   336  
   337  	// issues
   338  	res = p.reIssue.FindAllStringSubmatch(input, -1)
   339  	for _, r := range res {
   340  		duplicate := false
   341  		for _, ref := range refs {
   342  			if ref.Ref == r[1] {
   343  				duplicate = true
   344  			}
   345  		}
   346  		if !duplicate {
   347  			refs = append(refs, &Ref{
   348  				Action: "",
   349  				Source: "",
   350  				Ref:    r[1],
   351  			})
   352  		}
   353  	}
   354  
   355  	return refs
   356  }
   357  
   358  func (p *commitParser) parseSigners(input string) []Contact {
   359  	res := p.reSignOff.FindAllStringSubmatch(input, -1)
   360  	contacts := make([]Contact, len(res))
   361  
   362  	for i, r := range res {
   363  		contacts[i].Name = r[1]
   364  		contacts[i].Email = r[2]
   365  	}
   366  
   367  	return contacts
   368  }
   369  
   370  func (p *commitParser) parseCoAuthors(input string) []Contact {
   371  	res := p.reCoAuthor.FindAllStringSubmatch(input, -1)
   372  	contacts := make([]Contact, len(res))
   373  
   374  	for i, r := range res {
   375  		contacts[i].Name = r[1]
   376  		contacts[i].Email = r[2]
   377  	}
   378  
   379  	return contacts
   380  }
   381  
   382  func (p *commitParser) parseMentions(input string) []string {
   383  	res := p.reMention.FindAllStringSubmatch(input, -1)
   384  	mentions := make([]string, len(res))
   385  
   386  	for i, r := range res {
   387  		mentions[i] = r[1]
   388  	}
   389  
   390  	return mentions
   391  }
   392  
   393  func (p *commitParser) uniqRefs(refs []*Ref) []*Ref {
   394  	arr := []*Ref{}
   395  
   396  	for _, ref := range refs {
   397  		exist := false
   398  		for _, r := range arr {
   399  			if ref.Ref == r.Ref && ref.Action == r.Action && ref.Source == r.Source {
   400  				exist = true
   401  			}
   402  		}
   403  		if !exist {
   404  			arr = append(arr, ref)
   405  		}
   406  	}
   407  
   408  	return arr
   409  }
   410  
   411  func (p *commitParser) uniqMentions(mentions []string) []string {
   412  	arr := []string{}
   413  
   414  	for _, mention := range mentions {
   415  		exist := false
   416  		for _, m := range arr {
   417  			if mention == m {
   418  				exist = true
   419  			}
   420  		}
   421  		if !exist {
   422  			arr = append(arr, mention)
   423  		}
   424  	}
   425  
   426  	return arr
   427  }
   428  
   429  func (p *commitParser) processJiraIssue(commit *Commit, issueID string) {
   430  	issue, err := p.jiraClient.GetJiraIssue(commit.JiraIssueID)
   431  	if err != nil {
   432  		p.logger.Error(fmt.Sprintf("Failed to parse Jira story %s: %s\n", issueID, err))
   433  		return
   434  	}
   435  	commit.Type = p.config.Options.JiraTypeMaps[issue.Fields.Type.Name]
   436  	commit.JiraIssue = &JiraIssue{
   437  		Type:        issue.Fields.Type.Name,
   438  		Summary:     issue.Fields.Summary,
   439  		Description: issue.Fields.Description,
   440  		Labels:      issue.Fields.Labels,
   441  	}
   442  
   443  	if p.config.Options.JiraIssueDescriptionPattern != "" {
   444  		res := p.reJiraIssueDescription.FindStringSubmatch(commit.JiraIssue.Description)
   445  		if len(res) > 1 {
   446  			commit.JiraIssue.Description = res[1]
   447  		}
   448  	}
   449  }
   450  
   451  var (
   452  	fenceTypes = []string{
   453  		"```",
   454  		"~~~",
   455  		"    ",
   456  		"\t",
   457  	}
   458  )
   459  
   460  type mdFenceDetector struct {
   461  	fence int
   462  }
   463  
   464  func newMdFenceDetector() *mdFenceDetector {
   465  	return &mdFenceDetector{
   466  		fence: -1,
   467  	}
   468  }
   469  
   470  func (d *mdFenceDetector) InCodeblock() bool {
   471  	return d.fence > -1
   472  }
   473  
   474  func (d *mdFenceDetector) Update(input string) {
   475  	for i, s := range fenceTypes {
   476  		if d.fence < 0 {
   477  			if strings.Index(input, s) == 0 {
   478  				d.fence = i
   479  				break
   480  			}
   481  		} else {
   482  			if strings.Index(input, s) == 0 && i == d.fence {
   483  				d.fence = -1
   484  				break
   485  			}
   486  		}
   487  	}
   488  }