github.com/soypat/gitaligned@v0.3.4-0.20221228122414-e435aab44fbc/nlp.go (about)

     1  package main
     2  
     3  import (
     4  	"strings"
     5  
     6  	"github.com/jdkato/prose/v2"
     7  )
     8  
     9  var replacecommits = strings.NewReplacer(".", " ", "(", " ", ")", " ", ":", " ", ",", " ",
    10  	"[", " ", "]", " ", "\\", " ", `"`, " ", "'", " ", "!", " ", ";", " ", "?", " ",
    11  	"/", " ", "<", " ", ">", " ")
    12  
    13  func tokenizeCommits(commits []commit) ([]prose.Token, error) {
    14  	var err error
    15  	if len(commits) == 0 {
    16  		panic("expected non-nil/non-zero number of commits")
    17  	}
    18  
    19  	var doc *prose.Document
    20  	var allCommits = &strings.Builder{}
    21  	cap := allCommits.Cap()
    22  	if cap < len(commits)*20 {
    23  		allCommits.Grow(len(commits)*20 - cap)
    24  	}
    25  
    26  	for i := range commits {
    27  		msg := replacecommits.Replace(commits[i].Message) + " . "
    28  		allCommits.WriteString(msg)
    29  	}
    30  	// allstr :=allCommits.String()  // debugging purposes
    31  
    32  	doc, err = prose.NewDocument(allCommits.String(),
    33  		prose.WithExtraction(false), prose.WithSegmentation(false), prose.WithTokenization(false))
    34  	return doc.Tokens(), err
    35  }
    36  
    37  // walkCommits is SLOW. This is because it processes all commit messages into one
    38  //
    39  func walkCommits(commits []commit, f func(*commit, []prose.Token)) error {
    40  	tokens, err := tokenizeCommits(commits)
    41  	if err != nil {
    42  		return err
    43  	}
    44  	atCommit := 0
    45  	last := -1
    46  	for i := range tokens {
    47  		if tokens[i].Tag == "." {
    48  			f(&commits[atCommit], tokens[last+1:i])
    49  			last = i
    50  			atCommit++
    51  		}
    52  	}
    53  	return nil
    54  }
    55  
    56  func min(a, b int) int {
    57  	if a < b {
    58  		return a
    59  	}
    60  	return b
    61  }
    62  
    63  func max(a, b int) int {
    64  	if a > b {
    65  		return a
    66  	}
    67  	return b
    68  }
    69  
    70  func spaces(n int) string {
    71  	const spaces32 = "                                "
    72  	if n < 32 {
    73  		return spaces32[:n]
    74  	}
    75  	var res string
    76  	for i := 0; i < n/32; i++ {
    77  		res += spaces32
    78  	}
    79  	return res + spaces32[:n%32]
    80  }