github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/internal/commitextractor/commit.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package commitextractor provides a function to extract the commit hash from a full git URL.
    16  package commitextractor
    17  
    18  import (
    19  	"net/url"
    20  	"regexp"
    21  	"slices"
    22  )
    23  
    24  // language=GoRegExp
    25  var matchers = []*regexp.Regexp{
    26  	// ssh://...
    27  	// git://...
    28  	// git+ssh://...
    29  	// git+https://...
    30  	regexp.MustCompile(`(?:^|.+@)(?:git(?:\+(?:ssh|https))?|ssh)://.+#(\w+)$`),
    31  	// https://....git/...
    32  	regexp.MustCompile(`(?:^|.+@)https://.+\.git#(\w+)$`),
    33  	regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`),
    34  	regexp.MustCompile(`.+#commit[:=](\w+)$`),
    35  	// github:...
    36  	// gitlab:...
    37  	// bitbucket:...
    38  	regexp.MustCompile(`^(?:github|gitlab|bitbucket):.+#(\w+)$`),
    39  }
    40  
    41  // TryExtractCommit tries to extract the commit hash from a full git url.
    42  func TryExtractCommit(resolution string) string {
    43  	// Test with regexes first
    44  	for _, re := range matchers {
    45  		matched := re.FindStringSubmatch(resolution)
    46  
    47  		if matched != nil {
    48  			return matched[1]
    49  		}
    50  	}
    51  
    52  	// Otherwise, check if we can retrieve the hash from either the fragment or
    53  	// the query ref
    54  	u, err := url.Parse(resolution)
    55  
    56  	if err != nil {
    57  		return ""
    58  	}
    59  
    60  	gitRepoHosts := []string{
    61  		"bitbucket.org",
    62  		"github.com",
    63  		"gitlab.com",
    64  	}
    65  
    66  	if !slices.Contains(gitRepoHosts, u.Host) {
    67  		return ""
    68  	}
    69  
    70  	if u.RawQuery == "" {
    71  		return u.Fragment
    72  	}
    73  
    74  	queries := u.Query()
    75  
    76  	// Returns an empty string if there is no ref
    77  	return queries.Get("ref")
    78  }