github.com/google/osv-scalibr@v0.4.1/veles/secrets/gitlabpat/detector.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package gitlabpat contains a Veles Secret type and a Detector for 16 // Gitlab Personal Access Tokens (prefix `glpat-`). 17 package gitlabpat 18 19 import ( 20 "hash/crc32" 21 "regexp" 22 "sort" 23 "strconv" 24 "strings" 25 26 "github.com/google/osv-scalibr/veles" 27 ) 28 29 // maxTokenLength is the maximum size of a Gitlab personal access token. 30 const maxTokenLength = 319 31 32 // Regular expressions for GitLab Personal Access Tokens: 33 // 34 // Based on the specs at: https://gitlab.com/gitlab-com/content-sites/handbook/-/blob/a5c49599bd88f1751616b40e4e32331aa2c8bf50/content/handbook/engineering/architecture/design-documents/cells/routable_tokens.md#L80 35 var ( 36 reRoutableVersioned = regexp.MustCompile(`(?P<prefix>glpat-)(?P<payload>[0-9A-Za-z_-]{27,300})\.(?P<version>[0-9a-z]{2})\.(?P<length>[0-9a-z]{2})(?P<crc>[0-9a-z]{7})`) 37 reRoutable = regexp.MustCompile(`glpat-[0-9A-Za-z_-]{27,300}\.[0-9a-z]{2}[0-9a-z]{7}`) 38 reLegacy = regexp.MustCompile(`glpat-[0-9A-Za-z_-]{20}`) 39 ) 40 41 var _ veles.Detector = NewDetector() 42 43 // isValidCRC32 validates the CRC32 checksum of a GitLab Versioned Routable PAT. 44 // According to the documentation, the CRC32 is calculated on 45 // <prefix><base64-payload>.<token-version>.<base64-payload-length> 46 // and encoded as base36 with leading zeros to make 7 characters. 47 func isValidCRC32(prefix, payload, version, length, crcToCheck string) bool { 48 // Construct the string to calculate CRC32 on 49 checksumTarget := prefix + payload + "." + version + "." + length 50 51 // Calculate CRC32 checksum 52 crc := crc32.ChecksumIEEE([]byte(checksumTarget)) 53 54 // Convert to base36 string with leading zeros to make 7 characters 55 calculatedCRC := strconv.FormatInt(int64(crc), 36) 56 for len(calculatedCRC) < 7 { 57 calculatedCRC = "0" + calculatedCRC 58 } 59 60 // Compare calculated CRC with the provided CRC 61 return strings.EqualFold(calculatedCRC, crcToCheck) 62 } 63 64 // detector is a Veles Detector. 65 type detector struct{} 66 67 // NewDetector returns a new Detector that matches 68 // Gitlab Personal Access Tokens. 69 func NewDetector() veles.Detector { 70 return &detector{} 71 } 72 73 func (d *detector) MaxSecretLen() uint32 { 74 return maxTokenLength 75 } 76 77 func (d *detector) Detect(content []byte) ([]veles.Secret, []int) { 78 type match struct { 79 start int 80 token string 81 } 82 83 var versionedMatches, routableMatches, legacyMatches []match 84 85 // Collect routable versioned matches 86 contentStr := string(content) 87 for _, tokenMatchIndex := range reRoutableVersioned.FindAllStringSubmatchIndex(contentStr, -1) { 88 versionedMatches = append(versionedMatches, match{ 89 start: tokenMatchIndex[0], 90 token: contentStr[tokenMatchIndex[0]:tokenMatchIndex[1]], 91 }) 92 } 93 94 // Collect routable matches 95 for _, loc := range reRoutable.FindAllIndex(content, -1) { 96 routableMatches = append(routableMatches, match{ 97 start: loc[0], 98 token: string(content[loc[0]:loc[1]]), 99 }) 100 } 101 102 // Collect legacy matches 103 for _, loc := range reLegacy.FindAllIndex(content, -1) { 104 legacyMatches = append(legacyMatches, match{ 105 start: loc[0], 106 token: string(content[loc[0]:loc[1]]), 107 }) 108 } 109 110 var pruned []match 111 112 // Always keep versioned tokens 113 pruned = append(pruned, versionedMatches...) 114 115 // Keep routable tokens only if they're not contained in any versioned token 116 for _, routable := range routableMatches { 117 contained := false 118 for _, versioned := range versionedMatches { 119 if strings.Contains(versioned.token, routable.token) { 120 contained = true 121 break 122 } 123 } 124 if !contained { 125 pruned = append(pruned, routable) 126 } 127 } 128 129 // Keep legacy tokens only if they're not contained in any routable or versioned token 130 for _, legacy := range legacyMatches { 131 contained := false 132 // Check against versioned tokens 133 for _, versioned := range versionedMatches { 134 if strings.Contains(versioned.token, legacy.token) { 135 contained = true 136 break 137 } 138 } 139 // If not contained in versioned, check against routable 140 if !contained { 141 for _, routable := range routableMatches { 142 if strings.Contains(routable.token, legacy.token) { 143 contained = true 144 break 145 } 146 } 147 } 148 if !contained { 149 pruned = append(pruned, legacy) 150 } 151 } 152 153 // Filter out invalid versioned tokens based on CRC32 validation 154 finalMatches := make([]match, 0, len(pruned)) 155 for _, m := range pruned { 156 if reRoutableVersioned.MatchString(m.token) { 157 submatch := reRoutableVersioned.FindStringSubmatch(m.token) 158 if len(submatch) == 6 && 159 isValidCRC32(submatch[1], submatch[2], submatch[3], submatch[4], submatch[5]) { 160 finalMatches = append(finalMatches, m) 161 } 162 } else { 163 finalMatches = append(finalMatches, m) 164 } 165 } 166 167 // Sort by start offset to preserve document order 168 sort.Slice(finalMatches, func(i, j int) bool { return finalMatches[i].start < finalMatches[j].start }) 169 170 secrets := make([]veles.Secret, 0, len(finalMatches)) 171 offsets := make([]int, 0, len(finalMatches)) 172 for _, m := range finalMatches { 173 secrets = append(secrets, GitlabPAT{Pat: m.token}) 174 offsets = append(offsets, m.start) 175 } 176 return secrets, offsets 177 }