github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/ruby/parse_gemspec.go (about) 1 package ruby 2 3 import ( 4 "bufio" 5 "context" 6 "encoding/json" 7 "fmt" 8 "regexp" 9 "strings" 10 11 "github.com/mitchellh/mapstructure" 12 13 "github.com/anchore/syft/internal" 14 "github.com/anchore/syft/syft/artifact" 15 "github.com/anchore/syft/syft/file" 16 "github.com/anchore/syft/syft/pkg" 17 "github.com/anchore/syft/syft/pkg/cataloger/generic" 18 ) 19 20 var _ generic.Parser = parseGemFileLockEntries 21 22 type postProcessor func(string) []string 23 24 type gemData struct { 25 Licenses []string `mapstructure:"licenses" json:"licenses,omitempty"` 26 pkg.RubyGemspec `mapstructure:",squash" json:",inline"` 27 } 28 29 // match example: Al\u003Ex ---> 003E 30 var unicodePattern = regexp.MustCompile(`\\u(?P<unicode>[0-9A-F]{4})`) 31 32 var patterns = map[string]*regexp.Regexp{ 33 // match example: name = "railties".freeze ---> railties 34 "name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P<name>.*)["']{1} *`), 35 36 // match example: version = "1.0.4".freeze ---> 1.0.4 37 "version": regexp.MustCompile(`.*\.version\s*=\s*["']{1}(?P<version>.*)["']{1} *`), 38 39 // match example: 40 // homepage = "https://github.com/anchore/syft".freeze ---> https://github.com/anchore/syft 41 "homepage": regexp.MustCompile(`.*\.homepage\s*=\s*["']{1}(?P<homepage>.*)["']{1} *`), 42 43 // match example: files = ["exe/bundle".freeze, "exe/bundler".freeze] ---> "exe/bundle".freeze, "exe/bundler".freeze 44 "files": regexp.MustCompile(`.*\.files\s*=\s*\[(?P<files>.*)] *`), 45 46 // match example: authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze, 47 // "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".freeze...] 48 "authors": regexp.MustCompile(`.*\.authors\s*=\s*\[(?P<authors>.*)] *`), 49 50 // match example: licenses = ["MIT".freeze] ----> "MIT".freeze 51 "licenses": regexp.MustCompile(`.*\.licenses\s*=\s*\[(?P<licenses>.*)] *`), 52 } 53 54 var postProcessors = map[string]postProcessor{ 55 "files": processList, 56 "authors": processList, 57 "licenses": processList, 58 } 59 60 func processList(s string) []string { 61 var results []string 62 for _, item := range strings.Split(s, ",") { 63 results = append(results, strings.Trim(item, "\" ")) 64 } 65 return results 66 } 67 68 // parseGemSpecEntries parses the gemspec file and returns the packages and relationships found. 69 func parseGemSpecEntries(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 70 var pkgs []pkg.Package 71 var fields = make(map[string]interface{}) 72 scanner := bufio.NewScanner(reader) 73 74 for scanner.Scan() { 75 line := scanner.Text() 76 77 sanitizedLine := strings.TrimSpace(line) 78 sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "") 79 sanitizedLine = renderUtf8(sanitizedLine) 80 81 if sanitizedLine == "" { 82 continue 83 } 84 85 for field, pattern := range patterns { 86 matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine) 87 if value := matchMap[field]; value != "" { 88 if pp := postProcessors[field]; pp != nil { 89 fields[field] = pp(value) 90 } else { 91 fields[field] = value 92 } 93 // TODO: know that a line could actually match on multiple patterns, this is unlikely though 94 break 95 } 96 } 97 } 98 99 if fields["name"] != "" && fields["version"] != "" { 100 var metadata gemData 101 if err := mapstructure.Decode(fields, &metadata); err != nil { 102 return nil, nil, fmt.Errorf("unable to decode gem metadata: %w", err) 103 } 104 105 pkgs = append( 106 pkgs, 107 newGemspecPackage( 108 metadata, 109 reader.Location, 110 ), 111 ) 112 } 113 114 return pkgs, nil, nil 115 } 116 117 // renderUtf8 takes any string escaped string subsections from the ruby string and replaces those sections with the UTF8 runes. 118 func renderUtf8(s string) string { 119 fullReplacement := unicodePattern.ReplaceAllStringFunc(s, func(unicodeSection string) string { 120 var replacement string 121 // note: the json parser already has support for interpreting hex-representations of unicode escaped strings as unicode runes. 122 // we can do this ourselves with strconv.Atoi, or leverage the existing json package. 123 if err := json.Unmarshal([]byte(`"`+unicodeSection+`"`), &replacement); err != nil { 124 return unicodeSection 125 } 126 return replacement 127 }) 128 return fullReplacement 129 }