github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/ruby/parse_gemspec.go (about)

     1  package ruby
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/mitchellh/mapstructure"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/syft/artifact"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/pkg"
    17  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    18  )
    19  
    20  var _ generic.Parser = parseGemFileLockEntries
    21  
// postProcessor converts the raw text captured for a field into a list of values.
type postProcessor func(string) []string
    23  
// gemData is the decode target for the fields extracted from a gemspec file: the declared licenses plus the
// embedded pkg.RubyGemspec metadata, whose fields are tagged to be decoded at the same level (squash / inline).
type gemData struct {
	Licenses        []string `mapstructure:"licenses" json:"licenses,omitempty"`
	pkg.RubyGemspec `mapstructure:",squash" json:",inline"`
}
    28  
    29  // match example:      Al\u003Ex   --->   003E
    30  var unicodePattern = regexp.MustCompile(`\\u(?P<unicode>[0-9A-F]{4})`)
    31  
// patterns maps a gemspec field name to the expression used to extract that field from a single line. Every
// expression exposes a named capture group with the same name as its map key. Lines are matched only after
// ".freeze" has been removed from them, so the ".freeze" suffixes shown in the examples below are already gone
// by the time an expression runs. Each expression requires the attribute to be preceded by a "." (e.g. "s.name").
var patterns = map[string]*regexp.Regexp{
	// match example:       s.name = "railties".freeze   --->   railties
	"name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P<name>.*)["']{1} *`),

	// match example:       s.version = "1.0.4".freeze   --->   1.0.4
	"version": regexp.MustCompile(`.*\.version\s*=\s*["']{1}(?P<version>.*)["']{1} *`),

	// match example:
	// s.homepage = "https://github.com/anchore/syft".freeze   --->   https://github.com/anchore/syft
	"homepage": regexp.MustCompile(`.*\.homepage\s*=\s*["']{1}(?P<homepage>.*)["']{1} *`),

	// match example:       s.files = ["exe/bundle".freeze, "exe/bundler".freeze]    --->    "exe/bundle", "exe/bundler"
	"files": regexp.MustCompile(`.*\.files\s*=\s*\[(?P<files>.*)] *`),

	// match example:       s.authors = ["Andr\u00E9 Arko".freeze, "Samuel Giddins".freeze, "Colby Swandale".freeze,
	//                                   "Hiroshi Shibata".freeze, "David Rodr\u00EDguez".freeze, "Grey Baker".freeze...]
	// (the capture is everything between the square brackets; it must all be on one line to match)
	"authors": regexp.MustCompile(`.*\.authors\s*=\s*\[(?P<authors>.*)] *`),

	// match example:       s.licenses = ["MIT".freeze]   --->   "MIT"
	"licenses": regexp.MustCompile(`.*\.licenses\s*=\s*\[(?P<licenses>.*)] *`),
}
    53  
// postProcessors holds, per field name, an optional transformation applied to the captured text before it is
// stored. Fields without an entry here are stored as the raw captured string.
var postProcessors = map[string]postProcessor{
	"files":    processList,
	"authors":  processList,
	"licenses": processList,
}
    59  
    60  func processList(s string) []string {
    61  	var results []string
    62  	for _, item := range strings.Split(s, ",") {
    63  		results = append(results, strings.Trim(item, "\" "))
    64  	}
    65  	return results
    66  }
    67  
    68  // parseGemSpecEntries parses the gemspec file and returns the packages and relationships found.
    69  func parseGemSpecEntries(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    70  	var pkgs []pkg.Package
    71  	var fields = make(map[string]interface{})
    72  	scanner := bufio.NewScanner(reader)
    73  
    74  	for scanner.Scan() {
    75  		line := scanner.Text()
    76  
    77  		sanitizedLine := strings.TrimSpace(line)
    78  		sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "")
    79  		sanitizedLine = renderUtf8(sanitizedLine)
    80  
    81  		if sanitizedLine == "" {
    82  			continue
    83  		}
    84  
    85  		for field, pattern := range patterns {
    86  			matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine)
    87  			if value := matchMap[field]; value != "" {
    88  				if pp := postProcessors[field]; pp != nil {
    89  					fields[field] = pp(value)
    90  				} else {
    91  					fields[field] = value
    92  				}
    93  				// TODO: know that a line could actually match on multiple patterns, this is unlikely though
    94  				break
    95  			}
    96  		}
    97  	}
    98  
    99  	if fields["name"] != "" && fields["version"] != "" {
   100  		var metadata gemData
   101  		if err := mapstructure.Decode(fields, &metadata); err != nil {
   102  			return nil, nil, fmt.Errorf("unable to decode gem metadata: %w", err)
   103  		}
   104  
   105  		pkgs = append(
   106  			pkgs,
   107  			newGemspecPackage(
   108  				metadata,
   109  				reader.Location,
   110  			),
   111  		)
   112  	}
   113  
   114  	return pkgs, nil, nil
   115  }
   116  
   117  // renderUtf8 takes any string escaped string subsections from the ruby string and replaces those sections with the UTF8 runes.
   118  func renderUtf8(s string) string {
   119  	fullReplacement := unicodePattern.ReplaceAllStringFunc(s, func(unicodeSection string) string {
   120  		var replacement string
   121  		// note: the json parser already has support for interpreting hex-representations of unicode escaped strings as unicode runes.
   122  		// we can do this ourselves with strconv.Atoi, or leverage the existing json package.
   123  		if err := json.Unmarshal([]byte(`"`+unicodeSection+`"`), &replacement); err != nil {
   124  			return unicodeSection
   125  		}
   126  		return replacement
   127  	})
   128  	return fullReplacement
   129  }