github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/r/parse_description.go (about)

     1  package r
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"io"
     7  	"regexp"
     8  	"strings"
     9  
    10  	"github.com/anchore/syft/syft/artifact"
    11  	"github.com/anchore/syft/syft/file"
    12  	"github.com/anchore/syft/syft/pkg"
    13  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    14  )
    15  
    16  /* some examples of license strings found in DESCRIPTION files:
    17  find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
    18  License: GPL
    19  License: GPL (>= 2)
    20  License: GPL (>=2)
    21  License: GPL(>=2)
    22  License: GPL (>= 2) | file LICENCE
    23  License: GPL-2 | GPL-3
    24  License: GPL-3
    25  License: LGPL (>= 2)
    26  License: LGPL (>= 2.1)
    27  License: MIT + file LICENSE
    28  License: Part of R 4.3.0
    29  License: Unlimited
    30  */
    31  
    32  func parseDescriptionFile(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    33  	values := extractFieldsFromDescriptionFile(reader)
    34  	m := parseDataFromDescriptionMap(values)
    35  	p := newPackage(m, []file.Location{reader.Location}...)
    36  	if p.Name == "" || p.Version == "" {
    37  		return nil, nil, nil
    38  	}
    39  	return []pkg.Package{p}, nil, nil
    40  }
    41  
// parseData holds the values taken from a DESCRIPTION file: the package name,
// version, and license string, alongside the embedded pkg.RDescription fields.
type parseData struct {
	Package string // value of the "Package" field
	Version string // value of the "Version" field
	License string // value of the "License" field, stored as found
	pkg.RDescription
}
    48  
    49  func parseDataFromDescriptionMap(values map[string]string) parseData {
    50  	return parseData{
    51  		License: values["License"],
    52  		Package: values["Package"],
    53  		Version: values["Version"],
    54  		RDescription: pkg.RDescription{
    55  			Title:            values["Title"],
    56  			Description:      cleanMultiLineValue(values["Description"]),
    57  			Maintainer:       values["Maintainer"],
    58  			URL:              commaSeparatedList(values["URL"]),
    59  			Depends:          commaSeparatedList(values["Depends"]),
    60  			Imports:          commaSeparatedList(values["Imports"]),
    61  			Suggests:         commaSeparatedList(values["Suggests"]),
    62  			NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
    63  			Author:           values["Author"],
    64  			Repository:       values["Repository"],
    65  			Built:            values["Built"],
    66  		},
    67  	}
    68  }
    69  
    70  func yesNoToBool(s string) bool {
    71  	/*
    72  		$ docker run --rm -it rocker/r-ver bash
    73  		$ install2.r ggplot2 dplyr mlr3 caret # just some packages for a larger sample
    74  		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | sort | uniq
    75  		NeedsCompilation: no
    76  		NeedsCompilation: yes
    77  		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | wc -l
    78  		105
    79  	*/
    80  	return strings.EqualFold(s, "yes")
    81  }
    82  
    83  func commaSeparatedList(s string) []string {
    84  	var result []string
    85  	split := strings.Split(s, ",")
    86  	for _, piece := range split {
    87  		value := strings.TrimSpace(piece)
    88  		if value == "" {
    89  			continue
    90  		}
    91  		result = append(result, value)
    92  	}
    93  	return result
    94  }
    95  
    96  var space = regexp.MustCompile(`\s+`)
    97  
    98  func cleanMultiLineValue(s string) string {
    99  	return space.ReplaceAllString(s, " ")
   100  }
   101  
   102  func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
   103  	result := make(map[string]string)
   104  	key := ""
   105  	var valueFragment strings.Builder
   106  	scanner := bufio.NewScanner(reader)
   107  
   108  	for scanner.Scan() {
   109  		line := scanner.Text()
   110  		// line is like Key: Value -> start capturing value; close out previous value
   111  		// line is like \t\t continued value -> append to existing value
   112  		if len(line) == 0 {
   113  			continue
   114  		}
   115  		if startsWithWhitespace(line) {
   116  			// we're continuing a value
   117  			if key == "" {
   118  				continue
   119  			}
   120  			valueFragment.WriteByte('\n')
   121  			valueFragment.WriteString(strings.TrimSpace(line))
   122  		} else {
   123  			if key != "" {
   124  				// capture previous value
   125  				result[key] = valueFragment.String()
   126  				key = ""
   127  				valueFragment = strings.Builder{}
   128  			}
   129  			parts := strings.SplitN(line, ":", 2)
   130  			if len(parts) != 2 {
   131  				continue
   132  			}
   133  			key = parts[0]
   134  			valueFragment.WriteString(strings.TrimSpace(parts[1]))
   135  		}
   136  	}
   137  	if key != "" {
   138  		result[key] = valueFragment.String()
   139  	}
   140  	return result
   141  }
   142  
   143  func startsWithWhitespace(s string) bool {
   144  	if s == "" {
   145  		return false
   146  	}
   147  	return s[0] == ' ' || s[0] == '\t'
   148  }