github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/r/parse_description.go (about)

     1  package r
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"io"
     7  	"regexp"
     8  	"strings"
     9  
    10  	"github.com/anchore/syft/syft/artifact"
    11  	"github.com/anchore/syft/syft/file"
    12  	"github.com/anchore/syft/syft/pkg"
    13  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    14  	"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
    15  )
    16  
    17  /* some examples of license strings found in DESCRIPTION files:
    18  find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq
    19  License: GPL
    20  License: GPL (>= 2)
    21  License: GPL (>=2)
    22  License: GPL(>=2)
    23  License: GPL (>= 2) | file LICENCE
    24  License: GPL-2 | GPL-3
    25  License: GPL-3
    26  License: LGPL (>= 2)
    27  License: LGPL (>= 2.1)
    28  License: MIT + file LICENSE
    29  License: Part of R 4.3.0
    30  License: Unlimited
    31  */
    32  
    33  func parseDescriptionFile(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    34  	values := extractFieldsFromDescriptionFile(reader)
    35  	m := parseDataFromDescriptionMap(values)
    36  	p := newPackage(ctx, m, []file.Location{reader.Location}...)
    37  	if p.Name == "" || p.Version == "" {
    38  		return nil, nil, nil
    39  	}
    40  	p = licenses.RelativeToPackage(ctx, resolver, p)
    41  	return []pkg.Package{p}, nil, nil
    42  }
    43  
    44  type parseData struct {
    45  	Package string
    46  	Version string
    47  	License string
    48  	pkg.RDescription
    49  }
    50  
    51  func parseDataFromDescriptionMap(values map[string]string) parseData {
    52  	return parseData{
    53  		License: values["License"],
    54  		Package: values["Package"],
    55  		Version: values["Version"],
    56  		RDescription: pkg.RDescription{
    57  			Title:            values["Title"],
    58  			Description:      cleanMultiLineValue(values["Description"]),
    59  			Maintainer:       values["Maintainer"],
    60  			URL:              commaSeparatedList(values["URL"]),
    61  			Depends:          commaSeparatedList(values["Depends"]),
    62  			Imports:          commaSeparatedList(values["Imports"]),
    63  			Suggests:         commaSeparatedList(values["Suggests"]),
    64  			NeedsCompilation: yesNoToBool(values["NeedsCompilation"]),
    65  			Author:           values["Author"],
    66  			Repository:       values["Repository"],
    67  			Built:            values["Built"],
    68  		},
    69  	}
    70  }
    71  
    72  func yesNoToBool(s string) bool {
    73  	/*
    74  		$ docker run --rm -it rocker/r-ver bash
    75  		$ install2.r ggplot2 dplyr mlr3 caret # just some packages for a larger sample
    76  		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | sort | uniq
    77  		NeedsCompilation: no
    78  		NeedsCompilation: yes
    79  		$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | wc -l
    80  		105
    81  	*/
    82  	return strings.EqualFold(s, "yes")
    83  }
    84  
    85  func commaSeparatedList(s string) []string {
    86  	var result []string
    87  	split := strings.Split(s, ",")
    88  	for _, piece := range split {
    89  		value := strings.TrimSpace(piece)
    90  		if value == "" {
    91  			continue
    92  		}
    93  		result = append(result, value)
    94  	}
    95  	return result
    96  }
    97  
    98  var space = regexp.MustCompile(`\s+`)
    99  
   100  func cleanMultiLineValue(s string) string {
   101  	return space.ReplaceAllString(s, " ")
   102  }
   103  
   104  func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string {
   105  	result := make(map[string]string)
   106  	key := ""
   107  	var valueFragment strings.Builder
   108  	scanner := bufio.NewScanner(reader)
   109  
   110  	for scanner.Scan() {
   111  		line := scanner.Text()
   112  		// line is like Key: Value -> start capturing value; close out previous value
   113  		// line is like \t\t continued value -> append to existing value
   114  		if len(line) == 0 {
   115  			continue
   116  		}
   117  		if startsWithWhitespace(line) {
   118  			// we're continuing a value
   119  			if key == "" {
   120  				continue
   121  			}
   122  			valueFragment.WriteByte('\n')
   123  			valueFragment.WriteString(strings.TrimSpace(line))
   124  		} else {
   125  			if key != "" {
   126  				// capture previous value
   127  				result[key] = valueFragment.String()
   128  				key = ""
   129  				valueFragment = strings.Builder{}
   130  			}
   131  			parts := strings.SplitN(line, ":", 2)
   132  			if len(parts) != 2 {
   133  				continue
   134  			}
   135  			key = parts[0]
   136  			valueFragment.WriteString(strings.TrimSpace(parts[1]))
   137  		}
   138  	}
   139  	if key != "" {
   140  		result[key] = valueFragment.String()
   141  	}
   142  	return result
   143  }
   144  
   145  func startsWithWhitespace(s string) bool {
   146  	if s == "" {
   147  		return false
   148  	}
   149  	return s[0] == ' ' || s[0] == '\t'
   150  }