github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/r/parse_description.go (about) 1 package r 2 3 import ( 4 "bufio" 5 "context" 6 "io" 7 "regexp" 8 "strings" 9 10 "github.com/anchore/syft/syft/artifact" 11 "github.com/anchore/syft/syft/file" 12 "github.com/anchore/syft/syft/pkg" 13 "github.com/anchore/syft/syft/pkg/cataloger/generic" 14 ) 15 16 /* some examples of license strings found in DESCRIPTION files: 17 find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'License:' | sort | uniq 18 License: GPL 19 License: GPL (>= 2) 20 License: GPL (>=2) 21 License: GPL(>=2) 22 License: GPL (>= 2) | file LICENCE 23 License: GPL-2 | GPL-3 24 License: GPL-3 25 License: LGPL (>= 2) 26 License: LGPL (>= 2.1) 27 License: MIT + file LICENSE 28 License: Part of R 4.3.0 29 License: Unlimited 30 */ 31 32 func parseDescriptionFile(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 33 values := extractFieldsFromDescriptionFile(reader) 34 m := parseDataFromDescriptionMap(values) 35 p := newPackage(m, []file.Location{reader.Location}...) 36 if p.Name == "" || p.Version == "" { 37 return nil, nil, nil 38 } 39 return []pkg.Package{p}, nil, nil 40 } 41 42 type parseData struct { 43 Package string 44 Version string 45 License string 46 pkg.RDescription 47 } 48 49 func parseDataFromDescriptionMap(values map[string]string) parseData { 50 return parseData{ 51 License: values["License"], 52 Package: values["Package"], 53 Version: values["Version"], 54 RDescription: pkg.RDescription{ 55 Title: values["Title"], 56 Description: cleanMultiLineValue(values["Description"]), 57 Maintainer: values["Maintainer"], 58 URL: commaSeparatedList(values["URL"]), 59 Depends: commaSeparatedList(values["Depends"]), 60 Imports: commaSeparatedList(values["Imports"]), 61 Suggests: commaSeparatedList(values["Suggests"]), 62 NeedsCompilation: yesNoToBool(values["NeedsCompilation"]), 63 Author: values["Author"], 64 Repository: values["Repository"], 65 Built: values["Built"], 66 }, 67 } 68 } 69 70 func yesNoToBool(s string) bool { 71 /* 72 $ docker run --rm -it rocker/r-ver bash 73 $ install2.r ggplot2 dplyr mlr3 caret # just some packages for a larger sample 74 $ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | sort | uniq 75 NeedsCompilation: no 76 NeedsCompilation: yes 77 $ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep 'NeedsCompilation:' | wc -l 78 105 79 */ 80 return strings.EqualFold(s, "yes") 81 } 82 83 func commaSeparatedList(s string) []string { 84 var result []string 85 split := strings.Split(s, ",") 86 for _, piece := range split { 87 value := strings.TrimSpace(piece) 88 if value == "" { 89 continue 90 } 91 result = append(result, value) 92 } 93 return result 94 } 95 96 var space = regexp.MustCompile(`\s+`) 97 98 func cleanMultiLineValue(s string) string { 99 return space.ReplaceAllString(s, " ") 100 } 101 102 func extractFieldsFromDescriptionFile(reader io.Reader) map[string]string { 103 result := make(map[string]string) 104 key := "" 105 var valueFragment strings.Builder 106 scanner := bufio.NewScanner(reader) 107 108 for scanner.Scan() { 109 line := scanner.Text() 110 // line is like Key: Value -> start capturing value; close out previous value 111 // line is like \t\t continued value -> append to existing value 112 if len(line) == 0 { 113 continue 114 } 115 if startsWithWhitespace(line) { 116 // we're continuing a value 117 if key == "" { 118 continue 119 } 120 valueFragment.WriteByte('\n') 121 valueFragment.WriteString(strings.TrimSpace(line)) 122 } else { 123 if key != "" { 124 // capture previous value 125 result[key] = valueFragment.String() 126 key = "" 127 valueFragment = strings.Builder{} 128 } 129 parts := strings.SplitN(line, ":", 2) 130 if len(parts) != 2 { 131 continue 132 } 133 key = parts[0] 134 valueFragment.WriteString(strings.TrimSpace(parts[1])) 135 } 136 } 137 if key != "" { 138 result[key] = valueFragment.String() 139 } 140 return result 141 } 142 143 func startsWithWhitespace(s string) bool { 144 if s == "" { 145 return false 146 } 147 return s[0] == ' ' || s[0] == '\t' 148 }