github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/erlang/erlang_parser.go (about)

     1  package erlang
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"strings"
     9  	"unicode"
    10  )
    11  
    12  type erlangNode struct {
    13  	value interface{}
    14  }
    15  
    16  var errSkipComments = errors.New("")
    17  
    18  func (e erlangNode) Slice() []erlangNode {
    19  	out, ok := e.value.([]erlangNode)
    20  	if ok {
    21  		return out
    22  	}
    23  	return []erlangNode{}
    24  }
    25  
    26  func (e erlangNode) String() string {
    27  	out, ok := e.value.(string)
    28  	if ok {
    29  		return out
    30  	}
    31  	return ""
    32  }
    33  
    34  func (e erlangNode) Get(index int) erlangNode {
    35  	s := e.Slice()
    36  	if len(s) > index {
    37  		return s[index]
    38  	}
    39  	return erlangNode{}
    40  }
    41  
    42  func node(value interface{}) erlangNode {
    43  	return erlangNode{
    44  		value: value,
    45  	}
    46  }
    47  
    48  // parseErlang basic parser for erlang, used by rebar.lock
    49  func parseErlang(reader io.Reader) (erlangNode, error) {
    50  	data, err := io.ReadAll(reader)
    51  	if err != nil {
    52  		return node(nil), err
    53  	}
    54  
    55  	out := erlangNode{
    56  		value: []erlangNode{},
    57  	}
    58  
    59  	i := 0
    60  	for i < len(data) {
    61  		item, err := parseErlangBlock(data, &i)
    62  		if err == errSkipComments {
    63  			skipWhitespace(data, &i)
    64  			continue
    65  		}
    66  		if err != nil {
    67  			return node(nil), fmt.Errorf("%w\n%s", err, printError(data, i))
    68  		}
    69  
    70  		skipWhitespace(data, &i)
    71  
    72  		if i, ok := item.value.(string); ok && i == "." {
    73  			continue
    74  		}
    75  
    76  		out.value = append(out.value.([]erlangNode), item)
    77  	}
    78  	return out, nil
    79  }
    80  
    81  func printError(data []byte, i int) string {
    82  	line := 1
    83  	char := 1
    84  
    85  	prev := []string{}
    86  	curr := bytes.Buffer{}
    87  
    88  	for idx, c := range data {
    89  		if c == '\n' {
    90  			prev = append(prev, curr.String())
    91  			curr.Reset()
    92  
    93  			if idx >= i {
    94  				break
    95  			}
    96  
    97  			line++
    98  			char = 1
    99  			continue
   100  		}
   101  		if idx < i {
   102  			char++
   103  		}
   104  		curr.WriteByte(c)
   105  	}
   106  
   107  	l1 := fmt.Sprintf("%d", line-1)
   108  	l2 := fmt.Sprintf("%d", line)
   109  
   110  	if len(l1) < len(l2) {
   111  		l1 = " " + l1
   112  	}
   113  
   114  	sep := ": "
   115  
   116  	lines := ""
   117  	if len(prev) > 1 {
   118  		lines += fmt.Sprintf("%s%s%s\n", l1, sep, prev[len(prev)-2])
   119  	}
   120  	if len(prev) > 0 {
   121  		lines += fmt.Sprintf("%s%s%s\n", l2, sep, prev[len(prev)-1])
   122  	}
   123  
   124  	pointer := strings.Repeat(" ", len(l2)+len(sep)+char-1) + "^"
   125  
   126  	return fmt.Sprintf("line: %v, char: %v\n%s%s", line, char, lines, pointer)
   127  }
   128  
   129  func skipWhitespace(data []byte, i *int) {
   130  	for *i < len(data) && isWhitespace(data[*i]) {
   131  		*i++
   132  	}
   133  }
   134  
   135  func parseErlangBlock(data []byte, i *int) (erlangNode, error) {
   136  	block, err := parseErlangNode(data, i)
   137  	if err != nil {
   138  		return node(nil), err
   139  	}
   140  
   141  	skipWhitespace(data, i)
   142  	*i++ // skip the trailing .
   143  	return block, nil
   144  }
   145  
   146  func parseErlangNode(data []byte, i *int) (erlangNode, error) {
   147  	skipWhitespace(data, i)
   148  	c := data[*i]
   149  	switch c {
   150  	case '[', '{':
   151  		offset := *i + 1
   152  		skipWhitespace(data, &offset)
   153  		c2 := data[offset]
   154  
   155  		// Add support for empty lists
   156  		if (c == '[' && c2 == ']') || (c == '{' && c2 == '}') {
   157  			*i = offset + 1
   158  			return node(nil), nil
   159  		}
   160  
   161  		return parseErlangList(data, i)
   162  	case '"':
   163  		fallthrough
   164  	case '\'':
   165  		return parseErlangString(data, i)
   166  	case '<':
   167  		return parseErlangAngleString(data, i)
   168  	case '%':
   169  		parseErlangComment(data, i)
   170  		return node(nil), errSkipComments
   171  	}
   172  
   173  	if isLiteral(c) {
   174  		return parseErlangLiteral(data, i)
   175  	}
   176  
   177  	return erlangNode{}, fmt.Errorf("invalid literal character: %s", string(c))
   178  }
   179  
   180  func isWhitespace(c byte) bool {
   181  	return unicode.IsSpace(rune(c))
   182  }
   183  
   184  func isLiteral(c byte) bool {
   185  	r := rune(c)
   186  	return unicode.IsNumber(r) || unicode.IsLetter(r) || r == '.' || r == '_'
   187  }
   188  
   189  func parseErlangLiteral(data []byte, i *int) (erlangNode, error) {
   190  	var buf bytes.Buffer
   191  	for *i < len(data) {
   192  		c := data[*i]
   193  		if isLiteral(c) {
   194  			buf.WriteByte(c)
   195  		} else {
   196  			break
   197  		}
   198  		*i++
   199  	}
   200  	return node(buf.String()), nil
   201  }
   202  
   203  func parseErlangAngleString(data []byte, i *int) (erlangNode, error) {
   204  	*i += 2
   205  	out, err := parseErlangString(data, i)
   206  	*i += 2
   207  	return out, err
   208  }
   209  
   210  func parseErlangString(data []byte, i *int) (erlangNode, error) {
   211  	delim := data[*i]
   212  	*i++
   213  	var buf bytes.Buffer
   214  	for *i < len(data) {
   215  		c := data[*i]
   216  		if c == delim {
   217  			*i++
   218  			return node(buf.String()), nil
   219  		}
   220  		if c == '\\' {
   221  			*i++
   222  			if len(data) >= *i {
   223  				return node(nil), fmt.Errorf("invalid escape without closed string at %d", *i)
   224  			}
   225  			c = data[*i]
   226  		}
   227  		buf.WriteByte(c)
   228  		*i++
   229  	}
   230  	return node(nil), fmt.Errorf("unterminated string at %d", *i)
   231  }
   232  
   233  func parseErlangList(data []byte, i *int) (erlangNode, error) {
   234  	*i++
   235  	out := erlangNode{
   236  		value: []erlangNode{},
   237  	}
   238  	for *i < len(data) {
   239  		item, err := parseErlangNode(data, i)
   240  		if err != nil {
   241  			if err == errSkipComments {
   242  				skipWhitespace(data, i)
   243  				continue
   244  			}
   245  			return node(nil), err
   246  		}
   247  		out.value = append(out.value.([]erlangNode), item)
   248  		skipWhitespace(data, i)
   249  		c := data[*i]
   250  		switch c {
   251  		case ',':
   252  			*i++
   253  			continue
   254  		case '%':
   255  			// Starts a new comment node
   256  			continue
   257  		case ']', '}':
   258  			*i++
   259  			return out, nil
   260  		default:
   261  			return node(nil), fmt.Errorf("unexpected character: %s", string(c))
   262  		}
   263  	}
   264  	return out, nil
   265  }
   266  
   267  func parseErlangComment(data []byte, i *int) {
   268  	for *i < len(data) {
   269  		c := data[*i]
   270  
   271  		*i++
   272  
   273  		// Rest of a line is a comment. Deals with CR, LF and CR/LF
   274  		if c == '\n' {
   275  			break
   276  		} else if c == '\r' && data[*i] == '\n' {
   277  			*i++
   278  			break
   279  		}
   280  	}
   281  }