github.com/google/osv-scalibr@v0.4.1/veles/detect_test.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package veles_test
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"io"
    21  	"strings"
    22  	"testing"
    23  
    24  	"github.com/google/go-cmp/cmp"
    25  	"github.com/google/go-cmp/cmp/cmpopts"
    26  	"github.com/google/osv-scalibr/veles"
    27  	"github.com/google/osv-scalibr/veles/velestest"
    28  )
    29  
// testDetectionEngineSubCase is a single input together with the secrets that
// Detect is expected to report for it.
type testDetectionEngineSubCase struct {
	name  string         // sub-test name passed to t.Run
	input string         // text handed to Detect through strings.NewReader
	want  []veles.Secret // expected secrets; compared ignoring order, nil equals empty
}
    35  
    36  // TestDetectionEngine_withSmallBuffer makes sure that the DetectionEngine
    37  // handles retention and overlap correctly. This is best tested with small
    38  // buffer sizes.
    39  //
    40  // Each test case manually ensures that retainLen is set to the maximum secret
    41  // length.
    42  func TestDetectionEngine_withSmallBuffer(t *testing.T) {
    43  	fakeSecrets := velestest.FakeSecretsT(t)
    44  	mustEngine := func(e *veles.DetectionEngine, err error) *veles.DetectionEngine {
    45  		t.Helper()
    46  		if err != nil {
    47  			t.Fatalf("veles.NewDetectionEngine() error: %v, want nil", err)
    48  		}
    49  		return e
    50  	}
    51  	cases := []struct {
    52  		name   string
    53  		engine *veles.DetectionEngine
    54  		sub    []testDetectionEngineSubCase
    55  	}{
    56  		{
    57  			name: "single_detector",
    58  			engine: mustEngine(veles.NewDetectionEngine(
    59  				velestest.FakeDetectors("FOO"),
    60  				veles.WithReadLen(5),
    61  				veles.WithRetainLen(3),
    62  			)),
    63  			sub: []testDetectionEngineSubCase{
    64  				{
    65  					name:  "empty string",
    66  					input: "",
    67  					want:  nil,
    68  				},
    69  				{
    70  					name:  "no matches chunk smaller retain Len",
    71  					input: "aaa",
    72  					want:  nil,
    73  				},
    74  				{
    75  					name:  "no matches in single chunk",
    76  					input: "aaaaaaab",
    77  					want:  nil,
    78  				},
    79  				{
    80  					name:  "no matches in multiple chunks",
    81  					input: "aaaaaaabaaaabaaa",
    82  					want:  nil,
    83  				},
    84  				{
    85  					name:  "single match at start of single chunk",
    86  					input: "FOOaaaab",
    87  					want:  fakeSecrets("FOO"),
    88  				},
    89  				{
    90  					name:  "single match in middle of single chunk",
    91  					input: "aaFOOaab",
    92  					want:  fakeSecrets("FOO"),
    93  				},
    94  				{
    95  					name:  "single match at end of single chunk",
    96  					input: "aaaaaFOO",
    97  					want:  fakeSecrets("FOO"),
    98  				},
    99  				{
   100  					name:  "single match in overlap",
   101  					input: "aaaaaaFOOaaab",
   102  					want:  fakeSecrets("FOO"),
   103  				},
   104  				{
   105  					name:  "single match at start of second chunk",
   106  					input: "aaaaaaabFOOab",
   107  					want:  fakeSecrets("FOO"),
   108  				},
   109  				{
   110  					name:  "single match in middle of second chunk",
   111  					input: "aaaaaaabaFOOb",
   112  					want:  fakeSecrets("FOO"),
   113  				},
   114  				{
   115  					name:  "single match at end of second chunk",
   116  					input: "aaaaaaabaaFOO",
   117  					want:  fakeSecrets("FOO"),
   118  				},
   119  				{
   120  					name:  "multiple matches in single chunk",
   121  					input: "FOOFOOab",
   122  					want:  fakeSecrets("FOO", "FOO"),
   123  				},
   124  				{
   125  					name:  "multiple matches across chunks no overlap",
   126  					input: "aaFOOaabaaFOOaab",
   127  					want:  fakeSecrets("FOO", "FOO"),
   128  				},
   129  				{
   130  					name:  "multiple matches across chunks with overlap",
   131  					input: "aaFOOaFOOaabFOOab",
   132  					want:  fakeSecrets("FOO", "FOO", "FOO"),
   133  				},
   134  			},
   135  		},
   136  		{
   137  			name: "multiple_same_length_detectors",
   138  			engine: mustEngine(veles.NewDetectionEngine(
   139  				velestest.FakeDetectors("FOO", "BAR", "BAZ"),
   140  				veles.WithReadLen(5),
   141  				veles.WithRetainLen(3),
   142  			)),
   143  			sub: []testDetectionEngineSubCase{
   144  				{
   145  					name:  "empty input",
   146  					input: "",
   147  					want:  nil,
   148  				},
   149  				{
   150  					name:  "no match",
   151  					input: "aaaaabafsdfasdfjlasdjfalsdkjflkasdjflasdfklasjdfyhekhladsf",
   152  					want:  nil,
   153  				},
   154  				{
   155  					name:  "matches only first",
   156  					input: "aaFOOaab",
   157  					want:  fakeSecrets("FOO"),
   158  				},
   159  				{
   160  					name:  "matches only second",
   161  					input: "aaBARaab",
   162  					want:  fakeSecrets("BAR"),
   163  				},
   164  				{
   165  					name:  "matches only third",
   166  					input: "aaBAZaab",
   167  					want:  fakeSecrets("BAZ"),
   168  				},
   169  				{
   170  					name:  "matches back to back",
   171  					input: "FOOBARBAZ",
   172  					want:  fakeSecrets("FOO", "BAR", "BAZ"),
   173  				},
   174  				{
   175  					name:  "matches back to back unordered",
   176  					input: "BAZBARFOO",
   177  					want:  fakeSecrets("FOO", "BAR", "BAZ"),
   178  				},
   179  				{
   180  					name:  "matches multiple",
   181  					input: "aaBARBARaFOOBARaBAZaaaaBAZFOOaFOOa",
   182  					want: fakeSecrets(
   183  						"FOO", "FOO", "FOO",
   184  						"BAR", "BAR", "BAR",
   185  						"BAZ", "BAZ",
   186  					),
   187  				},
   188  			},
   189  		},
   190  		{
   191  			name: "multiple_different_length_detectors",
   192  			engine: mustEngine(veles.NewDetectionEngine(
   193  				velestest.FakeDetectors("FOO", "HELLO", "FRIENDS"),
   194  				veles.WithRetainLen(7),
   195  				veles.WithReadLen(5),
   196  			)),
   197  			sub: []testDetectionEngineSubCase{
   198  				{
   199  					name:  "empty input",
   200  					input: "",
   201  					want:  nil,
   202  				},
   203  				{
   204  					name:  "no match",
   205  					input: "ksdjlf;alksjkljfa;lsdkfukasdfjm;lasdufieuraoerwoijfdasf93423",
   206  					want:  nil,
   207  				},
   208  				{
   209  					name:  "two matches in overlap",
   210  					input: "aaaaaFOOFOOb",
   211  					want:  fakeSecrets("FOO", "FOO"),
   212  				},
   213  				{
   214  					name:  "all match",
   215  					input: "FOOaFRIENDSHELLOaFOOaaaaaHELLOa",
   216  					want:  fakeSecrets("FOO", "FOO", "HELLO", "HELLO", "FRIENDS"),
   217  				},
   218  			},
   219  		},
   220  		{
   221  			name: "overlapping_detectors",
   222  			engine: mustEngine(veles.NewDetectionEngine(
   223  				velestest.FakeDetectors("TEST13", "TEST1337"),
   224  				veles.WithRetainLen(8),
   225  				veles.WithReadLen(8),
   226  			)),
   227  			sub: []testDetectionEngineSubCase{
   228  				{
   229  					name:  "empty input",
   230  					input: "",
   231  					want:  nil,
   232  				},
   233  				{
   234  					name:  "no match",
   235  					input: "kjsd;aflkduyrkyerye84793248723094jhklfdslkajfahldfe7ear",
   236  					want:  nil,
   237  				},
   238  				{
   239  					name:  "matches just the smaller",
   240  					input: "aaTEST13aaaaaaabaaa",
   241  					want:  fakeSecrets("TEST13"),
   242  				},
   243  				{
   244  					name:  "matches both",
   245  					input: "aaTEST1337aaaaabaaTEST13aa",
   246  					want:  fakeSecrets("TEST13", "TEST13", "TEST1337"),
   247  				},
   248  			},
   249  		},
   250  	}
   251  	for _, tc := range cases {
   252  		t.Run(tc.name, func(t *testing.T) {
   253  			t.Parallel()
   254  			for _, sc := range tc.sub {
   255  				t.Run(sc.name, func(t *testing.T) {
   256  					t.Parallel()
   257  					got, err := tc.engine.Detect(t.Context(), strings.NewReader(sc.input))
   258  					if err != nil {
   259  						t.Errorf("Detect() error: %v, want nil", err)
   260  					}
   261  					if diff := cmp.Diff(sc.want, got, cmpopts.EquateEmpty(), cmpopts.SortSlices(velestest.LessFakeSecretT(t))); diff != "" {
   262  						t.Errorf("Detect() diff (-want +got):\n%s", diff)
   263  					}
   264  				})
   265  			}
   266  		})
   267  	}
   268  }
   269  
   270  func TestDetectionEngine_withDefaults(t *testing.T) {
   271  	engine, err := veles.NewDetectionEngine(velestest.FakeDetectors("BEGIN", "END"))
   272  	if err != nil {
   273  		t.Errorf("NewDetectionEngine() error: %v, want nil", err)
   274  	}
   275  	want := velestest.FakeSecretsT(t)("BEGIN", "END")
   276  	cases := []struct {
   277  		name     string
   278  		inputLen int
   279  	}{
   280  		{
   281  			name:     "1 kiB",
   282  			inputLen: 1 * veles.KiB,
   283  		},
   284  		{
   285  			name:     "1 MiB",
   286  			inputLen: 1 * veles.MiB,
   287  		},
   288  		{
   289  			name:     "1 GiB",
   290  			inputLen: 1 * veles.GiB,
   291  		},
   292  	}
   293  	for _, tc := range cases {
   294  		t.Run(tc.name, func(t *testing.T) {
   295  			t.Parallel()
   296  			r := newFakeReader(tc.inputLen)
   297  			got, err := engine.Detect(t.Context(), r)
   298  			if err != nil {
   299  				t.Fatalf("Detect() error: %v, want nil", err)
   300  			}
   301  			if diff := cmp.Diff(want, got, cmpopts.EquateEmpty(), cmpopts.SortSlices(velestest.LessFakeSecretT(t))); diff != "" {
   302  				t.Errorf("Detect() diff (-want +got):\n%s", diff)
   303  			}
   304  		})
   305  	}
   306  }
   307  
   308  func TestDetectionEngine_respectsContext(t *testing.T) {
   309  	engine, err := veles.NewDetectionEngine(velestest.FakeDetectors("FOO"))
   310  	if err != nil {
   311  		t.Errorf("NewDetectionEngine() error: %v, want nil", err)
   312  	}
   313  	ctx, cancel := context.WithCancel(t.Context())
   314  	cancel()
   315  	_, err = engine.Detect(ctx, strings.NewReader("meaningless test input"))
   316  	if !errors.Is(err, context.Canceled) {
   317  		t.Errorf("Detect() error: %v, want context.Canceled", err)
   318  	}
   319  }
   320  
   321  func TestNewDetectionEngine_errors(t *testing.T) {
   322  	cases := []struct {
   323  		name      string
   324  		detectors []veles.Detector
   325  		opts      []veles.DetectionEngineOption
   326  	}{
   327  		{
   328  			name:      "missing detectors",
   329  			detectors: nil,
   330  		},
   331  		{
   332  			name:      "empty detectors",
   333  			detectors: []veles.Detector{},
   334  		},
   335  		{
   336  			name:      "too small retain len",
   337  			detectors: velestest.FakeDetectors("HELLOWORLD"),
   338  			opts:      []veles.DetectionEngineOption{veles.WithRetainLen(3)},
   339  		},
   340  	}
   341  	for _, tc := range cases {
   342  		t.Run(tc.name, func(t *testing.T) {
   343  			t.Parallel()
   344  			if _, err := veles.NewDetectionEngine(tc.detectors, tc.opts...); err == nil {
   345  				t.Error("NewDetectionEngine() error: nil, want non-nil")
   346  			}
   347  		})
   348  	}
   349  }
   350  
   351  // fakeReader can be used to simulate reads from arbitrarily large files.
   352  //
   353  // It will output "BEGINaaa...aaaEND" with number of 'a' in the middle so that
   354  // the total length equals the configured len.
   355  type fakeReader struct {
   356  	size    int
   357  	written int
   358  }
   359  
   360  func newFakeReader(size int) *fakeReader {
   361  	return &fakeReader{
   362  		size:    size,
   363  		written: 0,
   364  	}
   365  }
   366  
   367  func (r *fakeReader) Read(b []byte) (int, error) {
   368  	n := 0
   369  	if r.written == 0 {
   370  		// Write "BEGIN" on first Read.
   371  		if len(b) < 5 {
   372  			return 0, io.ErrShortBuffer
   373  		}
   374  		b[0] = 'B'
   375  		b[1] = 'E'
   376  		b[2] = 'G'
   377  		b[3] = 'I'
   378  		b[4] = 'N'
   379  		n = 5
   380  		r.written = 5
   381  	}
   382  	if r.written >= r.size {
   383  		return 0, io.EOF
   384  	}
   385  	remains := r.size - 3 - r.written
   386  	for ; n < min(len(b), remains); n++ {
   387  		b[n] = 'a'
   388  		r.written++
   389  	}
   390  	if n == len(b) {
   391  		return n, nil
   392  	}
   393  	// Write "END" at the end. Need to take special care for edge cases where the
   394  	// buffer is almost full.
   395  	if n < len(b) && r.size-r.written == 3 {
   396  		b[n] = 'E'
   397  		n++
   398  		r.written++
   399  	}
   400  	if n < len(b) && r.size-r.written == 2 {
   401  		b[n] = 'N'
   402  		n++
   403  		r.written++
   404  	}
   405  	if n < len(b) && r.size-r.written == 1 {
   406  		b[n] = 'D'
   407  		n++
   408  		r.written++
   409  	}
   410  	return n, nil
   411  }