code-intelligence.com/cifuzz@v0.40.0/internal/bundler/archive/archive_test.go (about)

     1  package archive
     2  
     3  import (
     4  	"archive/tar"
     5  	"bufio"
     6  	"compress/gzip"
     7  	"fmt"
     8  	"io/fs"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strings"
    14  	"testing"
    15  
    16  	"github.com/otiai10/copy"
    17  	"github.com/stretchr/testify/require"
    18  
    19  	"code-intelligence.com/cifuzz/internal/testutil"
    20  	"code-intelligence.com/cifuzz/pkg/log"
    21  	"code-intelligence.com/cifuzz/util/fileutil"
    22  )
    23  
    24  func TestWriteArchive(t *testing.T) {
    25  	testdataDir := filepath.Join("testdata", "archive_test")
    26  	require.DirExists(t, testdataDir)
    27  	dir := testutil.MkdirTemp(t, "", "write-archive-test-*")
    28  	err := copy.Copy(testdataDir, dir)
    29  	require.NoError(t, err)
    30  
    31  	// Create an empty directory to test that WriteArchive handles it - it can't be kept in testdata since Git doesn't
    32  	// allow checking in empty directories.
    33  	err = os.MkdirAll(filepath.Join(dir, "empty_dir"), 0o755)
    34  	require.NoError(t, err)
    35  
    36  	// Walk the testdata dir and write all contents to an archive
    37  	archive, err := os.CreateTemp("", "bundle-*.tar.gz")
    38  	require.NoError(t, err)
    39  	t.Cleanup(func() { fileutil.Cleanup(archive.Name()) })
    40  	writer := bufio.NewWriter(archive)
    41  	archiveWriter := NewTarArchiveWriter(writer, true)
    42  	err = archiveWriter.WriteDir("", dir)
    43  	require.NoError(t, err)
    44  	err = archiveWriter.WriteHardLink(filepath.Join("dir1", "dir2", "test.sh"), filepath.Join("dir1", "hardlink"))
    45  	require.NoError(t, err)
    46  
    47  	err = archiveWriter.Close()
    48  	require.NoError(t, err)
    49  	err = writer.Flush()
    50  	require.NoError(t, err)
    51  	err = archive.Close()
    52  	require.NoError(t, err)
    53  
    54  	// Unpack archive contents with tar.
    55  	out := testutil.MkdirTemp(t, "", "archive-test-*")
    56  	cmd := exec.Command("tar", "-xvf", archive.Name(), "-C", out)
    57  	cmd.Stdout = os.Stdout
    58  	cmd.Stderr = os.Stderr
    59  	log.Printf("Command: %v", cmd.String())
    60  	err = cmd.Run()
    61  	require.NoError(t, err)
    62  
    63  	remainingExpectedEntries := []struct {
    64  		RelPath          string
    65  		FileContent      string
    66  		IsExecutableFile bool
    67  	}{
    68  		{".", "", false},
    69  		{"dir1", "", false},
    70  		{filepath.Join("dir1", "symlink"), "#!/usr/bin/env bash", true},
    71  		{filepath.Join("dir1", "hardlink"), "#!/usr/bin/env bash", true},
    72  		{filepath.Join("dir1", "dir2"), "", false},
    73  		{filepath.Join("dir1", "dir2", "test.sh"), "#!/usr/bin/env bash", true},
    74  		{filepath.Join("dir1", "dir2", "test.txt"), "foobar", false},
    75  		{"empty_dir", "", false},
    76  	}
    77  	// Verify that the archive contains exactly the expected files and directories.
    78  	// Do not assert group and other permissions which may be affected by masks.
    79  	err = filepath.WalkDir(out, func(absPath string, d fs.DirEntry, err error) error {
    80  		if err != nil {
    81  			return err
    82  		}
    83  		relPath, err := filepath.Rel(out, absPath)
    84  		if err != nil {
    85  			return err
    86  		}
    87  		for i, expectedEntry := range remainingExpectedEntries {
    88  			if relPath != expectedEntry.RelPath {
    89  				continue
    90  			}
    91  
    92  			shouldBeDir := expectedEntry.FileContent == ""
    93  			isDir := fileutil.IsDir(absPath)
    94  			require.Equalf(t, shouldBeDir, isDir, "Directory/file status doesn't match for %q", relPath)
    95  
    96  			if isDir {
    97  				remainingExpectedEntries = append(remainingExpectedEntries[:i], remainingExpectedEntries[i+1:]...)
    98  				return nil
    99  			}
   100  
   101  			// Perform additional checks on files.
   102  			stat, err := os.Lstat(absPath)
   103  			require.NoError(t, err)
   104  			require.Falsef(
   105  				t,
   106  				stat.Mode()&os.ModeSymlink == os.ModeSymlink,
   107  				"Expected symlinks to be archived as regular files: %q is a symlink",
   108  				relPath,
   109  			)
   110  
   111  			if runtime.GOOS != "windows" {
   112  				shouldBeExecutable := expectedEntry.IsExecutableFile
   113  				isExecutable := stat.Mode()&0o100 == 0o100
   114  				require.Equalf(
   115  					t,
   116  					shouldBeExecutable,
   117  					isExecutable,
   118  					"Expected executable bit to be preserved, unexpected value for %s",
   119  					relPath,
   120  				)
   121  			}
   122  
   123  			content, err := os.ReadFile(absPath)
   124  			require.NoError(t, err)
   125  			require.Equalf(t, expectedEntry.FileContent, string(content), "Contents are not as expected: %q", relPath)
   126  
   127  			remainingExpectedEntries = append(remainingExpectedEntries[:i], remainingExpectedEntries[i+1:]...)
   128  			return nil
   129  		}
   130  		require.Fail(t, "Unexpected archive content: "+relPath)
   131  		return nil
   132  	})
   133  	require.NoError(t, err)
   134  	var msg strings.Builder
   135  	for _, missingEntry := range remainingExpectedEntries {
   136  		msg.WriteString(fmt.Sprintf("  %q\n", missingEntry.RelPath))
   137  	}
   138  	require.Empty(t, remainingExpectedEntries, "Archive did not contain the following expected entries: %s", msg.String())
   139  }
   140  
   141  // Independently from the operating system, path separators in archive files have
   142  // to be always forward slashes.
   143  func TestInternalPaths(t *testing.T) {
   144  	testFile := filepath.Join("testdata", "archive_test", "dir1", "dir2", "test.txt")
   145  	require.FileExists(t, testFile)
   146  
   147  	archiveFile := createArchive(t, []fileEntry{
   148  		{filepath.Join("archive-dir", "hello"), testFile},
   149  	})
   150  
   151  	// Verify that file header has correct path separators.
   152  	// Unfortunately extracting the archive under Windows
   153  	// with the tar command or the archiveutils.Untar function
   154  	// will not show the actual problem, as it seems there are
   155  	// workarounds already in place.
   156  	archiveRead, err := os.Open(archiveFile.Name())
   157  	require.NoError(t, err)
   158  	t.Cleanup(func() { archiveRead.Close() })
   159  
   160  	gr, err := gzip.NewReader(archiveRead)
   161  	require.NoError(t, err)
   162  	t.Cleanup(func() { gr.Close() })
   163  
   164  	tr := tar.NewReader(gr)
   165  	header, err := tr.Next()
   166  	require.NoError(t, err)
   167  
   168  	require.Equal(t, "archive-dir/hello", header.Name)
   169  }
   170  
   171  // TestDuplicateFileContent verifies that the same file content is only stored
   172  // once in the archive. This tests a regression where the same file content was
   173  // stored multiple times, resulting in an unnecessarily large archive.
   174  func TestDuplicateFileContent(t *testing.T) {
   175  	testFile := filepath.Join("testdata", "dummy.blob")
   176  	require.FileExists(t, testFile)
   177  
   178  	archiveFile := createArchive(t, []fileEntry{
   179  		{"dummy.blob", testFile},
   180  	})
   181  
   182  	archiveStat, err := archiveFile.Stat()
   183  	require.NoError(t, err)
   184  
   185  	expectedSize := archiveStat.Size()
   186  	t.Logf("Created archive with size %d", expectedSize)
   187  
   188  	// Create a new archive with the same file content multiple times.
   189  	archiveFile = createArchive(t, []fileEntry{
   190  		{"dummy.blob", testFile},
   191  		{"dummy.blob", testFile},
   192  		{"dummy.blob", testFile},
   193  		{"dummy.blob", testFile},
   194  	})
   195  
   196  	archiveStat, err = archiveFile.Stat()
   197  	require.NoError(t, err)
   198  
   199  	actualSize := archiveStat.Size()
   200  	t.Logf("Created archive with size %d", actualSize)
   201  
   202  	require.Equal(t, expectedSize, actualSize)
   203  }
   204  
   205  // Use a struct instead of a map to allow multiple entries with the same
   206  // archive / source path.
   207  type fileEntry struct {
   208  	archivePath string
   209  	sourcePath  string
   210  }
   211  
   212  // Creates a tar.gz archive with the given files.
   213  func createArchive(t *testing.T, files []fileEntry) *os.File {
   214  	archiveFile, err := os.CreateTemp("", "bundle-*.tar.gz")
   215  	require.NoError(t, err)
   216  	t.Cleanup(func() { fileutil.Cleanup(archiveFile.Name()) })
   217  
   218  	writer := bufio.NewWriter(archiveFile)
   219  	archiveWriter := NewTarArchiveWriter(writer, true)
   220  
   221  	for _, fileEntry := range files {
   222  		err = archiveWriter.WriteFile(fileEntry.archivePath, fileEntry.sourcePath)
   223  		require.NoError(t, err)
   224  	}
   225  
   226  	err = archiveWriter.Close()
   227  	require.NoError(t, err)
   228  	err = writer.Flush()
   229  	require.NoError(t, err)
   230  	t.Cleanup(func() {
   231  		archiveFile.Close()
   232  	})
   233  
   234  	t.Logf("Created archive at: %s", archiveFile.Name())
   235  	return archiveFile
   236  }