code-intelligence.com/cifuzz@v0.40.0/internal/bundler/archive/archive_test.go (about) 1 package archive 2 3 import ( 4 "archive/tar" 5 "bufio" 6 "compress/gzip" 7 "fmt" 8 "io/fs" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "testing" 15 16 "github.com/otiai10/copy" 17 "github.com/stretchr/testify/require" 18 19 "code-intelligence.com/cifuzz/internal/testutil" 20 "code-intelligence.com/cifuzz/pkg/log" 21 "code-intelligence.com/cifuzz/util/fileutil" 22 ) 23 24 func TestWriteArchive(t *testing.T) { 25 testdataDir := filepath.Join("testdata", "archive_test") 26 require.DirExists(t, testdataDir) 27 dir := testutil.MkdirTemp(t, "", "write-archive-test-*") 28 err := copy.Copy(testdataDir, dir) 29 require.NoError(t, err) 30 31 // Create an empty directory to test that WriteArchive handles it - it can't be kept in testdata since Git doesn't 32 // allow checking in empty directories. 33 err = os.MkdirAll(filepath.Join(dir, "empty_dir"), 0o755) 34 require.NoError(t, err) 35 36 // Walk the testdata dir and write all contents to an archive 37 archive, err := os.CreateTemp("", "bundle-*.tar.gz") 38 require.NoError(t, err) 39 t.Cleanup(func() { fileutil.Cleanup(archive.Name()) }) 40 writer := bufio.NewWriter(archive) 41 archiveWriter := NewTarArchiveWriter(writer, true) 42 err = archiveWriter.WriteDir("", dir) 43 require.NoError(t, err) 44 err = archiveWriter.WriteHardLink(filepath.Join("dir1", "dir2", "test.sh"), filepath.Join("dir1", "hardlink")) 45 require.NoError(t, err) 46 47 err = archiveWriter.Close() 48 require.NoError(t, err) 49 err = writer.Flush() 50 require.NoError(t, err) 51 err = archive.Close() 52 require.NoError(t, err) 53 54 // Unpack archive contents with tar. 55 out := testutil.MkdirTemp(t, "", "archive-test-*") 56 cmd := exec.Command("tar", "-xvf", archive.Name(), "-C", out) 57 cmd.Stdout = os.Stdout 58 cmd.Stderr = os.Stderr 59 log.Printf("Command: %v", cmd.String()) 60 err = cmd.Run() 61 require.NoError(t, err) 62 63 remainingExpectedEntries := []struct { 64 RelPath string 65 FileContent string 66 IsExecutableFile bool 67 }{ 68 {".", "", false}, 69 {"dir1", "", false}, 70 {filepath.Join("dir1", "symlink"), "#!/usr/bin/env bash", true}, 71 {filepath.Join("dir1", "hardlink"), "#!/usr/bin/env bash", true}, 72 {filepath.Join("dir1", "dir2"), "", false}, 73 {filepath.Join("dir1", "dir2", "test.sh"), "#!/usr/bin/env bash", true}, 74 {filepath.Join("dir1", "dir2", "test.txt"), "foobar", false}, 75 {"empty_dir", "", false}, 76 } 77 // Verify that the archive contains exactly the expected files and directories. 78 // Do not assert group and other permissions which may be affected by masks. 79 err = filepath.WalkDir(out, func(absPath string, d fs.DirEntry, err error) error { 80 if err != nil { 81 return err 82 } 83 relPath, err := filepath.Rel(out, absPath) 84 if err != nil { 85 return err 86 } 87 for i, expectedEntry := range remainingExpectedEntries { 88 if relPath != expectedEntry.RelPath { 89 continue 90 } 91 92 shouldBeDir := expectedEntry.FileContent == "" 93 isDir := fileutil.IsDir(absPath) 94 require.Equalf(t, shouldBeDir, isDir, "Directory/file status doesn't match for %q", relPath) 95 96 if isDir { 97 remainingExpectedEntries = append(remainingExpectedEntries[:i], remainingExpectedEntries[i+1:]...) 98 return nil 99 } 100 101 // Perform additional checks on files. 102 stat, err := os.Lstat(absPath) 103 require.NoError(t, err) 104 require.Falsef( 105 t, 106 stat.Mode()&os.ModeSymlink == os.ModeSymlink, 107 "Expected symlinks to be archived as regular files: %q is a symlink", 108 relPath, 109 ) 110 111 if runtime.GOOS != "windows" { 112 shouldBeExecutable := expectedEntry.IsExecutableFile 113 isExecutable := stat.Mode()&0o100 == 0o100 114 require.Equalf( 115 t, 116 shouldBeExecutable, 117 isExecutable, 118 "Expected executable bit to be preserved, unexpected value for %s", 119 relPath, 120 ) 121 } 122 123 content, err := os.ReadFile(absPath) 124 require.NoError(t, err) 125 require.Equalf(t, expectedEntry.FileContent, string(content), "Contents are not as expected: %q", relPath) 126 127 remainingExpectedEntries = append(remainingExpectedEntries[:i], remainingExpectedEntries[i+1:]...) 128 return nil 129 } 130 require.Fail(t, "Unexpected archive content: "+relPath) 131 return nil 132 }) 133 require.NoError(t, err) 134 var msg strings.Builder 135 for _, missingEntry := range remainingExpectedEntries { 136 msg.WriteString(fmt.Sprintf(" %q\n", missingEntry.RelPath)) 137 } 138 require.Empty(t, remainingExpectedEntries, "Archive did not contain the following expected entries: %s", msg.String()) 139 } 140 141 // Independently from the operating system, path separators in archive files have 142 // to be always forward slashes. 143 func TestInternalPaths(t *testing.T) { 144 testFile := filepath.Join("testdata", "archive_test", "dir1", "dir2", "test.txt") 145 require.FileExists(t, testFile) 146 147 archiveFile := createArchive(t, []fileEntry{ 148 {filepath.Join("archive-dir", "hello"), testFile}, 149 }) 150 151 // Verify that file header has correct path separators. 152 // Unfortunately extracting the archive under Windows 153 // with the tar command or the archiveutils.Untar function 154 // will not show the actual problem, as it seems there are 155 // workarounds already in place. 156 archiveRead, err := os.Open(archiveFile.Name()) 157 require.NoError(t, err) 158 t.Cleanup(func() { archiveRead.Close() }) 159 160 gr, err := gzip.NewReader(archiveRead) 161 require.NoError(t, err) 162 t.Cleanup(func() { gr.Close() }) 163 164 tr := tar.NewReader(gr) 165 header, err := tr.Next() 166 require.NoError(t, err) 167 168 require.Equal(t, "archive-dir/hello", header.Name) 169 } 170 171 // TestDuplicateFileContent verifies that the same file content is only stored 172 // once in the archive. This tests a regression where the same file content was 173 // stored multiple times, resulting in an unnecessarily large archive. 174 func TestDuplicateFileContent(t *testing.T) { 175 testFile := filepath.Join("testdata", "dummy.blob") 176 require.FileExists(t, testFile) 177 178 archiveFile := createArchive(t, []fileEntry{ 179 {"dummy.blob", testFile}, 180 }) 181 182 archiveStat, err := archiveFile.Stat() 183 require.NoError(t, err) 184 185 expectedSize := archiveStat.Size() 186 t.Logf("Created archive with size %d", expectedSize) 187 188 // Create a new archive with the same file content multiple times. 189 archiveFile = createArchive(t, []fileEntry{ 190 {"dummy.blob", testFile}, 191 {"dummy.blob", testFile}, 192 {"dummy.blob", testFile}, 193 {"dummy.blob", testFile}, 194 }) 195 196 archiveStat, err = archiveFile.Stat() 197 require.NoError(t, err) 198 199 actualSize := archiveStat.Size() 200 t.Logf("Created archive with size %d", actualSize) 201 202 require.Equal(t, expectedSize, actualSize) 203 } 204 205 // Use a struct instead of a map to allow multiple entries with the same 206 // archive / source path. 207 type fileEntry struct { 208 archivePath string 209 sourcePath string 210 } 211 212 // Creates a tar.gz archive with the given files. 213 func createArchive(t *testing.T, files []fileEntry) *os.File { 214 archiveFile, err := os.CreateTemp("", "bundle-*.tar.gz") 215 require.NoError(t, err) 216 t.Cleanup(func() { fileutil.Cleanup(archiveFile.Name()) }) 217 218 writer := bufio.NewWriter(archiveFile) 219 archiveWriter := NewTarArchiveWriter(writer, true) 220 221 for _, fileEntry := range files { 222 err = archiveWriter.WriteFile(fileEntry.archivePath, fileEntry.sourcePath) 223 require.NoError(t, err) 224 } 225 226 err = archiveWriter.Close() 227 require.NoError(t, err) 228 err = writer.Flush() 229 require.NoError(t, err) 230 t.Cleanup(func() { 231 archiveFile.Close() 232 }) 233 234 t.Logf("Created archive at: %s", archiveFile.Name()) 235 return archiveFile 236 }