github.com/ActiveState/cli@v0.0.0-20240508170324-6801f60cd051/internal/fileutils/replace_bench_test.go (about) 1 package fileutils 2 3 import ( 4 "bufio" 5 "io" 6 "math/rand" 7 "os" 8 "testing" 9 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 ) 13 14 func setup(oldPath, newPath, filler string, binary bool) []byte { 15 l := 1000000 16 res := make([]byte, l) 17 for i := 0; i < l; i++ { 18 if binary { 19 res[i] = byte(rand.Intn(256)) 20 } else { 21 res[i] = byte(rand.Intn(255) + 1) 22 } 23 } 24 25 return res 26 } 27 28 var result []byte 29 30 // BenchmarkRead compares how fast it takes to read a file that we want to replace later 31 // Three methods are compared: 32 // 1) read everything in bulk 33 // 2) use bufio.NewReader to read the data 34 // 3) read only a fixed amount of data 35 // On my SSD, (3) is about 15% faster than (1), (2) is the slowest by a factor of 8 compared to (3) 36 // Streaming would therefore improve the performance, but is more complicated to implement and leads to a slower replacement step 37 func BenchmarkRead(b *testing.B) { 38 oldPath := "abc/def/ghi" 39 newPath := "def/ghi" 40 byts := setup(oldPath, newPath, "/bin/python.sh", true) 41 42 testFile := TempFileUnsafe("", "") 43 _, err := testFile.Write(byts) 44 if err != nil { 45 b.Errorf("failed to write test file: %v", err) 46 } 47 err = testFile.Close() 48 if err != nil { 49 b.Errorf("failed to close test file: %v", err) 50 } 51 defer os.Remove(testFile.Name()) 52 53 b.ResetTimer() 54 55 b.Run("read file (bulk)", func(bb *testing.B) { 56 for n := 0; n < bb.N; n++ { 57 f, err := os.Open(testFile.Name()) 58 if err != nil { 59 bb.Errorf("Failed to open file: %v", err) 60 } 61 defer func() { 62 f.Close() 63 }() 64 r, err := io.ReadAll(f) 65 if err != nil { 66 bb.Errorf("Received error reading: %v", err) 67 } 68 if len(r) != len(byts) { 69 bb.Errorf("Expected to read %d bytes, read = %d", len(byts), len(r)) 70 } 71 } 72 }) 73 74 b.Run("read file (bufio)", func(bb *testing.B) { 75 for n := 0; n < bb.N; n++ { 76 f, err := os.Open(testFile.Name()) 77 if err != nil { 78 bb.Errorf("Failed to open file: %v", err) 79 } 80 br := bufio.NewReaderSize(f, 1024) 81 defer func() { 82 f.Close() 83 }() 84 var nr int 85 for { 86 _, err := br.ReadByte() 87 if err == io.EOF { 88 break 89 } 90 nr++ 91 } 92 if err != nil { 93 bb.Errorf("Received error reading: %v", err) 94 } 95 if nr != len(byts) { 96 bb.Errorf("Expected to read %d bytes, read = %d", len(byts), nr) 97 } 98 } 99 }) 100 101 b.Run("read file (stream)", func(bb *testing.B) { 102 for n := 0; n < bb.N; n++ { 103 f, err := os.Open(testFile.Name()) 104 if err != nil { 105 bb.Errorf("Failed to open file: %v", err) 106 } 107 defer func() { 108 f.Close() 109 }() 110 b := make([]byte, 1024) 111 var nr int 112 for { 113 n, err := f.Read(b) 114 if n == 0 && err == io.EOF { 115 break 116 } 117 nr += n 118 } 119 if err != nil { 120 bb.Errorf("Received error reading: %v", err) 121 } 122 if nr != len(byts) { 123 bb.Errorf("Expected to read %d bytes, read = %d", len(byts), nr) 124 } 125 } 126 }) 127 } 128 129 // BenchmarkWrite measures the time it safes to write the replaced file back to disk 130 // The measurements should be compared to the time it takes to read file contents and to replace the strings. 131 // On my SSD Drive, writing to disk takes about 1.6 times as long as the replacement step 132 // and approximately as long as reading the file contents to memory initially. 133 func BenchmarkWrite(b *testing.B) { 134 oldPath := "abc/def/ghi" 135 newPath := "def/ghi" 136 byts := setup(oldPath, newPath, "/bin/python.sh", true) 137 b.ResetTimer() 138 139 for n := 0; n < b.N; n++ { 140 f := TempFileUnsafe("", "") 141 defer func() { 142 f.Close() 143 os.Remove(f.Name()) 144 }() 145 nw, err := f.Write(byts) 146 if err != nil { 147 b.Errorf("Received error writing: %v", err) 148 } 149 if nw != len(byts) { 150 b.Errorf("Expected to write %d bytes, written = %d", len(byts), nw) 151 } 152 } 153 } 154 155 // BenchmarkReplace measures how long it takes to replace text in large files 156 // Two main methods are compared (one based on regular expressions, the other is more specialized) 157 // Replacing in binary files (with nul-terminated strings) and text files is considered separately. 158 // Sample results are: 159 // BenchmarkReplace/with_regex_(binary)-8 2335 458652 ns/op 2021164 B/op 35 allocs/op 160 // BenchmarkReplace/without_regex_(binary)-8 12064 96341 ns/op 4 B/op 1 allocs/op 161 // BenchmarkReplace/with_regex_(string)-8 2142 470041 ns/op 2019299 B/op 28 allocs/op 162 // BenchmarkReplace/without_regex_(string)-8 4059 257515 ns/op 1007616 B/op 1 allocs/op 163 func BenchmarkReplace(b *testing.B) { 164 oldPath := "abc/def/ghi" 165 newPath := "def/ghi" 166 binByts := setup(oldPath, newPath, "/bin/python.sh", true) 167 stringByts := setup(oldPath, newPath, "/bin/python.sh", false) 168 runs := []struct { 169 name string 170 f func([]byte, string, string) (bool, []byte, error) 171 input []byte 172 }{ 173 { 174 "with regex (binary)", 175 replaceInFile, 176 binByts, 177 }, 178 { 179 "with regex (string)", 180 replaceInFile, 181 stringByts, 182 }, 183 } 184 b.ResetTimer() 185 186 for _, run := range runs { 187 b.Run(run.name, func(bb *testing.B) { 188 var r []byte 189 for n := 0; n < bb.N; n++ { 190 _, res, err := run.f(run.input, oldPath, newPath) 191 if err != nil { 192 bb.Errorf("Received error: %v", err) 193 } 194 if len(res) != len(run.input) { 195 bb.Errorf("Expected len = %d, got = %d", len(run.input), len(res)) 196 } 197 r = res 198 } 199 result = r 200 201 }) 202 } 203 } 204 205 func TestReplaceBytesError(t *testing.T) { 206 b := []byte("Hello world\x00") 207 _, _, err := replaceInFile(b, "short", "longer") 208 assert.Error(t, err) 209 } 210 211 func TestReplaceBytes(t *testing.T) { 212 oldPath := "abc/def/ghi" 213 newPath := "def/ghi" 214 215 byts := []byte("123abc/def/ghi/bin/python\x00456abc/def/ghi/bin/perl\x00other") 216 expected := []byte("123def/ghi/bin/python\x00\x00\x00\x00\x00456def/ghi/bin/perl\x00\x00\x00\x00\x00other") 217 218 text := []byte("'123abc/def/ghi/bin/python'456'abc/def/ghi/bin/perl'other") 219 textExpected := []byte("'123def/ghi/bin/python'456'def/ghi/bin/perl'other") 220 221 noMatchByts := []byte("nothing to match here\x00") 222 noMatchText := []byte("nothing to match here\x00") 223 224 runs := []struct { 225 name string 226 f func([]byte, string, string) (bool, []byte, error) 227 input []byte 228 expected []byte 229 changes bool 230 }{ 231 {"nul-terminated with regex", replaceInFile, byts, expected, true}, 232 {"text with regex", replaceInFile, text, textExpected, true}, 233 {"nul-terminated with regex - no match", replaceInFile, noMatchByts, noMatchByts, false}, 234 {"text with regex - no match", replaceInFile, noMatchText, noMatchText, false}, 235 } 236 237 for _, run := range runs { 238 t.Run(run.name, func(tt *testing.T) { 239 240 count, res, err := run.f(run.input, oldPath, newPath) 241 require.NoError(tt, err) 242 243 assert.Equal(t, run.expected, res) 244 assert.Equal(t, run.changes, count) 245 }) 246 } 247 }