github.com/ActiveState/cli@v0.0.0-20240508170324-6801f60cd051/internal/fileutils/replace_bench_test.go (about)

     1  package fileutils
     2  
     3  import (
     4  	"bufio"
     5  	"io"
     6  	"math/rand"
     7  	"os"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  )
    13  
    14  func setup(oldPath, newPath, filler string, binary bool) []byte {
    15  	l := 1000000
    16  	res := make([]byte, l)
    17  	for i := 0; i < l; i++ {
    18  		if binary {
    19  			res[i] = byte(rand.Intn(256))
    20  		} else {
    21  			res[i] = byte(rand.Intn(255) + 1)
    22  		}
    23  	}
    24  
    25  	return res
    26  }
    27  
// result is a package-level sink: BenchmarkReplace stores the output of its last
// iteration here. Presumably this keeps the compiler from optimizing the
// benchmarked call away.
var result []byte
    29  
    30  // BenchmarkRead compares how fast it takes to read a file that we want to replace later
    31  // Three methods are compared:
    32  // 1) read everything in bulk
    33  // 2) use bufio.NewReader to read the data
    34  // 3) read only a fixed amount of data
    35  // On my SSD, (3) is about 15% faster than (1), (2) is the slowest by a factor of 8 compared to (3)
    36  // Streaming would therefore improve the performance, but is more complicated to implement and leads to a slower replacement step
    37  func BenchmarkRead(b *testing.B) {
    38  	oldPath := "abc/def/ghi"
    39  	newPath := "def/ghi"
    40  	byts := setup(oldPath, newPath, "/bin/python.sh", true)
    41  
    42  	testFile := TempFileUnsafe("", "")
    43  	_, err := testFile.Write(byts)
    44  	if err != nil {
    45  		b.Errorf("failed to write test file: %v", err)
    46  	}
    47  	err = testFile.Close()
    48  	if err != nil {
    49  		b.Errorf("failed to close test file: %v", err)
    50  	}
    51  	defer os.Remove(testFile.Name())
    52  
    53  	b.ResetTimer()
    54  
    55  	b.Run("read file (bulk)", func(bb *testing.B) {
    56  		for n := 0; n < bb.N; n++ {
    57  			f, err := os.Open(testFile.Name())
    58  			if err != nil {
    59  				bb.Errorf("Failed to open file: %v", err)
    60  			}
    61  			defer func() {
    62  				f.Close()
    63  			}()
    64  			r, err := io.ReadAll(f)
    65  			if err != nil {
    66  				bb.Errorf("Received error reading: %v", err)
    67  			}
    68  			if len(r) != len(byts) {
    69  				bb.Errorf("Expected to read %d bytes, read = %d", len(byts), len(r))
    70  			}
    71  		}
    72  	})
    73  
    74  	b.Run("read file (bufio)", func(bb *testing.B) {
    75  		for n := 0; n < bb.N; n++ {
    76  			f, err := os.Open(testFile.Name())
    77  			if err != nil {
    78  				bb.Errorf("Failed to open file: %v", err)
    79  			}
    80  			br := bufio.NewReaderSize(f, 1024)
    81  			defer func() {
    82  				f.Close()
    83  			}()
    84  			var nr int
    85  			for {
    86  				_, err := br.ReadByte()
    87  				if err == io.EOF {
    88  					break
    89  				}
    90  				nr++
    91  			}
    92  			if err != nil {
    93  				bb.Errorf("Received error reading: %v", err)
    94  			}
    95  			if nr != len(byts) {
    96  				bb.Errorf("Expected to read %d bytes, read = %d", len(byts), nr)
    97  			}
    98  		}
    99  	})
   100  
   101  	b.Run("read file (stream)", func(bb *testing.B) {
   102  		for n := 0; n < bb.N; n++ {
   103  			f, err := os.Open(testFile.Name())
   104  			if err != nil {
   105  				bb.Errorf("Failed to open file: %v", err)
   106  			}
   107  			defer func() {
   108  				f.Close()
   109  			}()
   110  			b := make([]byte, 1024)
   111  			var nr int
   112  			for {
   113  				n, err := f.Read(b)
   114  				if n == 0 && err == io.EOF {
   115  					break
   116  				}
   117  				nr += n
   118  			}
   119  			if err != nil {
   120  				bb.Errorf("Received error reading: %v", err)
   121  			}
   122  			if nr != len(byts) {
   123  				bb.Errorf("Expected to read %d bytes, read = %d", len(byts), nr)
   124  			}
   125  		}
   126  	})
   127  }
   128  
   129  // BenchmarkWrite measures the time it safes to write the replaced file back to disk
   130  // The measurements should be compared to the time it takes to read file contents and to replace the strings.
   131  // On my SSD Drive, writing to disk takes about 1.6 times as long as the replacement step
   132  // and approximately as long as reading the file contents to memory initially.
   133  func BenchmarkWrite(b *testing.B) {
   134  	oldPath := "abc/def/ghi"
   135  	newPath := "def/ghi"
   136  	byts := setup(oldPath, newPath, "/bin/python.sh", true)
   137  	b.ResetTimer()
   138  
   139  	for n := 0; n < b.N; n++ {
   140  		f := TempFileUnsafe("", "")
   141  		defer func() {
   142  			f.Close()
   143  			os.Remove(f.Name())
   144  		}()
   145  		nw, err := f.Write(byts)
   146  		if err != nil {
   147  			b.Errorf("Received error writing: %v", err)
   148  		}
   149  		if nw != len(byts) {
   150  			b.Errorf("Expected to write %d bytes, written = %d", len(byts), nw)
   151  		}
   152  	}
   153  }
   154  
   155  // BenchmarkReplace measures how long it takes to replace text in large files
   156  // Two main methods are compared (one based on regular expressions, the other is more specialized)
   157  // Replacing in binary files (with nul-terminated strings) and text files is considered separately.
   158  // Sample results are:
   159  // BenchmarkReplace/with_regex_(binary)-8              2335            458652 ns/op         2021164 B/op         35 allocs/op
   160  // BenchmarkReplace/without_regex_(binary)-8                  12064             96341 ns/op               4 B/op          1 allocs/op
   161  // BenchmarkReplace/with_regex_(string)-8                      2142            470041 ns/op         2019299 B/op         28 allocs/op
   162  // BenchmarkReplace/without_regex_(string)-8                   4059            257515 ns/op         1007616 B/op          1 allocs/op
   163  func BenchmarkReplace(b *testing.B) {
   164  	oldPath := "abc/def/ghi"
   165  	newPath := "def/ghi"
   166  	binByts := setup(oldPath, newPath, "/bin/python.sh", true)
   167  	stringByts := setup(oldPath, newPath, "/bin/python.sh", false)
   168  	runs := []struct {
   169  		name  string
   170  		f     func([]byte, string, string) (bool, []byte, error)
   171  		input []byte
   172  	}{
   173  		{
   174  			"with regex (binary)",
   175  			replaceInFile,
   176  			binByts,
   177  		},
   178  		{
   179  			"with regex (string)",
   180  			replaceInFile,
   181  			stringByts,
   182  		},
   183  	}
   184  	b.ResetTimer()
   185  
   186  	for _, run := range runs {
   187  		b.Run(run.name, func(bb *testing.B) {
   188  			var r []byte
   189  			for n := 0; n < bb.N; n++ {
   190  				_, res, err := run.f(run.input, oldPath, newPath)
   191  				if err != nil {
   192  					bb.Errorf("Received error: %v", err)
   193  				}
   194  				if len(res) != len(run.input) {
   195  					bb.Errorf("Expected len = %d, got = %d", len(run.input), len(res))
   196  				}
   197  				r = res
   198  			}
   199  			result = r
   200  
   201  		})
   202  	}
   203  }
   204  
   205  func TestReplaceBytesError(t *testing.T) {
   206  	b := []byte("Hello world\x00")
   207  	_, _, err := replaceInFile(b, "short", "longer")
   208  	assert.Error(t, err)
   209  }
   210  
   211  func TestReplaceBytes(t *testing.T) {
   212  	oldPath := "abc/def/ghi"
   213  	newPath := "def/ghi"
   214  
   215  	byts := []byte("123abc/def/ghi/bin/python\x00456abc/def/ghi/bin/perl\x00other")
   216  	expected := []byte("123def/ghi/bin/python\x00\x00\x00\x00\x00456def/ghi/bin/perl\x00\x00\x00\x00\x00other")
   217  
   218  	text := []byte("'123abc/def/ghi/bin/python'456'abc/def/ghi/bin/perl'other")
   219  	textExpected := []byte("'123def/ghi/bin/python'456'def/ghi/bin/perl'other")
   220  
   221  	noMatchByts := []byte("nothing to match here\x00")
   222  	noMatchText := []byte("nothing to match here\x00")
   223  
   224  	runs := []struct {
   225  		name     string
   226  		f        func([]byte, string, string) (bool, []byte, error)
   227  		input    []byte
   228  		expected []byte
   229  		changes  bool
   230  	}{
   231  		{"nul-terminated with regex", replaceInFile, byts, expected, true},
   232  		{"text with regex", replaceInFile, text, textExpected, true},
   233  		{"nul-terminated with regex - no match", replaceInFile, noMatchByts, noMatchByts, false},
   234  		{"text with regex - no match", replaceInFile, noMatchText, noMatchText, false},
   235  	}
   236  
   237  	for _, run := range runs {
   238  		t.Run(run.name, func(tt *testing.T) {
   239  
   240  			count, res, err := run.f(run.input, oldPath, newPath)
   241  			require.NoError(tt, err)
   242  
   243  			assert.Equal(t, run.expected, res)
   244  			assert.Equal(t, run.changes, count)
   245  		})
   246  	}
   247  }