github.com/coyove/nj@v0.0.0-20221110084952-c7f8db1065c3/tests/bench/fasta.txt (about) 1 WIDTH = 60 -- Fold lines after WIDTH bytes 2 out = bytes(0) 3 min = math.min 4 5 --[[type AminoAcid struct { 6 p float64 7 c byte 8 }]] 9 10 function AccumulateProbabilities(genelist) 11 for i=1,#genelist do 12 genelist[i][0] += genelist[i-1][0] 13 end 14 end 15 16 -- RepeatFasta prints the characters of the byte slice s. When it 17 -- reaches the end of the slice, it goes back to the beginning. 18 -- It stops after generating count characters. 19 -- After each WIDTH characters it prints a newline. 20 -- It assumes that WIDTH <= len(s) + 1. 21 function RepeatFasta(s, count) 22 local pos = 0 23 local s2 = bytes(#s+WIDTH) 24 s2.copy(0, #s2, s) 25 s2.copy(#s, #s2, s) 26 while count > 0 do 27 local line = min(WIDTH, count) 28 out.concat(s2[pos : pos+line]) 29 out.append(10) -- '\n' 30 pos += line 31 if pos >= #s then 32 pos -= #s 33 end 34 count -= line 35 end 36 end 37 38 IM = 139968 39 IA = 3877 40 IC = 29573 41 42 lastrandom = 42 43 44 function generateRandom(buf) 45 for i, _ in buf do 46 lastrandom = (lastrandom*IA + IC) % IM 47 buf[i] = lastrandom / IM 48 end 49 end 50 51 -- generateDna generates DNA text from random sequence. 52 -- Each element of genelist is a struct with a character and 53 -- a floating point number p between 0 and 1. 54 -- generateDna takes a random float r and 55 -- finds the first element such that p >= r. 56 -- This is a weighted random selection. 57 function generateDna(genelist, rb, wb) 58 local count = #rb 59 local i = 0 60 local o = 0 61 while count > 0 do 62 local line = min(WIDTH, count) 63 count -= line 64 for j=0,line do 65 local r = rb[i] 66 for i, v in genelist do 67 if v[0] >= r then 68 wb[o] = v[1] 69 break 70 end 71 end 72 i += 1 73 o += 1 74 end 75 wb[o] = 10 -- '\n' 76 o += 1 77 end 78 return o 79 end 80 81 RANDOM_BUF_SIZE = WIDTH * 1000 82 OUT_BUF_SIZE = (WIDTH + 1) * 1000 83 84 -- 1 for output, 4 for generateDna, 1 for generateRandom and 2 spaces 85 SLOT = 8 86 87 -- RandomFasta then prints the character of the array element. 88 -- This sequence is repeated count times. 89 -- Between each WIDTH consecutive characters, the function prints a newline. 90 function RandomFasta(genelist, count) 91 local rbufs = array.make(SLOT) 92 local wbufs = array.make(SLOT) 93 for i, _ in rbufs do 94 rbufs[i] = array.make(RANDOM_BUF_SIZE) 95 wbufs[i] = bytes(OUT_BUF_SIZE) 96 end 97 98 -- Use `chan []byte` as future object. och is queue of future. 99 local och = channel(4) 100 local done = channel(0) 101 function() 102 for bc in self.och do 103 local buf, _ = bc.recv() 104 out.concat(buf) 105 end 106 self.done.send(true) 107 end.go() 108 109 local i = 0 110 while count > 0 do 111 local chunk = min(count, RANDOM_BUF_SIZE) 112 count -= chunk 113 local rb = rbufs[i%SLOT][:chunk] 114 local wb = wbufs[i%SLOT] 115 generateRandom(rb) 116 117 local c = channel(1) 118 och.send(c) 119 function(rb, wb, c) 120 local o = generateDna(self.genelist, rb, wb) 121 c.send(wb[:o]) 122 end.go(rb, wb, c) 123 i += 1 124 end 125 och.close() 126 done.recv() 127 end 128 129 local n = 25000000 130 131 local iub = [ 132 [0.27, 'a'[0]], 133 [0.12, 'c'[0]], 134 [0.12, 'g'[0]], 135 [0.27, 't'[0]], 136 [0.02, 'B'[0]], 137 [0.02, 'D'[0]], 138 [0.02, 'H'[0]], 139 [0.02, 'K'[0]], 140 [0.02, 'M'[0]], 141 [0.02, 'N'[0]], 142 [0.02, 'R'[0]], 143 [0.02, 'S'[0]], 144 [0.02, 'V'[0]], 145 [0.02, 'W'[0]], 146 [0.02, 'Y'[0]], 147 ] 148 149 local homosapiens = [ 150 [0.3029549426680, 'a'[0]], 151 [0.1979883004921, 'c'[0]], 152 [0.1975473066391, 'g'[0]], 153 [0.3015094502008, 't'[0]], 154 ] 155 156 AccumulateProbabilities(iub) 157 AccumulateProbabilities(homosapiens) 158 159 local alu = bytes( 160 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + 161 "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + 162 "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + 163 "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + 164 "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + 165 "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + 166 "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") 167 168 out.concat(bytes(">ONE Homo sapiens alu\n")) 169 RepeatFasta(alu, 2*n) 170 out.concat(bytes(">TWO IUB ambiguity codes\n")) 171 RandomFasta(iub, 3*n) 172 out.concat(bytes(">THREE Homo sapiens frequency\n")) 173 RandomFasta(homosapiens, 5*n) 174 175 -- io.write(os.stdout, out)