github.com/coyove/nj@v0.0.0-20221110084952-c7f8db1065c3/tests/bench/fasta.txt (about)

     1  WIDTH = 60 -- Fold lines after WIDTH bytes
     2  out = bytes(0)
     3  min = math.min
     4  
     5  --[[type AminoAcid struct {
     6     p float64
     7     c byte
     8  }]]
     9  
    10  function AccumulateProbabilities(genelist)
    11      for i=1,#genelist do
    12          genelist[i][0] += genelist[i-1][0]
    13      end
    14  end
    15  
    16  -- RepeatFasta prints the characters of the byte slice s. When it
    17  -- reaches the end of the slice, it goes back to the beginning.
    18  -- It stops after generating count characters.
    19  -- After each WIDTH characters it prints a newline.
    20  -- It assumes that WIDTH <= len(s) + 1.
    21  function RepeatFasta(s, count)
    22      local pos = 0
    23      local s2 = bytes(#s+WIDTH)
    24      s2.copy(0, #s2, s)
    25      s2.copy(#s, #s2, s)
    26      while count > 0 do
    27         local line = min(WIDTH, count)
    28         out.concat(s2[pos : pos+line])
    29         out.append(10) -- '\n'
    30         pos += line
    31         if pos >= #s then
    32              pos -= #s
    33         end
    34         count -= line
    35      end
    36  end
    37  
    38  IM = 139968
    39  IA = 3877
    40  IC = 29573
    41  
    42  lastrandom = 42
    43  
    44  function generateRandom(buf)
    45      for i, _ in buf do
    46          lastrandom = (lastrandom*IA + IC) % IM
    47          buf[i] = lastrandom / IM
    48      end
    49  end
    50  
    51  -- generateDna generates DNA text from random sequence.
    52  -- Each element of genelist is a struct with a character and
    53  -- a floating point number p between 0 and 1.
    54  -- generateDna takes a random float r and
    55  -- finds the first element such that p >= r.
    56  -- This is a weighted random selection.
    57  function generateDna(genelist, rb, wb)
    58      local count = #rb
    59      local i = 0
    60      local o = 0
    61      while count > 0 do
    62          local line = min(WIDTH, count)
    63          count -= line
    64          for j=0,line do
    65              local r = rb[i]
    66              for i, v in genelist do
    67                  if v[0] >= r then
    68                      wb[o] = v[1]
    69                      break
    70                  end
    71              end
    72              i += 1
    73              o += 1
    74          end
    75          wb[o] = 10 -- '\n'
    76          o += 1
    77      end
    78      return o
    79  end
    80  
    81  RANDOM_BUF_SIZE = WIDTH * 1000
    82  OUT_BUF_SIZE    = (WIDTH + 1) * 1000
    83  
    84  -- 1 for output, 4 for generateDna, 1 for generateRandom and 2 spaces
    85  SLOT = 8
    86  
    87  -- RandomFasta then prints the character of the array element.
    88  -- This sequence is repeated count times.
    89  -- Between each WIDTH consecutive characters, the function prints a newline.
    90  function RandomFasta(genelist, count)
    91      local rbufs = array.make(SLOT)
    92      local wbufs = array.make(SLOT)
    93      for i, _ in rbufs do
    94          rbufs[i] = array.make(RANDOM_BUF_SIZE)
    95          wbufs[i] = bytes(OUT_BUF_SIZE)
    96      end
    97  
    98      -- Use `chan []byte` as future object. och is queue of future.
    99      local och = channel(4)
   100      local done = channel(0)
   101      function()
   102          for bc in self.och do
   103              local buf, _ = bc.recv()
   104              out.concat(buf)
   105          end
   106          self.done.send(true)
   107      end.go()
   108  
   109      local i = 0
   110      while count > 0 do
   111          local chunk = min(count, RANDOM_BUF_SIZE)
   112          count -= chunk
   113          local rb = rbufs[i%SLOT][:chunk]
   114          local wb = wbufs[i%SLOT]
   115          generateRandom(rb)
   116  
   117          local c = channel(1)
   118          och.send(c)
   119          function(rb, wb, c)
   120              local o = generateDna(self.genelist, rb, wb)
   121              c.send(wb[:o])
   122          end.go(rb, wb, c)
   123          i += 1
   124      end
   125      och.close()
   126      done.recv()
   127  end
   128  
   129  local n = 25000000
   130  
   131  local iub = [
   132        [0.27, 'a'[0]],
   133        [0.12, 'c'[0]],
   134        [0.12, 'g'[0]],
   135        [0.27, 't'[0]],
   136        [0.02, 'B'[0]],
   137        [0.02, 'D'[0]],
   138        [0.02, 'H'[0]],
   139        [0.02, 'K'[0]],
   140        [0.02, 'M'[0]],
   141        [0.02, 'N'[0]],
   142        [0.02, 'R'[0]],
   143        [0.02, 'S'[0]],
   144        [0.02, 'V'[0]],
   145        [0.02, 'W'[0]],
   146        [0.02, 'Y'[0]],
   147  ]
   148  
   149  local homosapiens = [
   150        [0.3029549426680, 'a'[0]],
   151        [0.1979883004921, 'c'[0]],
   152        [0.1975473066391, 'g'[0]],
   153        [0.3015094502008, 't'[0]],
   154  ]
   155  
   156     AccumulateProbabilities(iub)
   157     AccumulateProbabilities(homosapiens)
   158  
   159  local alu = bytes(
   160        "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
   161           "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
   162           "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
   163           "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
   164           "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
   165           "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
   166           "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
   167  
   168     out.concat(bytes(">ONE Homo sapiens alu\n"))
   169     RepeatFasta(alu, 2*n)
   170     out.concat(bytes(">TWO IUB ambiguity codes\n"))
   171     RandomFasta(iub, 3*n)
   172     out.concat(bytes(">THREE Homo sapiens frequency\n"))
   173     RandomFasta(homosapiens, 5*n)
   174  
   175     -- io.write(os.stdout, out)