github.com/biogo/biogo@v1.0.4/align/matrix/matrices/make.go (about)

     1  //go:build ignore
     2  // +build ignore
     3  
     4  package main
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"log"
    11  	"os"
    12  	"strconv"
    13  	"strings"
    14  
    15  	"github.com/biogo/biogo/alphabet"
    16  )
    17  
    18  var matrices = []struct {
    19  	file  string
    20  	alpha alphabet.Alphabet
    21  }{
    22  	{"NUC.4", alphabet.DNA},
    23  	{"NUC.4.4", alphabet.DNAredundant},
    24  
    25  	{"DAYHOFF", alphabet.Protein},
    26  	{"GONNET", alphabet.Protein},
    27  	{"IDENTITY", alphabet.Protein},
    28  	{"MATCH", alphabet.Protein},
    29  
    30  	{"BLOSUM100", alphabet.Protein},
    31  	{"BLOSUM30", alphabet.Protein},
    32  	{"BLOSUM35", alphabet.Protein},
    33  	{"BLOSUM40", alphabet.Protein},
    34  	{"BLOSUM45", alphabet.Protein},
    35  	{"BLOSUM50", alphabet.Protein},
    36  	{"BLOSUM55", alphabet.Protein},
    37  	{"BLOSUM60", alphabet.Protein},
    38  	{"BLOSUM62", alphabet.Protein},
    39  	{"BLOSUM65", alphabet.Protein},
    40  	{"BLOSUM70", alphabet.Protein},
    41  	{"BLOSUM75", alphabet.Protein},
    42  	{"BLOSUM80", alphabet.Protein},
    43  	{"BLOSUM85", alphabet.Protein},
    44  	{"BLOSUM90", alphabet.Protein},
    45  	{"BLOSUMN", alphabet.Protein},
    46  
    47  	{"PAM10", alphabet.Protein},
    48  	{"PAM100", alphabet.Protein},
    49  	{"PAM110", alphabet.Protein},
    50  	{"PAM120", alphabet.Protein},
    51  	{"PAM120.cdi", alphabet.Protein},
    52  	{"PAM130", alphabet.Protein},
    53  	{"PAM140", alphabet.Protein},
    54  	{"PAM150", alphabet.Protein},
    55  	{"PAM160", alphabet.Protein},
    56  	{"PAM160.cdi", alphabet.Protein},
    57  	{"PAM170", alphabet.Protein},
    58  	{"PAM180", alphabet.Protein},
    59  	{"PAM190", alphabet.Protein},
    60  	{"PAM20", alphabet.Protein},
    61  	{"PAM200", alphabet.Protein},
    62  	{"PAM200.cdi", alphabet.Protein},
    63  	{"PAM210", alphabet.Protein},
    64  	{"PAM220", alphabet.Protein},
    65  	{"PAM230", alphabet.Protein},
    66  	{"PAM240", alphabet.Protein},
    67  	{"PAM250", alphabet.Protein},
    68  	{"PAM250.cdi", alphabet.Protein},
    69  	{"PAM260", alphabet.Protein},
    70  	{"PAM270", alphabet.Protein},
    71  	{"PAM280", alphabet.Protein},
    72  	{"PAM290", alphabet.Protein},
    73  	{"PAM30", alphabet.Protein},
    74  	{"PAM300", alphabet.Protein},
    75  	{"PAM310", alphabet.Protein},
    76  	{"PAM320", alphabet.Protein},
    77  	{"PAM330", alphabet.Protein},
    78  	{"PAM340", alphabet.Protein},
    79  	{"PAM350", alphabet.Protein},
    80  	{"PAM360", alphabet.Protein},
    81  	{"PAM370", alphabet.Protein},
    82  	{"PAM380", alphabet.Protein},
    83  	{"PAM390", alphabet.Protein},
    84  	{"PAM40", alphabet.Protein},
    85  	{"PAM400", alphabet.Protein},
    86  	{"PAM40.cdi", alphabet.Protein},
    87  	{"PAM410", alphabet.Protein},
    88  	{"PAM420", alphabet.Protein},
    89  	{"PAM430", alphabet.Protein},
    90  	{"PAM440", alphabet.Protein},
    91  	{"PAM450", alphabet.Protein},
    92  	{"PAM460", alphabet.Protein},
    93  	{"PAM470", alphabet.Protein},
    94  	{"PAM480", alphabet.Protein},
    95  	{"PAM490", alphabet.Protein},
    96  	{"PAM50", alphabet.Protein},
    97  	{"PAM500", alphabet.Protein},
    98  	{"PAM60", alphabet.Protein},
    99  	{"PAM70", alphabet.Protein},
   100  	{"PAM80", alphabet.Protein},
   101  	{"PAM80.cdi", alphabet.Protein},
   102  	{"PAM90", alphabet.Protein},
   103  }
   104  
   105  func main() {
   106  	fmt.Fprintln(os.Stdout, `// DO NOT EDIT. This file was autogenerated by make.go.
   107  
   108  // Copyright ©2013 The bíogo Authors. All rights reserved.
   109  // Use of this source code is governed by a BSD-style
   110  // license that can be found in the LICENSE file.
   111  
   112  // Package matrix provides a variety of alignment scoring matrices for sequence alignment.
   113  package matrix
   114  
   115  // All alignment scoring matrices are organised to allow direct lookup using alphabets
   116  // defined in biogo/alphabet. Gap penalties are set to zero for all matrices and the I/L
   117  // single letter amino acid code, "J", is included but not defined for all the protein
   118  // scoring matrices.
   119  var (`)
   120  	for i, m := range matrices {
   121  		if i != 0 {
   122  			fmt.Fprintln(os.Stdout)
   123  		}
   124  		err := genCode(os.Stdout, m.file, m.alpha)
   125  		if err != nil {
   126  			log.Fatalf("Failed to create matrix source for %s: %v", m, err)
   127  		}
   128  	}
   129  	fmt.Println(")")
   130  }
   131  
   132  func genCode(w io.Writer, m string, a alphabet.Alphabet) error {
   133  	f, err := os.Open(m)
   134  	if err != nil {
   135  		return err
   136  	}
   137  	b, err := ioutil.ReadAll(f)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	s := string(b)
   142  	var (
   143  		ind       = a.LetterIndex()
   144  		ref       []string
   145  		perm      []int
   146  		mat       [][]int
   147  		row       int
   148  		lastBlank bool
   149  	)
   150  	for _, l := range strings.Split(s, "\n") {
   151  		nsl := noSpace(l)
   152  		switch {
   153  		case len(l) == 0:
   154  			if !lastBlank {
   155  				lastBlank = true
   156  				fmt.Fprintln(w, "\t//")
   157  			}
   158  		case l[0] == ' ':
   159  			ref = strings.Fields(nsl)
   160  			perm = make([]int, a.Len())
   161  			for i, l := range ref {
   162  				li := ind[l[0]]
   163  				if li < 0 {
   164  					continue
   165  				}
   166  				perm[li] = i
   167  			}
   168  			mat = make([][]int, a.Len())
   169  			for j := range mat {
   170  				mat[j] = make([]int, a.Len())
   171  			}
   172  			fallthrough
   173  		case l[0] == '#':
   174  			lastBlank = false
   175  			fmt.Fprintf(w, "\t// %s\n", l)
   176  		default:
   177  			lastBlank = false
   178  			fmt.Fprintf(w, "\t// %s\n", l)
   179  			for col, f := range strings.Fields(nsl)[1:] {
   180  				mat[ind[ref[row][0]]][ind[ref[col][0]]], err = strconv.Atoi(f)
   181  				if err != nil {
   182  					return err
   183  				}
   184  			}
   185  			row++
   186  		}
   187  	}
   188  	fmt.Fprintf(w, "\t%s = [][]int{\n\t\t/*       ", strings.Replace(m, ".", "_", -1))
   189  	for j := range mat {
   190  		fmt.Printf("%c ", toUpper(a.Letter(j)))
   191  	}
   192  	fmt.Fprintln(w, "*/")
   193  	for i := range mat {
   194  		fmt.Printf("\t\t/* %c */ {", toUpper(a.Letter(i)))
   195  		for j, e := range mat[i] {
   196  			fmt.Fprint(w, e)
   197  			if j < len(mat[i])-1 {
   198  				fmt.Print(", ")
   199  			}
   200  		}
   201  		fmt.Fprintln(w, "},")
   202  	}
   203  	fmt.Fprintln(w, "\t}")
   204  	return nil
   205  }
   206  
   207  func toUpper(l alphabet.Letter) alphabet.Letter {
   208  	if l >= 'a' {
   209  		return l &^ ' '
   210  	}
   211  	return l
   212  }
   213  
   214  func noSpace(s string) string {
   215  	var b []byte
   216  	for i := 0; i < len(s); i++ {
   217  		if s[i] == ' ' {
   218  			if i > 0 && s[i-1] != ' ' {
   219  				b = append(b, ' ')
   220  			}
   221  			continue
   222  		}
   223  		b = append(b, s[i])
   224  	}
   225  	return string(b)
   226  }