github.com/biogo/biogo@v1.0.4/align/matrix/matrices/make.go (about) 1 //go:build ignore 2 // +build ignore 3 4 package main 5 6 import ( 7 "fmt" 8 "io" 9 "io/ioutil" 10 "log" 11 "os" 12 "strconv" 13 "strings" 14 15 "github.com/biogo/biogo/alphabet" 16 ) 17 18 var matrices = []struct { 19 file string 20 alpha alphabet.Alphabet 21 }{ 22 {"NUC.4", alphabet.DNA}, 23 {"NUC.4.4", alphabet.DNAredundant}, 24 25 {"DAYHOFF", alphabet.Protein}, 26 {"GONNET", alphabet.Protein}, 27 {"IDENTITY", alphabet.Protein}, 28 {"MATCH", alphabet.Protein}, 29 30 {"BLOSUM100", alphabet.Protein}, 31 {"BLOSUM30", alphabet.Protein}, 32 {"BLOSUM35", alphabet.Protein}, 33 {"BLOSUM40", alphabet.Protein}, 34 {"BLOSUM45", alphabet.Protein}, 35 {"BLOSUM50", alphabet.Protein}, 36 {"BLOSUM55", alphabet.Protein}, 37 {"BLOSUM60", alphabet.Protein}, 38 {"BLOSUM62", alphabet.Protein}, 39 {"BLOSUM65", alphabet.Protein}, 40 {"BLOSUM70", alphabet.Protein}, 41 {"BLOSUM75", alphabet.Protein}, 42 {"BLOSUM80", alphabet.Protein}, 43 {"BLOSUM85", alphabet.Protein}, 44 {"BLOSUM90", alphabet.Protein}, 45 {"BLOSUMN", alphabet.Protein}, 46 47 {"PAM10", alphabet.Protein}, 48 {"PAM100", alphabet.Protein}, 49 {"PAM110", alphabet.Protein}, 50 {"PAM120", alphabet.Protein}, 51 {"PAM120.cdi", alphabet.Protein}, 52 {"PAM130", alphabet.Protein}, 53 {"PAM140", alphabet.Protein}, 54 {"PAM150", alphabet.Protein}, 55 {"PAM160", alphabet.Protein}, 56 {"PAM160.cdi", alphabet.Protein}, 57 {"PAM170", alphabet.Protein}, 58 {"PAM180", alphabet.Protein}, 59 {"PAM190", alphabet.Protein}, 60 {"PAM20", alphabet.Protein}, 61 {"PAM200", alphabet.Protein}, 62 {"PAM200.cdi", alphabet.Protein}, 63 {"PAM210", alphabet.Protein}, 64 {"PAM220", alphabet.Protein}, 65 {"PAM230", alphabet.Protein}, 66 {"PAM240", alphabet.Protein}, 67 {"PAM250", alphabet.Protein}, 68 {"PAM250.cdi", alphabet.Protein}, 69 {"PAM260", alphabet.Protein}, 70 {"PAM270", alphabet.Protein}, 71 {"PAM280", alphabet.Protein}, 72 {"PAM290", alphabet.Protein}, 73 {"PAM30", alphabet.Protein}, 74 {"PAM300", alphabet.Protein}, 75 {"PAM310", alphabet.Protein}, 76 {"PAM320", alphabet.Protein}, 77 {"PAM330", alphabet.Protein}, 78 {"PAM340", alphabet.Protein}, 79 {"PAM350", alphabet.Protein}, 80 {"PAM360", alphabet.Protein}, 81 {"PAM370", alphabet.Protein}, 82 {"PAM380", alphabet.Protein}, 83 {"PAM390", alphabet.Protein}, 84 {"PAM40", alphabet.Protein}, 85 {"PAM400", alphabet.Protein}, 86 {"PAM40.cdi", alphabet.Protein}, 87 {"PAM410", alphabet.Protein}, 88 {"PAM420", alphabet.Protein}, 89 {"PAM430", alphabet.Protein}, 90 {"PAM440", alphabet.Protein}, 91 {"PAM450", alphabet.Protein}, 92 {"PAM460", alphabet.Protein}, 93 {"PAM470", alphabet.Protein}, 94 {"PAM480", alphabet.Protein}, 95 {"PAM490", alphabet.Protein}, 96 {"PAM50", alphabet.Protein}, 97 {"PAM500", alphabet.Protein}, 98 {"PAM60", alphabet.Protein}, 99 {"PAM70", alphabet.Protein}, 100 {"PAM80", alphabet.Protein}, 101 {"PAM80.cdi", alphabet.Protein}, 102 {"PAM90", alphabet.Protein}, 103 } 104 105 func main() { 106 fmt.Fprintln(os.Stdout, `// DO NOT EDIT. This file was autogenerated by make.go. 107 108 // Copyright ©2013 The bíogo Authors. All rights reserved. 109 // Use of this source code is governed by a BSD-style 110 // license that can be found in the LICENSE file. 111 112 // Package matrix provides a variety of alignment scoring matrices for sequence alignment. 113 package matrix 114 115 // All alignment scoring matrices are organised to allow direct lookup using alphabets 116 // defined in biogo/alphabet. Gap penalties are set to zero for all matrices and the I/L 117 // single letter amino acid code, "J", is included but not defined for all the protein 118 // scoring matrices. 119 var (`) 120 for i, m := range matrices { 121 if i != 0 { 122 fmt.Fprintln(os.Stdout) 123 } 124 err := genCode(os.Stdout, m.file, m.alpha) 125 if err != nil { 126 log.Fatalf("Failed to create matrix source for %s: %v", m, err) 127 } 128 } 129 fmt.Println(")") 130 } 131 132 func genCode(w io.Writer, m string, a alphabet.Alphabet) error { 133 f, err := os.Open(m) 134 if err != nil { 135 return err 136 } 137 b, err := ioutil.ReadAll(f) 138 if err != nil { 139 return err 140 } 141 s := string(b) 142 var ( 143 ind = a.LetterIndex() 144 ref []string 145 perm []int 146 mat [][]int 147 row int 148 lastBlank bool 149 ) 150 for _, l := range strings.Split(s, "\n") { 151 nsl := noSpace(l) 152 switch { 153 case len(l) == 0: 154 if !lastBlank { 155 lastBlank = true 156 fmt.Fprintln(w, "\t//") 157 } 158 case l[0] == ' ': 159 ref = strings.Fields(nsl) 160 perm = make([]int, a.Len()) 161 for i, l := range ref { 162 li := ind[l[0]] 163 if li < 0 { 164 continue 165 } 166 perm[li] = i 167 } 168 mat = make([][]int, a.Len()) 169 for j := range mat { 170 mat[j] = make([]int, a.Len()) 171 } 172 fallthrough 173 case l[0] == '#': 174 lastBlank = false 175 fmt.Fprintf(w, "\t// %s\n", l) 176 default: 177 lastBlank = false 178 fmt.Fprintf(w, "\t// %s\n", l) 179 for col, f := range strings.Fields(nsl)[1:] { 180 mat[ind[ref[row][0]]][ind[ref[col][0]]], err = strconv.Atoi(f) 181 if err != nil { 182 return err 183 } 184 } 185 row++ 186 } 187 } 188 fmt.Fprintf(w, "\t%s = [][]int{\n\t\t/* ", strings.Replace(m, ".", "_", -1)) 189 for j := range mat { 190 fmt.Printf("%c ", toUpper(a.Letter(j))) 191 } 192 fmt.Fprintln(w, "*/") 193 for i := range mat { 194 fmt.Printf("\t\t/* %c */ {", toUpper(a.Letter(i))) 195 for j, e := range mat[i] { 196 fmt.Fprint(w, e) 197 if j < len(mat[i])-1 { 198 fmt.Print(", ") 199 } 200 } 201 fmt.Fprintln(w, "},") 202 } 203 fmt.Fprintln(w, "\t}") 204 return nil 205 } 206 207 func toUpper(l alphabet.Letter) alphabet.Letter { 208 if l >= 'a' { 209 return l &^ ' ' 210 } 211 return l 212 } 213 214 func noSpace(s string) string { 215 var b []byte 216 for i := 0; i < len(s); i++ { 217 if s[i] == ' ' { 218 if i > 0 && s[i-1] != ' ' { 219 b = append(b, ' ') 220 } 221 continue 222 } 223 b = append(b, s[i]) 224 } 225 return string(b) 226 }