github.com/rmera/gochem@v0.7.1/files.go (about) 1 /* 2 * files.go, part of gochem. 3 * 4 * 5 * Copyright 2012 Raul Mera rauldotmeraatusachdotcl 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Lesser General Public License as 9 * published by the Free Software Foundation; either version 2.1 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General 18 * Public License along with this program. If not, see 19 * <http://www.gnu.org/licenses/>. 20 * 21 * goChem is developed at Universidad de Santiago de Chile (USACH) 22 * 23 * 24 */ 25 26 package chem 27 28 import ( 29 "bufio" 30 "fmt" 31 "io" 32 "log" 33 "os" 34 "strconv" 35 "strings" 36 37 v3 "github.com/rmera/gochem/v3" 38 ) 39 40 //PDB_read family 41 42 //A map between 3-letters name for aminoacidic residues to the corresponding 1-letter names. 43 var three2OneLetter = map[string]byte{ 44 "SER": 'S', 45 "THR": 'T', 46 "ASN": 'N', 47 "GLN": 'Q', 48 "SEC": 'U', //Selenocysteine! 49 "CYS": 'C', 50 "GLY": 'G', 51 "PRO": 'P', 52 "ALA": 'A', 53 "VAL": 'V', 54 "ILE": 'I', 55 "LEU": 'L', 56 "MET": 'M', 57 "PHE": 'F', 58 "TYR": 'Y', 59 "TRP": 'W', 60 "ARG": 'R', 61 "HIS": 'H', 62 "LYS": 'K', 63 "ASP": 'D', 64 "GLU": 'E', 65 } 66 67 //This tries to guess a chemical element symbol from a PDB atom name. Mostly based on AMBER names. 68 //It only deals with some common bio-elements. 69 func symbolFromName(name string) (string, error) { 70 symbol := "" 71 if len(name) == 1 { 72 symbol = name //should work 73 } else if len(name) == 4 || name[0] == 'H' { //I thiiink only Hs can have 4-char names in amber. 74 symbol = "H" 75 //it name has more than one character but less than four. 76 } else if name[0] == 'C' { 77 if name[0:2] == "CU" { 78 symbol = "Cu" 79 } else if name == "CO" { 80 symbol = "Co" 81 } else if name == "CL" { 82 symbol = "Cl" 83 } else { 84 symbol = "C" 85 } 86 } else if name[0] == 'B' { 87 if name == "BE" { 88 symbol = "Be" 89 } 90 } else if name[0] == 'N' { 91 if name == "NA" { 92 symbol = "Na" 93 } else { 94 symbol = "N" 95 } 96 } else if name[0] == 'O' { 97 symbol = "O" 98 } else if name[0] == 'P' { 99 symbol = "P" 100 } else if name[0] == 'S' { 101 if name == "SE" { 102 symbol = "Se" 103 } else { 104 symbol = "S" 105 } 106 } else if name[0:2] == "ZN" { 107 symbol = "Zn" 108 } 109 if symbol == "" { 110 return name, CError{fmt.Sprintf("Couldn't guess symbol from PDB name. Will set the 'symbol' field to the name %s", name), []string{"symbolFromName"}} 111 } 112 return symbol, nil 113 } 114 115 // read_full_pdb_line parses a valid ATOM or HETATM line of a PDB file, returns an Atom 116 // object with the info except for the coordinates and b-factors, which are returned 117 // separately as an array of 3 float64 and a float64, respectively 118 func read_full_pdb_line(line string, read_additional bool, contlines int) (*Atom, []float64, float64, error) { 119 err := make([]error, 7, 7) //accumulate errors to check at the end of the readed line. 120 coords := make([]float64, 3, 3) 121 atom := new(Atom) 122 atom.Het = strings.HasPrefix(line, "HETATM") //this is called twice in the worst case. should fix 123 atom.ID, err[0] = strconv.Atoi(strings.TrimSpace(line[6:12])) 124 atom.Name = strings.TrimSpace(line[12:16]) 125 atom.Char16 = line[16] 126 //PDB says that pos. 17 is for other thing but I see that is 127 //used for residue name in many cases*/ 128 atom.MolName = line[17:20] 129 atom.MolName1 = three2OneLetter[atom.MolName] 130 atom.Chain = string(line[21]) 131 atom.MolID, err[1] = strconv.Atoi(strings.TrimSpace(line[22:30])) 132 //Here we shouldn't need TrimSpace, but I keep it just in case someone 133 // doesn's use all the fields when writting a PDB*/ 134 coords[0], err[2] = strconv.ParseFloat(strings.TrimSpace(line[30:38]), 64) 135 coords[1], err[3] = strconv.ParseFloat(strings.TrimSpace(line[38:46]), 64) 136 coords[2], err[4] = strconv.ParseFloat(strings.TrimSpace(line[46:54]), 64) 137 var bfactor float64 138 //Every correct PDB should include occupancy and b-factor, but _of course_ writing 139 //correct PDBs is too hard for some programs (and by "some programs" I mean OPLS LigParGen. Get it together, guys). 140 //so I add this conditional to allow goChem to still read these wrong PDB files. 141 if len(line) >= 60 { 142 atom.Occupancy, err[5] = strconv.ParseFloat(strings.TrimSpace(line[54:60]), 64) 143 bfactor, err[6] = strconv.ParseFloat(strings.TrimSpace(line[60:66]), 64) 144 } 145 //we try to read the additional only if indicated and if it is there 146 // In this part we don't catch errors. If something is missing we 147 // just ommit it 148 if read_additional && len(line) >= 79 { 149 atom.Symbol = strings.TrimSpace(line[76:78]) 150 atom.Symbol = strings.Title(strings.ToLower(atom.Symbol)) 151 if len(line) >= 80 { 152 var errcharge error 153 atom.Charge, errcharge = strconv.ParseFloat(strings.TrimSpace(line[78:78]), 64) 154 if errcharge == nil { 155 156 if strings.Contains(line[79:79], "-") { 157 atom.Charge = -1.0 * atom.Charge 158 } 159 } else { 160 //we dont' report an error here, just set the charge to 0 (the default) 161 atom.Charge = 0.0 162 } 163 } 164 } 165 166 //This part tries to guess the symbol from the atom name, if it has not been read 167 //No error checking here, just fills symbol with the empty string the function returns 168 var symbolerr error 169 if len(atom.Symbol) == 0 { 170 atom.Symbol, symbolerr = symbolFromName(atom.Name) 171 } 172 173 for i := range err { 174 if err[i] != nil { 175 //Here I should add the line number to the returned error. 176 return nil, nil, 0, CError{err[i].Error(), []string{"strconv.Atoi/ParseFloat", "read_full_pdb_line"}} 177 } 178 } 179 if atom.Symbol != "" { 180 atom.Mass = symbolMass[atom.Symbol] //Not error checking 181 } 182 //if we couldn't read the symbol, we'll still return the atom and coords 183 //but with an error 184 return atom, coords, bfactor, symbolerr 185 } 186 187 //read_onlycoords_pdb_line parses an ATOM/HETATM PDB line returning only the coordinates and b-factors 188 func read_onlycoords_pdb_line(line string, contlines int) ([]float64, float64, error) { 189 coords := make([]float64, 3, 3) 190 err := make([]error, 4, 4) 191 var bfactor float64 192 coords[0], err[0] = strconv.ParseFloat(strings.TrimSpace(line[30:38]), 64) 193 coords[1], err[1] = strconv.ParseFloat(strings.TrimSpace(line[38:46]), 64) 194 coords[2], err[2] = strconv.ParseFloat(strings.TrimSpace(line[46:54]), 64) 195 bfactor, err[3] = strconv.ParseFloat(strings.TrimSpace(line[60:66]), 64) 196 for i := range err { 197 if err[i] != nil { 198 //Here I should add the line number to the returned error. 199 return nil, 0, CError{err[i].Error(), []string{"strconv.ParseFloat", "read_onlycoords_pdb_line"}} 200 201 } 202 } 203 return coords, bfactor, nil 204 } 205 206 //PDBRRead reads a pdb file from an io.Reader. Returns a Molecule. If there is one frame in the PDB 207 // the coordinates array will be of lenght 1. It also returns an error which is not 208 // really well set up right now. 209 //read_additional is now "deprecated", it will be set to true regardless. I have made it into 210 func PDBRead(pdb io.Reader, read_additional ...bool) (*Molecule, error) { 211 bufiopdb := bufio.NewReader(pdb) 212 mol, err := pdbBufIORead(bufiopdb, read_additional...) 213 return mol, errDecorate(err, "PDBReaderREad") 214 } 215 216 //PDBFileRead reads a pdb file from an io.Reader. Returns a Molecule. If there is one frame in the PDB 217 // the coordinates array will be of lenght 1. It also returns an error which is not 218 // really well set up right now. read_additional is now deprecated. The reader will just read 219 func PDBFileRead(pdbname string, read_additional ...bool) (*Molecule, error) { 220 pdbfile, err := os.Open(pdbname) 221 if err != nil { 222 //fmt.Println("Unable to open file!!") 223 return nil, err 224 } 225 defer pdbfile.Close() 226 pdb := bufio.NewReader(pdbfile) 227 mol, err := pdbBufIORead(pdb, read_additional...) 228 return mol, err 229 } 230 231 //pdbBufIORead reads the atomic entries for a PDB bufio.IO, reads a pdb file from an io.Reader. 232 //Returns a Molecule. If there is one frame in the PDB the coordinates array will be of lenght 1. 233 //It also returns an error which is not really well set up right now. 234 //The read_additional_opt allows not reading the last fields of a PDB, if you know they are wrong. 235 //if true (the default), the fields are read if they are available. Otherwise we attempt to figure 236 //out the symbol from the atom name, which doesn't always work. 237 func pdbBufIORead(pdb *bufio.Reader, read_additional_opt ...bool) (*Molecule, error) { 238 read_additional := true 239 if len(read_additional_opt) > 0 { 240 read_additional = read_additional_opt[0] 241 } 242 var symbolerrorlogged bool 243 molecule := make([]*Atom, 0) 244 modelnumber := 0 //This is the number of frames read 245 coords := make([][]float64, 1, 1) 246 coords[0] = make([]float64, 0) 247 bfactors := make([][]float64, 1, 1) 248 bfactors[0] = make([]float64, 0) 249 first_model := true //are we reading the first model? if not we only save coordinates 250 contlines := 1 //count the lines read to better report errors 251 for { 252 line, err := pdb.ReadString('\n') 253 if err != nil { 254 //fmt.Println("PDB reading complete") /***change this to stderr************/ 255 break 256 // contlines++ //count all the lines even if empty. This is unreachable but I'm not sure at this point if it's better this way! goChem does read PDBs correctly as far as I can see. 257 } 258 if len(line) < 4 { 259 continue 260 } 261 //here we start actually reading 262 /*There might be a bug for not copying the string (symbol, name, etc) but just assigning the slice 263 * which is a reference. Check!*/ 264 var c = make([]float64, 3, 3) 265 var bfactemp float64 //temporary bfactor 266 var atomtmp *Atom 267 // var foo string // not really needed 268 if strings.HasPrefix(line, "ATOM") || strings.HasPrefix(line, "HETATM") { 269 if !first_model { 270 c, bfactemp, err = read_onlycoords_pdb_line(line, contlines) 271 if err != nil { 272 return nil, errDecorate(err, "pdbBufIORead") 273 } 274 } else { 275 atomtmp = new(Atom) 276 atomtmp, c, bfactemp, err = read_full_pdb_line(line, read_additional, contlines) 277 if err != nil { 278 //It would be better to pass this along, but that would create a big mess 279 //I think, at least for now, I have to just keep it here. 280 if strings.Contains(err.Error(), "Couldn't guess symbol from PDB name") { 281 if !symbolerrorlogged { //to avoid logging this many times 282 log.Printf("Error: %s. If the structure contains coarse-grained beads, this error is meaningless. Will continue", err.Error()) 283 symbolerrorlogged = true 284 } 285 err = nil 286 } else { 287 288 return nil, errDecorate(err, "pdbBufIORead") 289 } 290 } 291 //atom data other than coords is the same in all models so just read for the first. 292 molecule = append(molecule, atomtmp) 293 } 294 //coords are appended for all the models 295 //we add the coords to the latest frame of coordinaates 296 coords[len(coords)-1] = append(coords[len(coords)-1], c[0], c[1], c[2]) 297 bfactors[len(bfactors)-1] = append(bfactors[len(bfactors)-1], bfactemp) 298 } else if strings.HasPrefix(line, "MODEL") { 299 modelnumber++ //,_=strconv.Atoi(strings.TrimSpace(line[6:])) 300 if modelnumber > 1 { //will be one for the first model, 2 for the second. 301 first_model = false 302 coords = append(coords, make([]float64, 0)) //new bunch of coords for a new frame 303 bfactors = append(bfactors, make([]float64, 0)) 304 } 305 } 306 } 307 //This could be done faster if done in the same loop where the coords are read 308 //Instead of having another loop just for them. 309 top := NewTopology(0, 1, molecule) 310 var err error 311 frames := len(coords) 312 mcoords := make([]*v3.Matrix, frames, frames) //Final thing to return 313 for i := 0; i < frames; i++ { 314 mcoords[i], err = v3.NewMatrix(coords[i]) 315 if err != nil { 316 return nil, errDecorate(err, "pdbBufIORead") 317 } 318 } 319 //if something happened during the process 320 if err != nil { 321 return nil, errDecorate(err, "pdbBufIORead") 322 } 323 returned, err := NewMolecule(mcoords, top, bfactors) 324 return returned, errDecorate(err, "pdbBufIORead") 325 } 326 327 //End PDB_read family 328 329 //correctBfactors check that coords and bfactors have the same number of elements. 330 func correctBfactors(coords []*v3.Matrix, bfactors [][]float64) bool { 331 if len(coords) != len(bfactors) || bfactors == nil { 332 return false 333 } 334 for key, coord := range coords { 335 cr, _ := coord.Dims() 336 br := len(bfactors[key]) 337 if cr != br { 338 return false 339 } 340 } 341 return true 342 } 343 344 //writePDBLine writes a line in PDB format from the data passed as a parameters. It takes the chain of the previous atom 345 //and returns the written line, the chain of the just-written atom, and error or nil. 346 func writePDBLine(atom *Atom, coord *v3.Matrix, bfact float64, chainprev string) (string, string, error) { 347 var ter string 348 var out string 349 if atom.Chain != chainprev { 350 ter = fmt.Sprint(out, "TER\n") 351 chainprev = atom.Chain 352 } 353 first := "ATOM" 354 if atom.Het { 355 first = "HETATM" 356 } 357 formatstring := "%-6s%5d %-3s %-4s%1s%4d %8.3f%8.3f%8.3f%6.2f%6.2f %2s \n" 358 //4 chars for the atom name are used when hydrogens are included. 359 //This has not been tested 360 if len(atom.Name) == 4 { 361 formatstring = strings.Replace(formatstring, "%-3s ", "%-4s", 1) 362 } else if len(atom.Name) > 4 { 363 return "", chainprev, CError{"Cant print PDB line", []string{"writePDBLine"}} 364 } 365 //"%-6s%5d %-3s %3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f %2s \n" 366 out = fmt.Sprintf(formatstring, first, atom.ID, atom.Name, atom.MolName, atom.Chain, 367 atom.MolID, coord.At(0, 0), coord.At(0, 1), coord.At(0, 2), atom.Occupancy, bfact, atom.Symbol) 368 out = strings.Join([]string{ter, out}, "") 369 return out, chainprev, nil 370 } 371 372 //PDBFileWrite writes a PDB for the molecule mol and the coordinates Coords to a file name pdbname. 373 func PDBFileWrite(pdbname string, coords *v3.Matrix, mol Atomer, Bfactors []float64) error { 374 out, err := os.Create(pdbname) 375 if err != nil { 376 return CError{err.Error(), []string{"os.Create", "PDBFileWrite"}} 377 } 378 defer out.Close() 379 fmt.Fprintf(out, "REMARK WRITTEN WITH GOCHEM :-) \n") 380 err = PDBWrite(out, coords, mol, Bfactors) 381 if err != nil { 382 return errDecorate(err, "PDBFileWrite") 383 } 384 return nil 385 } 386 387 //PDBWrite writes a PDB formatted sequence of bytes to an io.Writer for a given reference, coordinate set and bfactor set, which must match each other. Returns error or nil. 388 func PDBWrite(out io.Writer, coords *v3.Matrix, mol Atomer, bfact []float64) error { 389 err := pdbWrite(out, coords, mol, bfact) 390 if err != nil { 391 errDecorate(err, "PDBWrite") 392 } 393 _, err = out.Write([]byte{'\n'}) //This function is just a wrapper to add the newline to what pdbWrite does. 394 if err != nil { 395 return CError{"Failed to write in io.Writer", []string{"io.Write.Write", "PDBWrite"}} 396 397 } 398 return nil 399 } 400 401 func pdbWrite(out io.Writer, coords *v3.Matrix, mol Atomer, bfact []float64) error { 402 if bfact == nil { 403 bfact = make([]float64, mol.Len()) 404 } 405 cr, _ := coords.Dims() 406 br := len(bfact) 407 if cr != mol.Len() || cr != br { 408 return CError{"Ref and Coords and/or Bfactors dont have the same number of atoms", []string{"pdbWrite"}} 409 } 410 chainprev := mol.Atom(0).Chain //this is to know when the chain changes. 411 var outline string 412 var err error 413 iowriteError := func(err error) error { 414 return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Write.Write", "pdbWrite"}} 415 } 416 for i := 0; i < mol.Len(); i++ { 417 // r,c:=coords.Dims() 418 // fmt.Println("IIIIIIIIIIIi", i,coords,r,c, "lllllll") 419 writecoord := coords.VecView(i) 420 outline, chainprev, err = writePDBLine(mol.Atom(i), writecoord, bfact[i], chainprev) 421 if err != nil { 422 return errDecorate(err, "pdbWrite "+fmt.Sprintf("Could not print PDB line: %d", i)) 423 } 424 _, err := out.Write([]byte(outline)) 425 if err != nil { 426 return iowriteError(err) 427 } 428 } 429 _, err = out.Write([]byte("TER\n")) // New Addition, should help to recognize the end of the chain. 430 _, err = out.Write([]byte("END")) //no newline, this is in case the write is part of a PDB and one needs to write "ENDMDEL". 431 if err != nil { 432 return iowriteError(err) 433 } 434 return nil 435 } 436 437 //PDBStringWrite writes a string in PDB format for a given reference, coordinate set and bfactor set, which must match each other 438 //returns the written string and error or nil. 439 func PDBStringWrite(coords *v3.Matrix, mol Atomer, bfact []float64) (string, error) { 440 if bfact == nil { 441 bfact = make([]float64, mol.Len()) 442 } 443 cr, _ := coords.Dims() 444 br := len(bfact) 445 if cr != mol.Len() || cr != br { 446 return "", CError{"Ref and Coords and/or Bfactors dont have the same number of atoms", []string{"PDBStringWrite"}} 447 } 448 chainprev := mol.Atom(0).Chain //this is to know when the chain changes. 449 var outline string 450 var outstring string 451 var err error 452 for i := 0; i < mol.Len(); i++ { 453 // r,c:=coords.Dims() 454 // fmt.Println("IIIIIIIIIIIi", i,coords,r,c, "lllllll") 455 writecoord := coords.VecView(i) 456 outline, chainprev, err = writePDBLine(mol.Atom(i), writecoord, bfact[i], chainprev) 457 if err != nil { 458 return "", errDecorate(err, "PDBStringWrite "+fmt.Sprintf("Could not print PDB line: %d", i)) 459 } 460 outstring = strings.Join([]string{outstring, outline}, "") 461 } 462 outstring = strings.Join([]string{outstring, "END\n"}, "") 463 return outstring, nil 464 } 465 466 //MultiPDBWrite writes a multiPDB for the molecule mol and the various coordinate sets in CandB, to the io.Writer given. 467 //CandB is a list of lists of *matrix.DenseMatrix. If it has 2 elements or more, the second will be used as 468 //Bfactors. If it has one element, all b-factors will be zero. 469 //Returns an error if fails, or nil if succeeds. 470 func MultiPDBWrite(out io.Writer, Coords []*v3.Matrix, mol Atomer, Bfactors [][]float64) error { 471 if !correctBfactors(Coords, Bfactors) { 472 Bfactors = make([][]float64, len(Coords), len(Coords)) 473 } 474 iowriterError := func(err error) error { 475 return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "MultiPDBWrite"}} 476 } 477 478 _, err := out.Write([]byte("REMARK WRITTEN WITH GOCHEM :-)")) //The model number starts with one 479 if err != nil { 480 return iowriterError(err) 481 } 482 //OK now the real business. 483 for j := range Coords { 484 _, err := out.Write([]byte(fmt.Sprintf("MODEL %d\n", j+1))) //The model number starts with one 485 if err != nil { 486 return iowriterError(err) 487 } 488 err = pdbWrite(out, Coords[j], mol, Bfactors[j]) 489 if err != nil { 490 return errDecorate(err, "MultiPDBWrite") 491 } 492 _, err = out.Write([]byte("MDL\n")) 493 if err != nil { 494 return iowriterError(err) 495 } 496 497 } 498 499 _, err = out.Write([]byte("END\n")) 500 if err != nil { 501 return iowriterError(err) 502 } 503 504 return nil 505 } 506 507 /***End of PDB part***/ 508 509 //XYZFileRead Reads an xyz or multixyz file (as produced by Turbomole). Returns a Molecule and error or nil. 510 func XYZFileRead(xyzname string) (*Molecule, error) { 511 xyzfile, err := os.Open(xyzname) 512 if err != nil { 513 //fmt.Println("Unable to open file!!") 514 return nil, CError{err.Error(), []string{"os.Open", "XYZFileRead"}} 515 } 516 defer xyzfile.Close() 517 mol, err := XYZRead(xyzfile) 518 if err != nil { 519 err = errDecorate(err, "XYZFileRead "+fmt.Sprintf(strings.Join([]string{"error in file ", xyzname}, ""))) 520 } 521 return mol, err 522 523 } 524 525 //XYZRead Reads an xyz or multixyz formatted bufio.Reader (as produced by Turbomole). Returns a Molecule and error or nil. 526 func XYZRead(xyzp io.Reader) (*Molecule, error) { 527 snaps := 1 528 xyz := bufio.NewReader(xyzp) 529 var err error 530 var top *Topology 531 var molecule []*Atom 532 Coords := make([]*v3.Matrix, 1, 1) 533 Data := make([]string, 1, 1) 534 535 for { 536 //When we read the first snapshot we collect also the topology data, later 537 //only coords are collected. 538 if snaps == 1 { 539 Coords[0], molecule, Data[0], err = xyzReadSnap(xyz, nil, true) 540 if err != nil { 541 return nil, errDecorate(err, "XYZRead") 542 } 543 top = NewTopology(0, 1, molecule) 544 if err != nil { 545 return nil, errDecorate(err, "XYZRead") 546 } 547 snaps++ 548 continue 549 } 550 tmpcoords, _, data, err := xyzReadSnap(xyz, nil, false) 551 if err != nil { 552 //An error here simply means that there are no more snapshots 553 errm := err.Error() 554 if strings.Contains(errm, "Empty") || strings.Contains(errm, "header") { 555 err = nil 556 break 557 } 558 return nil, errDecorate(err, "XYZRead") 559 } 560 Coords = append(Coords, tmpcoords) 561 Data = append(Data, data) 562 } 563 bfactors := make([][]float64, len(Coords), len(Coords)) 564 for key, _ := range bfactors { 565 bfactors[key] = make([]float64, top.Len()) 566 } 567 returned, err := NewMolecule(Coords, top, bfactors) 568 returned.XYZFileData = Data 569 return returned, errDecorate(err, "XYZRead") 570 } 571 572 //XYZTraj is a trajectory-like representation of an XYZ File. 573 type XYZTraj struct { 574 natoms int 575 xyz *bufio.Reader //The DCD file 576 frames int 577 xyzfile *os.File 578 readable bool 579 firstframe *v3.Matrix 580 } 581 582 //Readable returns true if the trajectory is fit to be read, false otherwise. 583 func (X *XYZTraj) Readable() bool { 584 return X.readable 585 } 586 587 func (X *XYZTraj) Len() int { 588 return X.natoms 589 } 590 591 //xyztrajerror returns a LastFrameError if the given error message contains certain keywords. 592 //otherwise, returns the original error. 593 func (X *XYZTraj) xyztrajerror(err error) error { 594 errm := err.Error() 595 X.xyzfile.Close() 596 X.readable = false 597 if strings.Contains(errm, "Empty") || strings.Contains(errm, "header") { 598 return newlastFrameError("", X.frames) 599 } else { 600 return err 601 } 602 603 } 604 605 //Next reads the next snapshot of the trajectory into coords, or discards it, if coords 606 //is nil. It can take a box slice of floats, but won't do anything with it 607 //(only for compatibility with the Traj interface. 608 func (X *XYZTraj) Next(coords *v3.Matrix, box ...[]float64) error { 609 if coords == nil { 610 _, _, _, err := xyzReadSnap(X.xyz, coords, false) 611 if err != nil { 612 //An error here probably means that there are no more snapshots 613 return X.xyztrajerror(err) 614 } 615 X.frames++ 616 return nil 617 } 618 if X.frames == 0 { 619 coords.Copy(X.firstframe) //slow, but I don't want to mess with the pointer I got. 620 X.frames++ 621 X.firstframe = nil 622 return nil 623 } 624 _, _, _, err := xyzReadSnap(X.xyz, coords, false) 625 if err != nil { 626 //An error here probably means that there are no more snapshots 627 return X.xyztrajerror(err) 628 } 629 630 X.frames++ 631 return err 632 } 633 634 //Reads a multi-xyz file. Returns the first snapshot as a molecule, and the other ones as a XYZTraj 635 func XYZFileAsTraj(xyzname string) (*Molecule, *XYZTraj, error) { 636 xyzfile, err := os.Open(xyzname) 637 if err != nil { 638 //fmt.Println("Unable to open file!!") 639 return nil, nil, CError{err.Error(), []string{"os.Open", "XYZFileRead"}} 640 } 641 xyz := bufio.NewReader(xyzfile) 642 //the molecule first 643 coords, atoms, _, err := xyzReadSnap(xyz, nil, true) 644 top := NewTopology(0, 1, atoms) 645 bfactors := make([][]float64, 1, 1) 646 bfactors[0] = make([]float64, top.Len()) 647 returned, err := NewMolecule([]*v3.Matrix{coords}, top, bfactors) 648 //now the traj 649 traj := new(XYZTraj) 650 traj.xyzfile = xyzfile 651 traj.xyz = xyz 652 traj.natoms = returned.Len() 653 traj.readable = true 654 traj.firstframe = coords 655 return returned, traj, nil 656 } 657 658 //xyzReadSnap reads an xyz file snapshot from a bufio.Reader, returns a slice of Atom 659 //objects, which will be nil if ReadTopol is false, 660 // a slice of matrix.DenseMatrix and an error or nil. 661 func xyzReadSnap(xyz *bufio.Reader, toplace *v3.Matrix, ReadTopol bool) (*v3.Matrix, []*Atom, string, error) { 662 line, err := xyz.ReadString('\n') 663 if err != nil { 664 return nil, nil, "", CError{fmt.Sprintf("Empty XYZ File: %s", err.Error()), []string{"bufio.Reader.ReadString", "xyzReadSnap"}} 665 } 666 natoms, err := strconv.Atoi(strings.TrimSpace(line)) 667 if err != nil { 668 return nil, nil, "", CError{fmt.Sprintf("Wrong header for an XYZ file %s", err.Error()), []string{"strconv.Atoi", "xyzReadSnap"}} 669 } 670 var molecule []*Atom 671 if ReadTopol { 672 molecule = make([]*Atom, natoms, natoms) 673 } 674 var coords []float64 675 if toplace == nil { 676 coords = make([]float64, natoms*3, natoms*3) 677 } else { 678 coords = toplace.RawSlice() 679 } 680 data, err := xyz.ReadString('\n') //The text in "data" could be anything, including just "\n" 681 if err != nil { 682 return nil, nil, "", CError{fmt.Sprintf("Ill formatted XYZ file: %s", err.Error()), []string{"bufio.Reader.ReadString", "xyzReadSnap"}} 683 684 } 685 errs := make([]error, 3, 3) 686 for i := 0; i < natoms; i++ { 687 line, errs[0] = xyz.ReadString('\n') 688 if errs[0] != nil { //inefficient, (errs[1] can be checked once before), but clearer. 689 if strings.Contains(errs[0].Error(), "EOF") && i == natoms-1 { //This allows that an XYZ ends without a newline 690 errs[0] = nil 691 } else { 692 break 693 } 694 } 695 fields := strings.Fields(line) 696 if len(fields) < 4 { 697 errs[0] = fmt.Errorf("Line number %d ill formed: %s", i, line) 698 break 699 } 700 if ReadTopol { 701 molecule[i] = new(Atom) 702 molecule[i].Symbol = strings.Title(fields[0]) 703 molecule[i].Mass = symbolMass[molecule[i].Symbol] 704 molecule[i].MolName = "UNK" 705 molecule[i].Name = molecule[i].Symbol 706 } 707 coords[i*3], errs[0] = strconv.ParseFloat(fields[1], 64) 708 coords[i*3+1], errs[1] = strconv.ParseFloat(fields[2], 64) 709 coords[i*3+2], errs[2] = strconv.ParseFloat(fields[3], 64) 710 } 711 //This could be done faster if done in the same loop where the coords are read 712 //Instead of having another loop just for them. 713 for _, i := range errs { 714 if i != nil { 715 // fmt.Println("line", line, k) 716 return nil, nil, "", CError{i.Error(), []string{"strconv.ParseFloat", "xyzReadSnap"}} 717 } 718 } 719 //this should be fine even if I had a toplace matrix. Both toplace and mcoord should just point to the same data. 720 mcoords, err := v3.NewMatrix(coords) 721 return mcoords, molecule, data, errDecorate(err, "xyzReadSnap") 722 } 723 724 //XYZWrite writes the mol Ref and the Coord coordinates in an XYZ file with name xyzname which will 725 //be created fot that. If the file exist it will be overwritten. 726 func XYZFileWrite(xyzname string, Coords *v3.Matrix, mol Atomer) error { 727 out, err := os.Create(xyzname) 728 if err != nil { 729 return CError{err.Error(), []string{"os.Create", "XYZFileWrite"}} 730 } 731 defer out.Close() 732 err = XYZWrite(out, Coords, mol) 733 if err != nil { 734 return errDecorate(err, "XYZFileWrite") 735 } 736 return nil 737 } 738 739 //XYZStringWrite writes the mol Ref and the Coord coordinates in an XYZ-formatted string. 740 func XYZStringWrite(Coords *v3.Matrix, mol Atomer) (string, error) { 741 var out string 742 if mol.Len() != Coords.NVecs() { 743 return "", CError{"Ref and Coords dont have the same number of atoms", []string{"XYZStringWrite"}} 744 } 745 c := make([]float64, 3, 3) 746 out = fmt.Sprintf("%-4d\n\n", mol.Len()) 747 //towrite := Coords.Arrays() //An array of array with the data in the matrix 748 for i := 0; i < mol.Len(); i++ { 749 //c := towrite[i] //coordinates for the corresponding atoms 750 c = Coords.Row(c, i) 751 temp := fmt.Sprintf("%-2s %12.6f%12.6f%12.6f \n", mol.Atom(i).Symbol, c[0], c[1], c[2]) 752 out = strings.Join([]string{out, temp}, "") 753 } 754 return out, nil 755 } 756 757 //XYZWrite writes the mol Ref and the Coords coordinates to a io.Writer, in the XYZ format. 758 func XYZWrite(out io.Writer, Coords *v3.Matrix, mol Atomer) error { 759 iowriterError := func(err error) error { 760 return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "XYZWrite"}} 761 } 762 if mol.Len() != Coords.NVecs() { 763 return CError{"Ref and Coords dont have the same number of atoms", []string{"XYZWrite"}} 764 } 765 c := make([]float64, 3, 3) 766 _, err := out.Write([]byte(fmt.Sprintf("%-4d\n\n", mol.Len()))) 767 if err != nil { 768 return iowriterError(err) 769 } 770 //towrite := Coords.Arrays() //An array of array with the data in the matrix 771 for i := 0; i < mol.Len(); i++ { 772 //c := towrite[i] //coordinates for the corresponding atoms 773 c = Coords.Row(c, i) 774 temp := fmt.Sprintf("%-2s %12.6f%12.6f%12.6f \n", mol.Atom(i).Symbol, c[0], c[1], c[2]) 775 _, err := out.Write([]byte(temp)) 776 if err != nil { 777 return iowriterError(err) 778 } 779 } 780 return nil 781 } 782 783 //GroFileRead reads a file in the Gromacs gro format, returning a molecule. 784 func GroFileRead(groname string) (*Molecule, error) { 785 grofile, err := os.Open(groname) 786 if err != nil { 787 //fmt.Println("Unable to open file!!") 788 return nil, CError{err.Error(), []string{"os.Open", "GroFileRead"}} 789 } 790 defer grofile.Close() 791 snaps := 1 792 gro := bufio.NewReader(grofile) 793 var top *Topology 794 var molecule []*Atom 795 Coords := make([]*v3.Matrix, 1, 1) 796 797 for { 798 //When we read the first snapshot we collect also the topology data, later 799 //only coords are collected. 800 if snaps == 1 { 801 Coords[0], molecule, err = groReadSnap(gro, true) 802 if err != nil { 803 return nil, errDecorate(err, "GroFileRead") 804 } 805 top = NewTopology(0, 1, molecule) 806 if err != nil { 807 return nil, errDecorate(err, "GroFileRead") 808 } 809 snaps++ 810 continue 811 } 812 //fmt.Println("how manytimes?") ///////////////////// 813 tmpcoords, _, err := groReadSnap(gro, false) 814 if err != nil { 815 break //We just ignore errors after the first snapshot, and simply read as many snapshots as we can. 816 /* 817 //An error here may just mean that there are no more snapshots 818 errm := err.Error() 819 if strings.Contains(errm, "Empty") || strings.Contains(errm, "EOF") { 820 err = nil 821 break 822 } 823 return nil, errDecorate(err, "GroRead") 824 */ 825 } 826 Coords = append(Coords, tmpcoords) 827 } 828 returned, err := NewMolecule(Coords, top, nil) 829 // fmt.Println("2 return!", top.Atom(1), returned.Coords[0].VecView(2)) /////////////////////// 830 return returned, errDecorate(err, "GroRead") 831 } 832 833 func groReadSnap(gro *bufio.Reader, ReadTopol bool) (*v3.Matrix, []*Atom, error) { 834 nm2A := 10.0 835 chains := "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 836 line, err := gro.ReadString('\n') //we don't care about this line,but it has to be there 837 if err != nil { 838 return nil, nil, CError{fmt.Sprintf("Empty gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}} 839 } 840 line, err = gro.ReadString('\n') 841 if err != nil { 842 return nil, nil, CError{fmt.Sprintf("Malformed gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}} 843 } 844 845 natoms, err := strconv.Atoi(strings.TrimSpace(line)) 846 if err != nil { 847 return nil, nil, CError{fmt.Sprintf("Wrong header for a gro file %s", err.Error()), []string{"strconv.Atoi", "groReadSnap"}} 848 } 849 var molecule []*Atom 850 if ReadTopol { 851 molecule = make([]*Atom, 0, natoms) 852 } 853 coords := make([]float64, 0, natoms*3) 854 prevres := 0 855 chainindex := 0 856 for i := 0; i < natoms; i++ { 857 line, err = gro.ReadString('\n') 858 if err != nil { 859 return nil, nil, CError{fmt.Sprintf("Failure to read gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}} 860 } 861 fields := strings.Fields(line) 862 if len(fields) < 4 { 863 break //meaning this line contains the unit cell vectors, and it is the last line of the snapshot 864 } 865 if ReadTopol { 866 atom, c, err := read_gro_line(line) 867 if err != nil { 868 return nil, nil, CError{fmt.Sprintf("Failure to read gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}} 869 } 870 if atom.MolID < prevres { 871 chainindex++ 872 } 873 prevres = atom.MolID 874 if chainindex >= len(chains) { 875 chainindex = 0 //more chains inthe molecule than letters in the alphabet! 876 } 877 atom.Chain = string(chains[chainindex]) 878 molecule = append(molecule, atom) 879 coords = append(coords, c...) 880 // fmt.Println(atom, c) ////////////////// 881 continue 882 883 } 884 c := make([]float64, 3, 3) 885 for i := 0; i < 3; i++ { 886 c[i], err = strconv.ParseFloat(strings.TrimSpace(line[20+(i*8):28+(i*8)]), 64) 887 if err != nil { 888 return nil, nil, err 889 } 890 c[i] = c[i] * nm2A //gro uses nm, goChem uses A. 891 } 892 coords = append(coords, c...) 893 894 } 895 mcoords, err := v3.NewMatrix(coords) 896 // fmt.Println(molecule) //, mcoords) //////////////////////// 897 return mcoords, molecule, nil 898 } 899 900 //read_gro_line Parses a valid ATOM or HETATM line of a PDB file, returns an Atom 901 // object with the info except for the coordinates and b-factors, which are returned 902 // separately as an array of 3 float64 and a float64, respectively 903 func read_gro_line(line string) (*Atom, []float64, error) { 904 coords := make([]float64, 3, 3) 905 atom := new(Atom) 906 nm2A := 10.0 907 var err error 908 atom.MolID, err = strconv.Atoi(strings.TrimSpace(line[0:5])) 909 if err != nil { 910 return nil, nil, err 911 } 912 atom.MolName = strings.TrimSpace(line[5:10]) 913 atom.MolName1 = three2OneLetter[atom.MolName] 914 atom.Name = strings.TrimSpace(line[10:15]) 915 atom.ID, err = strconv.Atoi(strings.TrimSpace(line[15:20])) 916 // fmt.Printf("%s|%s|%s|%s|\n", line[0:5], line[5:10], line[10:15], line[15:20]) //////////// 917 if err != nil { 918 return nil, nil, err 919 } 920 for i := 0; i < 3; i++ { 921 coords[i], err = strconv.ParseFloat(strings.TrimSpace(line[20+(i*8):28+(i*8)]), 64) 922 if err != nil { 923 return nil, nil, err 924 } 925 coords[i] = coords[i] * nm2A //gro uses nm, goChem uses A. 926 } 927 928 atom.Symbol, _ = symbolFromName(atom.Name) 929 return atom, coords, nil 930 } 931 932 //GoFileWrite writes the molecule described by mol and Coords into a file in the Gromacs 933 //gro format. If Coords has more than one elements, it will write a multi-state file. 934 func GroFileWrite(outname string, Coords []*v3.Matrix, mol Atomer) error { 935 out, err := os.Create(outname) 936 if err != nil { 937 return CError{"Failed to write open file" + err.Error(), []string{"os.Create", "GroFileWrite"}} 938 } 939 defer out.Close() 940 for _, v := range Coords { 941 err := GroSnapWrite(v, mol, out) 942 if err != nil { 943 return errDecorate(err, "GoFileWrite") 944 } 945 } 946 return nil 947 } 948 949 //GroSnapWrite writes a single snapshot of a molecule to an io.Writer, in the Gro format. 950 func GroSnapWrite(coords *v3.Matrix, mol Atomer, out io.Writer) error { 951 A2nm := 0.1 952 iowriterError := func(err error) error { 953 return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "GroSnapWrite"}} 954 } 955 if mol.Len() != coords.NVecs() { 956 return CError{"Ref and Coords dont have the same number of atoms", []string{"GroSnapWrite"}} 957 } 958 c := make([]float64, 3, 3) 959 _, err := out.Write([]byte(fmt.Sprintf("Written with goChem :-)\n%-4d\n", mol.Len()))) 960 if err != nil { 961 return iowriterError(err) 962 } 963 //towrite := Coords.Arrays() //An array of array with the data in the matrix 964 for i := 0; i < mol.Len(); i++ { 965 //c := towrite[i] //coordinates for the corresponding atoms 966 c = coords.Row(c, i) 967 at := mol.Atom(i) 968 //velocities are set to 0 969 temp := fmt.Sprintf("%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n", at.MolID, at.MolName, at.Name, at.ID, c[0]*A2nm, c[1]*A2nm, c[2]*A2nm, 0.0, 0.0, 0.0) 970 _, err := out.Write([]byte(temp)) 971 if err != nil { 972 return iowriterError(err) 973 } 974 975 } 976 //the box vectors at the end of the snappshot 977 _, err = out.Write([]byte("0.0 0.0 0.0\n")) 978 if err != nil { 979 return iowriterError(err) 980 } 981 return nil 982 }