github.com/rmera/gochem@v0.7.1/files.go (about)

     1  /*
     2   * files.go, part of gochem.
     3   *
     4   *
     5   * Copyright 2012 Raul Mera rauldotmeraatusachdotcl
     6   *
     7   * This program is free software; you can redistribute it and/or modify
     8   * it under the terms of the GNU Lesser General Public License as
     9   * published by the Free Software Foundation; either version 2.1 of the
    10   * License, or (at your option) any later version.
    11   *
    12   * This program is distributed in the hope that it will be useful,
    13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    15   * GNU General Public License for more details.
    16   *
    17   * You should have received a copy of the GNU Lesser General
    18   * Public License along with this program.  If not, see
    19   * <http://www.gnu.org/licenses/>.
    20   *
    21   * goChem is developed at Universidad de Santiago de Chile (USACH)
    22   *
    23   *
    24   */
    25  
    26  package chem
    27  
    28  import (
    29  	"bufio"
    30  	"fmt"
    31  	"io"
    32  	"log"
    33  	"os"
    34  	"strconv"
    35  	"strings"
    36  
    37  	v3 "github.com/rmera/gochem/v3"
    38  )
    39  
    40  //PDB_read family
    41  
    42  //A map between 3-letters name for aminoacidic residues to the corresponding 1-letter names.
    43  var three2OneLetter = map[string]byte{
    44  	"SER": 'S',
    45  	"THR": 'T',
    46  	"ASN": 'N',
    47  	"GLN": 'Q',
    48  	"SEC": 'U', //Selenocysteine!
    49  	"CYS": 'C',
    50  	"GLY": 'G',
    51  	"PRO": 'P',
    52  	"ALA": 'A',
    53  	"VAL": 'V',
    54  	"ILE": 'I',
    55  	"LEU": 'L',
    56  	"MET": 'M',
    57  	"PHE": 'F',
    58  	"TYR": 'Y',
    59  	"TRP": 'W',
    60  	"ARG": 'R',
    61  	"HIS": 'H',
    62  	"LYS": 'K',
    63  	"ASP": 'D',
    64  	"GLU": 'E',
    65  }
    66  
    67  //This tries to guess a chemical element symbol from a PDB atom name. Mostly based on AMBER names.
    68  //It only deals with some common bio-elements.
    69  func symbolFromName(name string) (string, error) {
    70  	symbol := ""
    71  	if len(name) == 1 {
    72  		symbol = name //should work
    73  	} else if len(name) == 4 || name[0] == 'H' { //I thiiink only Hs can have 4-char names in amber.
    74  		symbol = "H"
    75  		//it name has more than one character but less than four.
    76  	} else if name[0] == 'C' {
    77  		if name[0:2] == "CU" {
    78  			symbol = "Cu"
    79  		} else if name == "CO" {
    80  			symbol = "Co"
    81  		} else if name == "CL" {
    82  			symbol = "Cl"
    83  		} else {
    84  			symbol = "C"
    85  		}
    86  	} else if name[0] == 'B' {
    87  		if name == "BE" {
    88  			symbol = "Be"
    89  		}
    90  	} else if name[0] == 'N' {
    91  		if name == "NA" {
    92  			symbol = "Na"
    93  		} else {
    94  			symbol = "N"
    95  		}
    96  	} else if name[0] == 'O' {
    97  		symbol = "O"
    98  	} else if name[0] == 'P' {
    99  		symbol = "P"
   100  	} else if name[0] == 'S' {
   101  		if name == "SE" {
   102  			symbol = "Se"
   103  		} else {
   104  			symbol = "S"
   105  		}
   106  	} else if name[0:2] == "ZN" {
   107  		symbol = "Zn"
   108  	}
   109  	if symbol == "" {
   110  		return name, CError{fmt.Sprintf("Couldn't guess symbol from PDB name. Will set the 'symbol' field to the name %s", name), []string{"symbolFromName"}}
   111  	}
   112  	return symbol, nil
   113  }
   114  
   115  // read_full_pdb_line parses a valid ATOM or HETATM line of a PDB file, returns an Atom
   116  // object with the info except for the coordinates and b-factors, which  are returned
   117  // separately as an array of 3 float64 and a float64, respectively
   118  func read_full_pdb_line(line string, read_additional bool, contlines int) (*Atom, []float64, float64, error) {
   119  	err := make([]error, 7, 7) //accumulate errors to check at the end of the readed line.
   120  	coords := make([]float64, 3, 3)
   121  	atom := new(Atom)
   122  	atom.Het = strings.HasPrefix(line, "HETATM") //this is called twice in the worst case. should fix
   123  	atom.ID, err[0] = strconv.Atoi(strings.TrimSpace(line[6:12]))
   124  	atom.Name = strings.TrimSpace(line[12:16])
   125  	atom.Char16 = line[16]
   126  	//PDB says that pos. 17 is for other thing but I see that is
   127  	//used for residue name in many cases*/
   128  	atom.MolName = line[17:20]
   129  	atom.MolName1 = three2OneLetter[atom.MolName]
   130  	atom.Chain = string(line[21])
   131  	atom.MolID, err[1] = strconv.Atoi(strings.TrimSpace(line[22:30]))
   132  	//Here we shouldn't need TrimSpace, but I keep it just in case someone
   133  	// doesn's use all the fields when writting a PDB*/
   134  	coords[0], err[2] = strconv.ParseFloat(strings.TrimSpace(line[30:38]), 64)
   135  	coords[1], err[3] = strconv.ParseFloat(strings.TrimSpace(line[38:46]), 64)
   136  	coords[2], err[4] = strconv.ParseFloat(strings.TrimSpace(line[46:54]), 64)
   137  	var bfactor float64
   138  	//Every correct PDB should include occupancy and b-factor, but _of course_ writing
   139  	//correct PDBs is too hard for some programs (and by "some programs" I mean OPLS LigParGen. Get it together, guys).
   140  	//so I add this conditional to allow goChem to still read these wrong PDB files.
   141  	if len(line) >= 60 {
   142  		atom.Occupancy, err[5] = strconv.ParseFloat(strings.TrimSpace(line[54:60]), 64)
   143  		bfactor, err[6] = strconv.ParseFloat(strings.TrimSpace(line[60:66]), 64)
   144  	}
   145  	//we try to read the additional only if indicated and if it is there
   146  	// In this part we don't catch errors. If something is missing we
   147  	// just ommit it
   148  	if read_additional && len(line) >= 79 {
   149  		atom.Symbol = strings.TrimSpace(line[76:78])
   150  		atom.Symbol = strings.Title(strings.ToLower(atom.Symbol))
   151  		if len(line) >= 80 {
   152  			var errcharge error
   153  			atom.Charge, errcharge = strconv.ParseFloat(strings.TrimSpace(line[78:78]), 64)
   154  			if errcharge == nil {
   155  
   156  				if strings.Contains(line[79:79], "-") {
   157  					atom.Charge = -1.0 * atom.Charge
   158  				}
   159  			} else {
   160  				//we dont' report an error here, just set the charge to 0 (the default)
   161  				atom.Charge = 0.0
   162  			}
   163  		}
   164  	}
   165  
   166  	//This part tries to guess the symbol from the atom name, if it has not been read
   167  	//No error checking here, just fills symbol with the empty string the function returns
   168  	var symbolerr error
   169  	if len(atom.Symbol) == 0 {
   170  		atom.Symbol, symbolerr = symbolFromName(atom.Name)
   171  	}
   172  
   173  	for i := range err {
   174  		if err[i] != nil {
   175  			//Here I should add the line number to the returned error.
   176  			return nil, nil, 0, CError{err[i].Error(), []string{"strconv.Atoi/ParseFloat", "read_full_pdb_line"}}
   177  		}
   178  	}
   179  	if atom.Symbol != "" {
   180  		atom.Mass = symbolMass[atom.Symbol] //Not error checking
   181  	}
   182  	//if we couldn't read the symbol, we'll still return the atom and coords
   183  	//but with an error
   184  	return atom, coords, bfactor, symbolerr
   185  }
   186  
   187  //read_onlycoords_pdb_line parses an ATOM/HETATM PDB line returning only the coordinates and b-factors
   188  func read_onlycoords_pdb_line(line string, contlines int) ([]float64, float64, error) {
   189  	coords := make([]float64, 3, 3)
   190  	err := make([]error, 4, 4)
   191  	var bfactor float64
   192  	coords[0], err[0] = strconv.ParseFloat(strings.TrimSpace(line[30:38]), 64)
   193  	coords[1], err[1] = strconv.ParseFloat(strings.TrimSpace(line[38:46]), 64)
   194  	coords[2], err[2] = strconv.ParseFloat(strings.TrimSpace(line[46:54]), 64)
   195  	bfactor, err[3] = strconv.ParseFloat(strings.TrimSpace(line[60:66]), 64)
   196  	for i := range err {
   197  		if err[i] != nil {
   198  			//Here I should add the line number to the returned error.
   199  			return nil, 0, CError{err[i].Error(), []string{"strconv.ParseFloat", "read_onlycoords_pdb_line"}}
   200  
   201  		}
   202  	}
   203  	return coords, bfactor, nil
   204  }
   205  
   206  //PDBRRead reads a pdb file from an io.Reader. Returns a Molecule. If there is one frame in the PDB
   207  // the coordinates array will be of lenght 1. It also returns an error which is not
   208  // really well set up right now.
   209  //read_additional is now "deprecated", it will be set to true regardless. I have made it into
   210  func PDBRead(pdb io.Reader, read_additional ...bool) (*Molecule, error) {
   211  	bufiopdb := bufio.NewReader(pdb)
   212  	mol, err := pdbBufIORead(bufiopdb, read_additional...)
   213  	return mol, errDecorate(err, "PDBReaderREad")
   214  }
   215  
   216  //PDBFileRead reads a pdb file from an io.Reader. Returns a Molecule. If there is one frame in the PDB
   217  // the coordinates array will be of lenght 1. It also returns an error which is not
   218  // really well set up right now. read_additional is now deprecated. The reader will just read
   219  func PDBFileRead(pdbname string, read_additional ...bool) (*Molecule, error) {
   220  	pdbfile, err := os.Open(pdbname)
   221  	if err != nil {
   222  		//fmt.Println("Unable to open file!!")
   223  		return nil, err
   224  	}
   225  	defer pdbfile.Close()
   226  	pdb := bufio.NewReader(pdbfile)
   227  	mol, err := pdbBufIORead(pdb, read_additional...)
   228  	return mol, err
   229  }
   230  
   231  //pdbBufIORead reads the atomic entries for a PDB bufio.IO, reads a pdb file from an io.Reader.
   232  //Returns a Molecule. If there is one frame in the PDB the coordinates array will be of lenght 1.
   233  //It also returns an error which is not really well set up right now.
   234  //The read_additional_opt allows not reading the last fields of a PDB, if you know they are wrong.
   235  //if true (the default), the fields are read if they are available. Otherwise we attempt to figure
   236  //out the symbol from the atom name, which doesn't always work.
   237  func pdbBufIORead(pdb *bufio.Reader, read_additional_opt ...bool) (*Molecule, error) {
   238  	read_additional := true
   239  	if len(read_additional_opt) > 0 {
   240  		read_additional = read_additional_opt[0]
   241  	}
   242  	var symbolerrorlogged bool
   243  	molecule := make([]*Atom, 0)
   244  	modelnumber := 0 //This is the number of frames read
   245  	coords := make([][]float64, 1, 1)
   246  	coords[0] = make([]float64, 0)
   247  	bfactors := make([][]float64, 1, 1)
   248  	bfactors[0] = make([]float64, 0)
   249  	first_model := true //are we reading the first model? if not we only save coordinates
   250  	contlines := 1      //count the lines read to better report errors
   251  	for {
   252  		line, err := pdb.ReadString('\n')
   253  		if err != nil {
   254  			//fmt.Println("PDB reading complete") /***change this to stderr************/
   255  			break
   256  			//	contlines++ //count all the lines even if empty. This is unreachable but I'm not sure at this point if it's better this way! goChem does read PDBs correctly as far as I can see.
   257  		}
   258  		if len(line) < 4 {
   259  			continue
   260  		}
   261  		//here we start actually reading
   262  		/*There might be a bug for not copying the string (symbol, name, etc) but just assigning the slice
   263  		 * which is a reference. Check!*/
   264  		var c = make([]float64, 3, 3)
   265  		var bfactemp float64 //temporary bfactor
   266  		var atomtmp *Atom
   267  		//	var foo string // not really needed
   268  		if strings.HasPrefix(line, "ATOM") || strings.HasPrefix(line, "HETATM") {
   269  			if !first_model {
   270  				c, bfactemp, err = read_onlycoords_pdb_line(line, contlines)
   271  				if err != nil {
   272  					return nil, errDecorate(err, "pdbBufIORead")
   273  				}
   274  			} else {
   275  				atomtmp = new(Atom)
   276  				atomtmp, c, bfactemp, err = read_full_pdb_line(line, read_additional, contlines)
   277  				if err != nil {
   278  					//It would be better to pass this along, but that would create a big mess
   279  					//I think, at least for now, I have to just keep it here.
   280  					if strings.Contains(err.Error(), "Couldn't guess symbol from PDB name") {
   281  						if !symbolerrorlogged { //to avoid logging this many times
   282  							log.Printf("Error: %s. If the structure contains coarse-grained beads, this error is meaningless. Will continue", err.Error())
   283  							symbolerrorlogged = true
   284  						}
   285  						err = nil
   286  					} else {
   287  
   288  						return nil, errDecorate(err, "pdbBufIORead")
   289  					}
   290  				}
   291  				//atom data other than coords is the same in all models so just read for the first.
   292  				molecule = append(molecule, atomtmp)
   293  			}
   294  			//coords are appended for all the models
   295  			//we add the coords to the latest frame of coordinaates
   296  			coords[len(coords)-1] = append(coords[len(coords)-1], c[0], c[1], c[2])
   297  			bfactors[len(bfactors)-1] = append(bfactors[len(bfactors)-1], bfactemp)
   298  		} else if strings.HasPrefix(line, "MODEL") {
   299  			modelnumber++        //,_=strconv.Atoi(strings.TrimSpace(line[6:]))
   300  			if modelnumber > 1 { //will be one for the first model, 2 for the second.
   301  				first_model = false
   302  				coords = append(coords, make([]float64, 0)) //new bunch of coords for a new frame
   303  				bfactors = append(bfactors, make([]float64, 0))
   304  			}
   305  		}
   306  	}
   307  	//This could be done faster if done in the same loop where the coords are read
   308  	//Instead of having another loop just for them.
   309  	top := NewTopology(0, 1, molecule)
   310  	var err error
   311  	frames := len(coords)
   312  	mcoords := make([]*v3.Matrix, frames, frames) //Final thing to return
   313  	for i := 0; i < frames; i++ {
   314  		mcoords[i], err = v3.NewMatrix(coords[i])
   315  		if err != nil {
   316  			return nil, errDecorate(err, "pdbBufIORead")
   317  		}
   318  	}
   319  	//if something happened during the process
   320  	if err != nil {
   321  		return nil, errDecorate(err, "pdbBufIORead")
   322  	}
   323  	returned, err := NewMolecule(mcoords, top, bfactors)
   324  	return returned, errDecorate(err, "pdbBufIORead")
   325  }
   326  
   327  //End PDB_read family
   328  
   329  //correctBfactors check that coords and bfactors have the same number of elements.
   330  func correctBfactors(coords []*v3.Matrix, bfactors [][]float64) bool {
   331  	if len(coords) != len(bfactors) || bfactors == nil {
   332  		return false
   333  	}
   334  	for key, coord := range coords {
   335  		cr, _ := coord.Dims()
   336  		br := len(bfactors[key])
   337  		if cr != br {
   338  			return false
   339  		}
   340  	}
   341  	return true
   342  }
   343  
   344  //writePDBLine writes a line in PDB format from the data passed as a parameters. It takes the chain of the previous atom
   345  //and returns the written line, the chain of the just-written atom, and error or nil.
   346  func writePDBLine(atom *Atom, coord *v3.Matrix, bfact float64, chainprev string) (string, string, error) {
   347  	var ter string
   348  	var out string
   349  	if atom.Chain != chainprev {
   350  		ter = fmt.Sprint(out, "TER\n")
   351  		chainprev = atom.Chain
   352  	}
   353  	first := "ATOM"
   354  	if atom.Het {
   355  		first = "HETATM"
   356  	}
   357  	formatstring := "%-6s%5d  %-3s %-4s%1s%4d    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s  \n"
   358  	//4 chars for the atom name are used when hydrogens are included.
   359  	//This has not been tested
   360  	if len(atom.Name) == 4 {
   361  		formatstring = strings.Replace(formatstring, "%-3s ", "%-4s", 1)
   362  	} else if len(atom.Name) > 4 {
   363  		return "", chainprev, CError{"Cant print PDB line", []string{"writePDBLine"}}
   364  	}
   365  	//"%-6s%5d  %-3s %3s %1c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s  \n"
   366  	out = fmt.Sprintf(formatstring, first, atom.ID, atom.Name, atom.MolName, atom.Chain,
   367  		atom.MolID, coord.At(0, 0), coord.At(0, 1), coord.At(0, 2), atom.Occupancy, bfact, atom.Symbol)
   368  	out = strings.Join([]string{ter, out}, "")
   369  	return out, chainprev, nil
   370  }
   371  
   372  //PDBFileWrite writes a PDB for the molecule mol and the coordinates Coords to a file name pdbname.
   373  func PDBFileWrite(pdbname string, coords *v3.Matrix, mol Atomer, Bfactors []float64) error {
   374  	out, err := os.Create(pdbname)
   375  	if err != nil {
   376  		return CError{err.Error(), []string{"os.Create", "PDBFileWrite"}}
   377  	}
   378  	defer out.Close()
   379  	fmt.Fprintf(out, "REMARK WRITTEN WITH GOCHEM :-) \n")
   380  	err = PDBWrite(out, coords, mol, Bfactors)
   381  	if err != nil {
   382  		return errDecorate(err, "PDBFileWrite")
   383  	}
   384  	return nil
   385  }
   386  
   387  //PDBWrite writes a PDB formatted sequence of bytes to an io.Writer for a given reference, coordinate set and bfactor set, which must match each other. Returns error or nil.
   388  func PDBWrite(out io.Writer, coords *v3.Matrix, mol Atomer, bfact []float64) error {
   389  	err := pdbWrite(out, coords, mol, bfact)
   390  	if err != nil {
   391  		errDecorate(err, "PDBWrite")
   392  	}
   393  	_, err = out.Write([]byte{'\n'}) //This function is just a wrapper to add the newline to what pdbWrite does.
   394  	if err != nil {
   395  		return CError{"Failed to write in io.Writer", []string{"io.Write.Write", "PDBWrite"}}
   396  
   397  	}
   398  	return nil
   399  }
   400  
   401  func pdbWrite(out io.Writer, coords *v3.Matrix, mol Atomer, bfact []float64) error {
   402  	if bfact == nil {
   403  		bfact = make([]float64, mol.Len())
   404  	}
   405  	cr, _ := coords.Dims()
   406  	br := len(bfact)
   407  	if cr != mol.Len() || cr != br {
   408  		return CError{"Ref and Coords and/or Bfactors dont have the same number of atoms", []string{"pdbWrite"}}
   409  	}
   410  	chainprev := mol.Atom(0).Chain //this is to know when the chain changes.
   411  	var outline string
   412  	var err error
   413  	iowriteError := func(err error) error {
   414  		return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Write.Write", "pdbWrite"}}
   415  	}
   416  	for i := 0; i < mol.Len(); i++ {
   417  		//	r,c:=coords.Dims()
   418  		//	fmt.Println("IIIIIIIIIIIi", i,coords,r,c, "lllllll")
   419  		writecoord := coords.VecView(i)
   420  		outline, chainprev, err = writePDBLine(mol.Atom(i), writecoord, bfact[i], chainprev)
   421  		if err != nil {
   422  			return errDecorate(err, "pdbWrite "+fmt.Sprintf("Could not print PDB line: %d", i))
   423  		}
   424  		_, err := out.Write([]byte(outline))
   425  		if err != nil {
   426  			return iowriteError(err)
   427  		}
   428  	}
   429  	_, err = out.Write([]byte("TER\n")) // New Addition, should help to recognize the end of the chain.
   430  	_, err = out.Write([]byte("END"))   //no newline, this is in case the write is part of a PDB and one needs to write "ENDMDEL".
   431  	if err != nil {
   432  		return iowriteError(err)
   433  	}
   434  	return nil
   435  }
   436  
   437  //PDBStringWrite writes a string in PDB format for a given reference, coordinate set and bfactor set, which must match each other
   438  //returns the written string and error or nil.
   439  func PDBStringWrite(coords *v3.Matrix, mol Atomer, bfact []float64) (string, error) {
   440  	if bfact == nil {
   441  		bfact = make([]float64, mol.Len())
   442  	}
   443  	cr, _ := coords.Dims()
   444  	br := len(bfact)
   445  	if cr != mol.Len() || cr != br {
   446  		return "", CError{"Ref and Coords and/or Bfactors dont have the same number of atoms", []string{"PDBStringWrite"}}
   447  	}
   448  	chainprev := mol.Atom(0).Chain //this is to know when the chain changes.
   449  	var outline string
   450  	var outstring string
   451  	var err error
   452  	for i := 0; i < mol.Len(); i++ {
   453  		//	r,c:=coords.Dims()
   454  		//	fmt.Println("IIIIIIIIIIIi", i,coords,r,c, "lllllll")
   455  		writecoord := coords.VecView(i)
   456  		outline, chainprev, err = writePDBLine(mol.Atom(i), writecoord, bfact[i], chainprev)
   457  		if err != nil {
   458  			return "", errDecorate(err, "PDBStringWrite "+fmt.Sprintf("Could not print PDB line: %d", i))
   459  		}
   460  		outstring = strings.Join([]string{outstring, outline}, "")
   461  	}
   462  	outstring = strings.Join([]string{outstring, "END\n"}, "")
   463  	return outstring, nil
   464  }
   465  
   466  //MultiPDBWrite writes a multiPDB  for the molecule mol and the various coordinate sets in CandB, to the io.Writer given.
   467  //CandB is a list of lists of *matrix.DenseMatrix. If it has 2 elements or more, the second will be used as
   468  //Bfactors. If it has one element, all b-factors will be zero.
   469  //Returns an error if fails, or nil if succeeds.
   470  func MultiPDBWrite(out io.Writer, Coords []*v3.Matrix, mol Atomer, Bfactors [][]float64) error {
   471  	if !correctBfactors(Coords, Bfactors) {
   472  		Bfactors = make([][]float64, len(Coords), len(Coords))
   473  	}
   474  	iowriterError := func(err error) error {
   475  		return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "MultiPDBWrite"}}
   476  	}
   477  
   478  	_, err := out.Write([]byte("REMARK WRITTEN WITH GOCHEM :-)")) //The model number starts with one
   479  	if err != nil {
   480  		return iowriterError(err)
   481  	}
   482  	//OK now the real business.
   483  	for j := range Coords {
   484  		_, err := out.Write([]byte(fmt.Sprintf("MODEL %d\n", j+1))) //The model number starts with one
   485  		if err != nil {
   486  			return iowriterError(err)
   487  		}
   488  		err = pdbWrite(out, Coords[j], mol, Bfactors[j])
   489  		if err != nil {
   490  			return errDecorate(err, "MultiPDBWrite")
   491  		}
   492  		_, err = out.Write([]byte("MDL\n"))
   493  		if err != nil {
   494  			return iowriterError(err)
   495  		}
   496  
   497  	}
   498  
   499  	_, err = out.Write([]byte("END\n"))
   500  	if err != nil {
   501  		return iowriterError(err)
   502  	}
   503  
   504  	return nil
   505  }
   506  
   507  /***End of PDB part***/
   508  
   509  //XYZFileRead Reads an xyz or multixyz file (as produced by Turbomole). Returns a Molecule and error or nil.
   510  func XYZFileRead(xyzname string) (*Molecule, error) {
   511  	xyzfile, err := os.Open(xyzname)
   512  	if err != nil {
   513  		//fmt.Println("Unable to open file!!")
   514  		return nil, CError{err.Error(), []string{"os.Open", "XYZFileRead"}}
   515  	}
   516  	defer xyzfile.Close()
   517  	mol, err := XYZRead(xyzfile)
   518  	if err != nil {
   519  		err = errDecorate(err, "XYZFileRead "+fmt.Sprintf(strings.Join([]string{"error in file ", xyzname}, "")))
   520  	}
   521  	return mol, err
   522  
   523  }
   524  
   525  //XYZRead Reads an xyz or multixyz formatted bufio.Reader (as produced by Turbomole). Returns a Molecule and error or nil.
   526  func XYZRead(xyzp io.Reader) (*Molecule, error) {
   527  	snaps := 1
   528  	xyz := bufio.NewReader(xyzp)
   529  	var err error
   530  	var top *Topology
   531  	var molecule []*Atom
   532  	Coords := make([]*v3.Matrix, 1, 1)
   533  	Data := make([]string, 1, 1)
   534  
   535  	for {
   536  		//When we read the first snapshot we collect also the topology data, later
   537  		//only coords are collected.
   538  		if snaps == 1 {
   539  			Coords[0], molecule, Data[0], err = xyzReadSnap(xyz, nil, true)
   540  			if err != nil {
   541  				return nil, errDecorate(err, "XYZRead")
   542  			}
   543  			top = NewTopology(0, 1, molecule)
   544  			if err != nil {
   545  				return nil, errDecorate(err, "XYZRead")
   546  			}
   547  			snaps++
   548  			continue
   549  		}
   550  		tmpcoords, _, data, err := xyzReadSnap(xyz, nil, false)
   551  		if err != nil {
   552  			//An error here simply means that there are no more snapshots
   553  			errm := err.Error()
   554  			if strings.Contains(errm, "Empty") || strings.Contains(errm, "header") {
   555  				err = nil
   556  				break
   557  			}
   558  			return nil, errDecorate(err, "XYZRead")
   559  		}
   560  		Coords = append(Coords, tmpcoords)
   561  		Data = append(Data, data)
   562  	}
   563  	bfactors := make([][]float64, len(Coords), len(Coords))
   564  	for key, _ := range bfactors {
   565  		bfactors[key] = make([]float64, top.Len())
   566  	}
   567  	returned, err := NewMolecule(Coords, top, bfactors)
   568  	returned.XYZFileData = Data
   569  	return returned, errDecorate(err, "XYZRead")
   570  }
   571  
   572  //XYZTraj is a trajectory-like representation of an XYZ File.
   573  type XYZTraj struct {
   574  	natoms     int
   575  	xyz        *bufio.Reader //The DCD file
   576  	frames     int
   577  	xyzfile    *os.File
   578  	readable   bool
   579  	firstframe *v3.Matrix
   580  }
   581  
   582  //Readable returns true if the trajectory is fit to be read, false otherwise.
   583  func (X *XYZTraj) Readable() bool {
   584  	return X.readable
   585  }
   586  
   587  func (X *XYZTraj) Len() int {
   588  	return X.natoms
   589  }
   590  
   591  //xyztrajerror returns a LastFrameError if the given error message contains certain keywords.
   592  //otherwise, returns the original error.
   593  func (X *XYZTraj) xyztrajerror(err error) error {
   594  	errm := err.Error()
   595  	X.xyzfile.Close()
   596  	X.readable = false
   597  	if strings.Contains(errm, "Empty") || strings.Contains(errm, "header") {
   598  		return newlastFrameError("", X.frames)
   599  	} else {
   600  		return err
   601  	}
   602  
   603  }
   604  
   605  //Next reads the next snapshot of the trajectory into coords, or discards it, if coords
   606  //is nil. It can take a box slice of floats, but won't do anything with it
   607  //(only for compatibility with the Traj interface.
   608  func (X *XYZTraj) Next(coords *v3.Matrix, box ...[]float64) error {
   609  	if coords == nil {
   610  		_, _, _, err := xyzReadSnap(X.xyz, coords, false)
   611  		if err != nil {
   612  			//An error here probably means that there are no more snapshots
   613  			return X.xyztrajerror(err)
   614  		}
   615  		X.frames++
   616  		return nil
   617  	}
   618  	if X.frames == 0 {
   619  		coords.Copy(X.firstframe) //slow, but I don't want to mess with the pointer I got.
   620  		X.frames++
   621  		X.firstframe = nil
   622  		return nil
   623  	}
   624  	_, _, _, err := xyzReadSnap(X.xyz, coords, false)
   625  	if err != nil {
   626  		//An error here probably means that there are no more snapshots
   627  		return X.xyztrajerror(err)
   628  	}
   629  
   630  	X.frames++
   631  	return err
   632  }
   633  
   634  //Reads a multi-xyz file. Returns the first snapshot as a molecule, and the other ones as a XYZTraj
   635  func XYZFileAsTraj(xyzname string) (*Molecule, *XYZTraj, error) {
   636  	xyzfile, err := os.Open(xyzname)
   637  	if err != nil {
   638  		//fmt.Println("Unable to open file!!")
   639  		return nil, nil, CError{err.Error(), []string{"os.Open", "XYZFileRead"}}
   640  	}
   641  	xyz := bufio.NewReader(xyzfile)
   642  	//the molecule first
   643  	coords, atoms, _, err := xyzReadSnap(xyz, nil, true)
   644  	top := NewTopology(0, 1, atoms)
   645  	bfactors := make([][]float64, 1, 1)
   646  	bfactors[0] = make([]float64, top.Len())
   647  	returned, err := NewMolecule([]*v3.Matrix{coords}, top, bfactors)
   648  	//now the traj
   649  	traj := new(XYZTraj)
   650  	traj.xyzfile = xyzfile
   651  	traj.xyz = xyz
   652  	traj.natoms = returned.Len()
   653  	traj.readable = true
   654  	traj.firstframe = coords
   655  	return returned, traj, nil
   656  }
   657  
   658  //xyzReadSnap reads an xyz file snapshot from a bufio.Reader, returns a slice of Atom
   659  //objects, which will be nil if ReadTopol is false,
   660  // a slice of matrix.DenseMatrix and an error or nil.
   661  func xyzReadSnap(xyz *bufio.Reader, toplace *v3.Matrix, ReadTopol bool) (*v3.Matrix, []*Atom, string, error) {
   662  	line, err := xyz.ReadString('\n')
   663  	if err != nil {
   664  		return nil, nil, "", CError{fmt.Sprintf("Empty XYZ File: %s", err.Error()), []string{"bufio.Reader.ReadString", "xyzReadSnap"}}
   665  	}
   666  	natoms, err := strconv.Atoi(strings.TrimSpace(line))
   667  	if err != nil {
   668  		return nil, nil, "", CError{fmt.Sprintf("Wrong header for an XYZ file %s", err.Error()), []string{"strconv.Atoi", "xyzReadSnap"}}
   669  	}
   670  	var molecule []*Atom
   671  	if ReadTopol {
   672  		molecule = make([]*Atom, natoms, natoms)
   673  	}
   674  	var coords []float64
   675  	if toplace == nil {
   676  		coords = make([]float64, natoms*3, natoms*3)
   677  	} else {
   678  		coords = toplace.RawSlice()
   679  	}
   680  	data, err := xyz.ReadString('\n') //The text in "data" could be anything, including just "\n"
   681  	if err != nil {
   682  		return nil, nil, "", CError{fmt.Sprintf("Ill formatted XYZ file: %s", err.Error()), []string{"bufio.Reader.ReadString", "xyzReadSnap"}}
   683  
   684  	}
   685  	errs := make([]error, 3, 3)
   686  	for i := 0; i < natoms; i++ {
   687  		line, errs[0] = xyz.ReadString('\n')
   688  		if errs[0] != nil { //inefficient, (errs[1] can be checked once before), but clearer.
   689  			if strings.Contains(errs[0].Error(), "EOF") && i == natoms-1 { //This allows that an XYZ ends without a newline
   690  				errs[0] = nil
   691  			} else {
   692  				break
   693  			}
   694  		}
   695  		fields := strings.Fields(line)
   696  		if len(fields) < 4 {
   697  			errs[0] = fmt.Errorf("Line number %d ill formed: %s", i, line)
   698  			break
   699  		}
   700  		if ReadTopol {
   701  			molecule[i] = new(Atom)
   702  			molecule[i].Symbol = strings.Title(fields[0])
   703  			molecule[i].Mass = symbolMass[molecule[i].Symbol]
   704  			molecule[i].MolName = "UNK"
   705  			molecule[i].Name = molecule[i].Symbol
   706  		}
   707  		coords[i*3], errs[0] = strconv.ParseFloat(fields[1], 64)
   708  		coords[i*3+1], errs[1] = strconv.ParseFloat(fields[2], 64)
   709  		coords[i*3+2], errs[2] = strconv.ParseFloat(fields[3], 64)
   710  	}
   711  	//This could be done faster if done in the same loop where the coords are read
   712  	//Instead of having another loop just for them.
   713  	for _, i := range errs {
   714  		if i != nil {
   715  			//	fmt.Println("line", line, k)
   716  			return nil, nil, "", CError{i.Error(), []string{"strconv.ParseFloat", "xyzReadSnap"}}
   717  		}
   718  	}
   719  	//this should be fine even if I had a toplace matrix. Both toplace and mcoord should just point to the same data.
   720  	mcoords, err := v3.NewMatrix(coords)
   721  	return mcoords, molecule, data, errDecorate(err, "xyzReadSnap")
   722  }
   723  
   724  //XYZWrite writes the mol Ref and the Coord coordinates in an XYZ file with name xyzname which will
   725  //be created fot that. If the file exist it will be overwritten.
   726  func XYZFileWrite(xyzname string, Coords *v3.Matrix, mol Atomer) error {
   727  	out, err := os.Create(xyzname)
   728  	if err != nil {
   729  		return CError{err.Error(), []string{"os.Create", "XYZFileWrite"}}
   730  	}
   731  	defer out.Close()
   732  	err = XYZWrite(out, Coords, mol)
   733  	if err != nil {
   734  		return errDecorate(err, "XYZFileWrite")
   735  	}
   736  	return nil
   737  }
   738  
   739  //XYZStringWrite writes the mol Ref and the Coord coordinates in an XYZ-formatted string.
   740  func XYZStringWrite(Coords *v3.Matrix, mol Atomer) (string, error) {
   741  	var out string
   742  	if mol.Len() != Coords.NVecs() {
   743  		return "", CError{"Ref and Coords dont have the same number of atoms", []string{"XYZStringWrite"}}
   744  	}
   745  	c := make([]float64, 3, 3)
   746  	out = fmt.Sprintf("%-4d\n\n", mol.Len())
   747  	//towrite := Coords.Arrays() //An array of array with the data in the matrix
   748  	for i := 0; i < mol.Len(); i++ {
   749  		//c := towrite[i] //coordinates for the corresponding atoms
   750  		c = Coords.Row(c, i)
   751  		temp := fmt.Sprintf("%-2s  %12.6f%12.6f%12.6f \n", mol.Atom(i).Symbol, c[0], c[1], c[2])
   752  		out = strings.Join([]string{out, temp}, "")
   753  	}
   754  	return out, nil
   755  }
   756  
   757  //XYZWrite writes the mol Ref and the Coords coordinates to a io.Writer, in the XYZ format.
   758  func XYZWrite(out io.Writer, Coords *v3.Matrix, mol Atomer) error {
   759  	iowriterError := func(err error) error {
   760  		return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "XYZWrite"}}
   761  	}
   762  	if mol.Len() != Coords.NVecs() {
   763  		return CError{"Ref and Coords dont have the same number of atoms", []string{"XYZWrite"}}
   764  	}
   765  	c := make([]float64, 3, 3)
   766  	_, err := out.Write([]byte(fmt.Sprintf("%-4d\n\n", mol.Len())))
   767  	if err != nil {
   768  		return iowriterError(err)
   769  	}
   770  	//towrite := Coords.Arrays() //An array of array with the data in the matrix
   771  	for i := 0; i < mol.Len(); i++ {
   772  		//c := towrite[i] //coordinates for the corresponding atoms
   773  		c = Coords.Row(c, i)
   774  		temp := fmt.Sprintf("%-2s  %12.6f%12.6f%12.6f \n", mol.Atom(i).Symbol, c[0], c[1], c[2])
   775  		_, err := out.Write([]byte(temp))
   776  		if err != nil {
   777  			return iowriterError(err)
   778  		}
   779  	}
   780  	return nil
   781  }
   782  
   783  //GroFileRead reads a file in the Gromacs gro format, returning a molecule.
   784  func GroFileRead(groname string) (*Molecule, error) {
   785  	grofile, err := os.Open(groname)
   786  	if err != nil {
   787  		//fmt.Println("Unable to open file!!")
   788  		return nil, CError{err.Error(), []string{"os.Open", "GroFileRead"}}
   789  	}
   790  	defer grofile.Close()
   791  	snaps := 1
   792  	gro := bufio.NewReader(grofile)
   793  	var top *Topology
   794  	var molecule []*Atom
   795  	Coords := make([]*v3.Matrix, 1, 1)
   796  
   797  	for {
   798  		//When we read the first snapshot we collect also the topology data, later
   799  		//only coords are collected.
   800  		if snaps == 1 {
   801  			Coords[0], molecule, err = groReadSnap(gro, true)
   802  			if err != nil {
   803  				return nil, errDecorate(err, "GroFileRead")
   804  			}
   805  			top = NewTopology(0, 1, molecule)
   806  			if err != nil {
   807  				return nil, errDecorate(err, "GroFileRead")
   808  			}
   809  			snaps++
   810  			continue
   811  		}
   812  		//fmt.Println("how manytimes?") /////////////////////
   813  		tmpcoords, _, err := groReadSnap(gro, false)
   814  		if err != nil {
   815  			break //We just ignore errors after the first snapshot, and simply read as many snapshots as we can.
   816  			/*
   817  				//An error here may just mean that there are no more snapshots
   818  				errm := err.Error()
   819  				if strings.Contains(errm, "Empty") || strings.Contains(errm, "EOF") {
   820  					err = nil
   821  					break
   822  				}
   823  				return nil, errDecorate(err, "GroRead")
   824  			*/
   825  		}
   826  		Coords = append(Coords, tmpcoords)
   827  	}
   828  	returned, err := NewMolecule(Coords, top, nil)
   829  	//	fmt.Println("2 return!", top.Atom(1), returned.Coords[0].VecView(2)) ///////////////////////
   830  	return returned, errDecorate(err, "GroRead")
   831  }
   832  
   833  func groReadSnap(gro *bufio.Reader, ReadTopol bool) (*v3.Matrix, []*Atom, error) {
   834  	nm2A := 10.0
   835  	chains := "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   836  	line, err := gro.ReadString('\n') //we don't care about this line,but it has to be there
   837  	if err != nil {
   838  		return nil, nil, CError{fmt.Sprintf("Empty gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}}
   839  	}
   840  	line, err = gro.ReadString('\n')
   841  	if err != nil {
   842  		return nil, nil, CError{fmt.Sprintf("Malformed gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}}
   843  	}
   844  
   845  	natoms, err := strconv.Atoi(strings.TrimSpace(line))
   846  	if err != nil {
   847  		return nil, nil, CError{fmt.Sprintf("Wrong header for a gro file %s", err.Error()), []string{"strconv.Atoi", "groReadSnap"}}
   848  	}
   849  	var molecule []*Atom
   850  	if ReadTopol {
   851  		molecule = make([]*Atom, 0, natoms)
   852  	}
   853  	coords := make([]float64, 0, natoms*3)
   854  	prevres := 0
   855  	chainindex := 0
   856  	for i := 0; i < natoms; i++ {
   857  		line, err = gro.ReadString('\n')
   858  		if err != nil {
   859  			return nil, nil, CError{fmt.Sprintf("Failure to read gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}}
   860  		}
   861  		fields := strings.Fields(line)
   862  		if len(fields) < 4 {
   863  			break //meaning this line contains the unit cell vectors, and it is the last line of the snapshot
   864  		}
   865  		if ReadTopol {
   866  			atom, c, err := read_gro_line(line)
   867  			if err != nil {
   868  				return nil, nil, CError{fmt.Sprintf("Failure to read gro File: %s", err.Error()), []string{"bufio.Reader.ReadString", "groReadSnap"}}
   869  			}
   870  			if atom.MolID < prevres {
   871  				chainindex++
   872  			}
   873  			prevres = atom.MolID
   874  			if chainindex >= len(chains) {
   875  				chainindex = 0 //more chains inthe molecule than letters in the alphabet!
   876  			}
   877  			atom.Chain = string(chains[chainindex])
   878  			molecule = append(molecule, atom)
   879  			coords = append(coords, c...)
   880  			//	fmt.Println(atom, c) //////////////////
   881  			continue
   882  
   883  		}
   884  		c := make([]float64, 3, 3)
   885  		for i := 0; i < 3; i++ {
   886  			c[i], err = strconv.ParseFloat(strings.TrimSpace(line[20+(i*8):28+(i*8)]), 64)
   887  			if err != nil {
   888  				return nil, nil, err
   889  			}
   890  			c[i] = c[i] * nm2A //gro uses nm, goChem uses A.
   891  		}
   892  		coords = append(coords, c...)
   893  
   894  	}
   895  	mcoords, err := v3.NewMatrix(coords)
   896  	//	fmt.Println(molecule) //, mcoords) ////////////////////////
   897  	return mcoords, molecule, nil
   898  }
   899  
   900  //read_gro_line Parses a valid ATOM or HETATM line of a PDB file, returns an Atom
   901  // object with the info except for the coordinates and b-factors, which  are returned
   902  // separately as an array of 3 float64 and a float64, respectively
   903  func read_gro_line(line string) (*Atom, []float64, error) {
   904  	coords := make([]float64, 3, 3)
   905  	atom := new(Atom)
   906  	nm2A := 10.0
   907  	var err error
   908  	atom.MolID, err = strconv.Atoi(strings.TrimSpace(line[0:5]))
   909  	if err != nil {
   910  		return nil, nil, err
   911  	}
   912  	atom.MolName = strings.TrimSpace(line[5:10])
   913  	atom.MolName1 = three2OneLetter[atom.MolName]
   914  	atom.Name = strings.TrimSpace(line[10:15])
   915  	atom.ID, err = strconv.Atoi(strings.TrimSpace(line[15:20]))
   916  	//	fmt.Printf("%s|%s|%s|%s|\n", line[0:5], line[5:10], line[10:15], line[15:20]) ////////////
   917  	if err != nil {
   918  		return nil, nil, err
   919  	}
   920  	for i := 0; i < 3; i++ {
   921  		coords[i], err = strconv.ParseFloat(strings.TrimSpace(line[20+(i*8):28+(i*8)]), 64)
   922  		if err != nil {
   923  			return nil, nil, err
   924  		}
   925  		coords[i] = coords[i] * nm2A //gro uses nm, goChem uses A.
   926  	}
   927  
   928  	atom.Symbol, _ = symbolFromName(atom.Name)
   929  	return atom, coords, nil
   930  }
   931  
   932  //GoFileWrite writes the molecule described by mol and Coords into a file in the Gromacs
   933  //gro format. If Coords has more than one elements, it will write a multi-state file.
   934  func GroFileWrite(outname string, Coords []*v3.Matrix, mol Atomer) error {
   935  	out, err := os.Create(outname)
   936  	if err != nil {
   937  		return CError{"Failed to write open file" + err.Error(), []string{"os.Create", "GroFileWrite"}}
   938  	}
   939  	defer out.Close()
   940  	for _, v := range Coords {
   941  		err := GroSnapWrite(v, mol, out)
   942  		if err != nil {
   943  			return errDecorate(err, "GoFileWrite")
   944  		}
   945  	}
   946  	return nil
   947  }
   948  
   949  //GroSnapWrite writes a single snapshot of a molecule to an io.Writer, in the Gro format.
   950  func GroSnapWrite(coords *v3.Matrix, mol Atomer, out io.Writer) error {
   951  	A2nm := 0.1
   952  	iowriterError := func(err error) error {
   953  		return CError{"Failed to write in io.Writer" + err.Error(), []string{"io.Writer.Write", "GroSnapWrite"}}
   954  	}
   955  	if mol.Len() != coords.NVecs() {
   956  		return CError{"Ref and Coords dont have the same number of atoms", []string{"GroSnapWrite"}}
   957  	}
   958  	c := make([]float64, 3, 3)
   959  	_, err := out.Write([]byte(fmt.Sprintf("Written with goChem :-)\n%-4d\n", mol.Len())))
   960  	if err != nil {
   961  		return iowriterError(err)
   962  	}
   963  	//towrite := Coords.Arrays() //An array of array with the data in the matrix
   964  	for i := 0; i < mol.Len(); i++ {
   965  		//c := towrite[i] //coordinates for the corresponding atoms
   966  		c = coords.Row(c, i)
   967  		at := mol.Atom(i)
   968  		//velocities are set to 0
   969  		temp := fmt.Sprintf("%5d%-5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n", at.MolID, at.MolName, at.Name, at.ID, c[0]*A2nm, c[1]*A2nm, c[2]*A2nm, 0.0, 0.0, 0.0)
   970  		_, err := out.Write([]byte(temp))
   971  		if err != nil {
   972  			return iowriterError(err)
   973  		}
   974  
   975  	}
   976  	//the box vectors at the end of the snappshot
   977  	_, err = out.Write([]byte("0.0 0.0 0.0\n"))
   978  	if err != nil {
   979  		return iowriterError(err)
   980  	}
   981  	return nil
   982  }