Source code for chemcoord.zmat_functions

from __future__ import with_statement
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

try:
    import itertools.imap as map
except ImportError:
    pass

import numpy as np
import pandas as pd
import math as m
from . import _common_class
from . import export
from . import constants
from . import utilities
from ._exceptions import PhysicalMeaningError

@export
[docs]class Zmat(_common_class.common_methods): """The main class for dealing with internal coordinates. """
[docs] def __init__(self, init): """How to initialize a Zmat instance. Args: init (pd.DataFrame): A Dataframe with at least the columns ``['atom', 'bond_with', 'bond', 'angle_with', 'angle', 'dihedral_with', 'dihedral']``. Where ``'atom'`` is a string for the elementsymbol. Returns: Zmat: A new zmat instance. """ try: tmp = init._to_Zmat() self.frame = tmp.frame.copy() self.shape = self.frame.shape self.n_atoms = self.shape[0] try: # self.__bond_dic = tmp.__bond_dic pass except AttributeError: pass except AttributeError: # Create from pd.DataFrame if not self._is_physical(init.columns): raise PhysicalMeaningError('There are columns missing for a meaningful description of a molecule') self.frame = init.copy() self.shape = self.frame.shape self.n_atoms = self.shape[0]
def copy(self): molecule = self.__class__(self.frame) try: # molecule.__bond_dic = self.__bond_dic pass except AttributeError: pass return molecule def _to_Zmat(self): return self.copy()
[docs] def build_list(self): """Return the buildlist which is necessary to create this Zmat Args: None Returns: np.array: Buildlist """ columns = ['temporary_index', 'bond_with', 'angle_with', 'dihedral_with'] tmp = self.insert(0, 'temporary_index', self.index) buildlist = tmp[:, columns].values.astype('int64') buildlist[0, 1:] = 0 buildlist[1, 2:] = 0 buildlist[2, 3:] = 0 return buildlist
[docs] def change_numbering(self, new_index=None, inplace=False): """Change numbering to a new index. Changes the numbering of index and all dependent numbering (bond_with...) to a new_index. The user has to make sure that the new_index consists of distinct elements. Args: new_index (list): If None the new_index is taken from 1 to the number of atoms. Returns: Zmat: Reindexed version of the zmatrix. """ output = self if inplace else self.copy() old_index = output.index if (new_index is None): new_index = range(1, zmat_frame.shape[0]+1) else: new_index = new_index assert len(new_index) == len(old_index) output.index = new_index output[:, ['bond_with', 'angle_with', 'dihedral_with']] = \ output[:, ['bond_with', 'angle_with', 'dihedral_with']].replace(old_index, new_index) if not inplace: return output
[docs] def to_xyz(self, SN_NeRF=False): """Transforms to cartesian space. Args: SN_NeRF (bool): Use the **Self-Normalizing Natural Extension Reference Frame** algorithm [1]_. In theory this means 30 % less floating point operations, but since this module is in python, floating point operations are not the rate determining step. Nevertheless it is a more elegant method than the "intuitive" conversion. Could make a difference in the future when certain functions will be implemented in ``Fortran``. Returns: Cartesian: Reindexed version of the zmatrix. .. [1] Parsons J, Holmes JB, Rojas JM, Tsai J, Strauss CE (2005). Practical conversion from torsion space to Cartesian space for in silico protein synthesis. J Comput Chem. 26(10) , 1063-8. `doi:10.1002/jcc.20237 <http://dx.doi.org/10.1002/jcc.20237>`_ """ # zmat = self.zmat_frame.copy() # n_atoms = self.n_atoms xyz_frame = pd.DataFrame( columns=['atom', 'x', 'y', 'z'], dtype='float', index=self.index) # Cannot import globally in python 2, so we will only import here. # It is not a beautiful hack, but works for now! # See: # stackoverflow.com/questions/17226016/simple-cross-import-in-python from . import xyz_functions molecule = xyz_functions.Cartesian(xyz_frame) buildlist = self.build_list() normalize = utilities.normalize rotation_matrix = utilities.rotation_matrix def add_first_atom(): index = buildlist[0, 0] # Change of nonlocal variables molecule[index, :] = [self[index, 'atom'], 0., 0., 0.] def add_second_atom(): index = buildlist[1, 0] atom, bond = self[index, ['atom', 'bond']] # Change of nonlocal variables molecule[index, :] = [atom, bond, 0., 0.] def add_third_atom(): index, bond_with, angle_with = buildlist[2, :3] atom, bond, angle = self[index, ['atom', 'bond', 'angle']] angle = m.radians(angle) # vb is the vector of the atom bonding to, # va is the vector of the angle defining atom, vb, va = molecule.location([bond_with, angle_with]) # Vector pointing from vb to va BA = va - vb # Vector of length distance d = bond * normalize(BA) # Rotate d by the angle around the z-axis d = np.dot(rotation_matrix([0, 0, 1], angle), d) # Add d to the position of q to get the new coordinates of the atom p = vb + d # Change of nonlocal variables molecule[index, :] = [atom] + list(p) def add_atom(row): index, bond_with, angle_with, dihedral_with = buildlist[row, :] atom, bond, angle, dihedral = self[index, ['atom', 'bond', 'angle', 'dihedral']] angle, dihedral = map(m.radians, (angle, dihedral)) # vb is the vector of the atom bonding to, # va is the vector of the angle defining atom, # vd is the vector of the dihedral defining atom vb, va, vd = molecule.location( [bond_with, angle_with, dihedral_with]) if np.isclose(m.degrees(angle), 180.): AB = vb - va ab = normalize(AB) d = bond * ab p = vb + d molecule[index, :] = [atom] + list(p) else: AB = vb - va DA = vd - va n1 = normalize(np.cross(DA, AB)) ab = normalize(AB) # Vector of length distance pointing along the x-axis d = bond * -ab # Rotate d by the angle around the n1 axis d = np.dot(rotation_matrix(n1, angle), d) d = np.dot(rotation_matrix(ab, dihedral), d) # Add d to the position of q to get the new coordinates # of the atom p = vb + d # Change of nonlocal variables molecule[index, :] = [atom] + list(p) def add_atom_SN_NeRF(row): normalize = utilities.normalize # TODO python2 compatibility # raise NotImplementedError( # "This functionality has not been implemented yet!") # index = None # Should be added index, bond_with, angle_with, dihedral_with = buildlist[row, :] atom, bond, angle, dihedral = self[index, ['atom', 'bond', 'angle', 'dihedral']] angle, dihedral = map(m.radians, (angle, dihedral)) bond_with, angle_with, dihedral_with = buildlist[row, 1:] vb, va, vd = molecule.location([bond_with, angle_with, dihedral_with]) # The next steps implements the so called SN-NeRF algorithm. # In their paper they use a different definition of the angle. # This means, that I use sometimes cos instead of sin and other # minor changes # Compare with the paper: # Parsons J, Holmes JB, Rojas JM, Tsai J, Strauss CE.: # Practical conversion from torsion space to Cartesian space for # in silico protein synthesis. # J Comput Chem. 2005 Jul 30;26(10):1063-8. # PubMed PMID: 15898109 # Theoretically it uses 30 % less floating point operations. # Since the python overhead is the limiting step, you won't see # any difference. But it is more elegant ;). if np.isclose(m.degrees(angle), 180.): AB = vb - va ab = normalize(AB) d = bond * ab p = vb + d molecule[index, :] = [atom] + list(p) else: D2 = bond * np.array([ - np.cos(angle), np.cos(dihedral) * np.sin(angle), np.sin(dihedral) * np.sin(angle) ], dtype=float) ab = normalize(vb - va) da = (va - vd) n = normalize(np.cross(da, ab)) M = np.array([ ab, np.cross(n, ab), n]) D = np.dot(np.transpose(M), D2) + vb molecule[index, :] = [atom] + list(D) if self.n_atoms == 1: add_first_atom() elif self.n_atoms == 2: add_first_atom() add_second_atom() elif self.n_atoms == 3: add_first_atom() add_second_atom() add_third_atom() elif self.n_atoms > 3: add_first_atom() add_second_atom() add_third_atom() if SN_NeRF: for row in range(3, self.n_atoms): add_atom_SN_NeRF(row) else: for row in range(3, self.n_atoms): add_atom(row) assert not molecule.frame.isnull().values.any(), \ ('Serious bug while converting, please report an error' 'on the Github page with your coordinate files') return molecule
@classmethod
[docs] def read_zmat(cls, inputfile, implicit_index=True): """Reads a zmat file. Lines beginning with ``#`` are ignored. Args: inputfile (str): implicit_index (bool): If this option is true the first column has to be the element symbols for the atoms. The row number is used to determine the index. Returns: Zmat: """ if implicit_index: zmat_frame = pd.read_table( inputfile, comment='#', delim_whitespace=True, names=[ 'atom', 'bond_with', 'bond', 'angle_with', 'angle', 'dihedral_with', 'dihedral'], ) n_atoms = zmat_frame.shape[0] zmat_frame.index = range(1, n_atoms+1) else: zmat_frame = pd.read_table( inputfile, comment='#', delim_whitespace=True, names=[ 'temp_index', 'atom', 'bond_with', 'bond', 'angle_with', 'angle', 'dihedral_with', 'dihedral'], ) zmat_frame.set_index('temp_index', drop=True, inplace=True) zmat_frame.index.name = None return cls(zmat_frame)
[docs] def write(self, outputfile, implicit_index=True): """Writes the zmatrix into a file. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The frame to be written is of course not changed. Args: outputfile (str): implicit_index (bool): If implicit_index is set, the zmat indexing is changed to range(1, number_atoms+1). Besides the index is omitted while writing which means, that the index is given implicitly by the row number. Returns: None: None """ # The following functions are necessary to deal with the fact, # that pandas does not support "NaN" for integers. # It was written by the user LondonRob at StackExchange: # http://stackoverflow.com/questions/25789354/ # exporting-ints-with-missing-values-to-csv-in-pandas/31208873#31208873 # Begin of the copied code snippet EPSILON = 1e-9 def _lost_precision(s): """ The total amount of precision lost over Series `s` during conversion to int64 dtype """ try: return (s - s.fillna(0).astype(np.int64)).sum() except ValueError: return np.nan def _nansafe_integer_convert(s): """ Convert Series `s` to an object type with `np.nan` represented as an empty string "" """ if _lost_precision(s) < EPSILON: # Here's where the magic happens as_object = s.fillna(0).astype(np.int64).astype(np.object) as_object[s.isnull()] = "" return as_object else: return s def nansafe_to_csv(df, *args, **kwargs): """ Write `df` to a csv file, allowing for missing values in integer columns Uses `_lost_precision` to test whether a column can be converted to an integer data type without losing precision. Missing values in integer columns are represented as empty fields in the resulting csv. """ df.apply(_nansafe_integer_convert).to_csv(*args, **kwargs) # End of the copied code snippet if implicit_index: zmat_frame = self.change_numbering().zmat_frame nansafe_to_csv( zmat_frame.loc[:, 'atom':], outputfile, sep=str(' '), index=False, header=False, mode='w' ) else: nansafe_to_csv( self.zmat_frame, outputfile, sep=str(' '), index=True, header=False, mode='w' )