Source code for chemcoord.xyz_functions

from __future__ import with_statement
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
try:
    # import itertools.imap as map
    import itertools.izip as zip
except ImportError:
    pass
import numpy as np
import pandas as pd
import copy
import collections
from . import _common_class
from . import test
from ._exceptions import PhysicalMeaningError
from . import constants
from . import utilities
# from . import zmat_functions
from . import export
from . import settings
import io
from io import open


def pick(my_set):
    """Returns one element from a set.
    """
    assert type(my_set) is set, 'Pick can be applied only on sets.'
    x = my_set.pop()
    my_set.add(x)
    return x


@export
[docs]class Cartesian(_common_class.common_methods): """The main class for dealing with cartesian Coordinates. """
[docs] def __init__(self, init): """How to initialize a Cartesian instance. Args: frame (pd.DataFrame): A Dataframe with at least the columns ``['atom', 'x', 'y', 'z']``. Where ``'atom'`` is a string for the elementsymbol. Returns: Cartesian: A new cartesian instance. """ try: tmp = init._to_Cartesian() self.frame = tmp.frame.copy() self.shape = self.frame.shape self.n_atoms = self.shape[0] try: self.__bond_dic = tmp.__bond_dic except AttributeError: pass except AttributeError: # Create from pd.DataFrame if not self._is_physical(init.columns): raise PhysicalMeaningError('There are columns missing for a \ meaningful description of a molecule') self.frame = init.copy() self.shape = self.frame.shape self.n_atoms = self.shape[0]
def copy(self): molecule = self.__class__(self.frame) try: molecule.__bond_dic = self.__bond_dic except AttributeError: pass return molecule def _to_Cartesian(self): return self.copy() def _to_ase_Atoms(self): import ase atoms = ''.join(self[:, 'atom']) positions = self.location() return ase.Atoms(atoms, positions) def _give_distance_array(self): """Returns a frame with a column for the distance from origin. """ convert_to = { 'frame': dict(zip(range(self.n_atoms), self.index)), 'array': dict(zip(self.index, range(self.n_atoms)))} location_array = self[:, ['x', 'y', 'z']].values.astype(float) A = np.expand_dims(location_array, axis=1) B = np.expand_dims(location_array, axis=0) C = A - B return_array = np.linalg.norm(C, axis=2) return return_array, convert_to def _overlap(self, bond_size_dic, include=None): """Calculates the overlap of van der Vaals radii. Do not confuse it with overlap of atomic orbitals. This is just a "poor man's overlap" of van der Vaals radii. Args: bond_size_dic (dic): A dictionary mapping from the indices of atoms (integers) to their van der Vaals radius. include (list): The indices between which the overlap should be calculated. If ``None``, the whole index is taken. Returns: tuple: **First element**: overlap_array: A (n_atoms, n_atoms) array that contains the overlap between every atom given in the frame. **Second element**: convert_to: A nested dictionary that gives the possibility to convert the indices from the frame to the overlap_array and back. """ include = self.index if (include is None) else include def summed_bond_size_array(bond_size_dic): bond_size_vector = np.array([bond_size_dic[i] for i in include]) A = np.expand_dims(bond_size_vector, axis=1) B = np.expand_dims(bond_size_vector, axis=0) C = A + B return C bond_size_array = summed_bond_size_array(bond_size_dic) distance_array, convert_to = self[include, :]._give_distance_array() overlap_array = bond_size_array - distance_array return overlap_array, convert_to
[docs] def get_bonds( self, modified_properties=None, maximum_edge_length=25, difference_edge=6, use_valency=False, use_lookup=False, set_lookup=True, divide_et_impera=True, atomic_radius_data=settings.atomic_radius_data): """Returns a dictionary representing the bonds. .. warning:: This function is **not sideeffect free**, since it assigns the output to a variable ``self.__bond_dic`` if ``set_lookup`` is ``True`` (which is the default). This is necessary for performance reasons. The Cartesian().get_bonds() method will use or not use a lookup depending on ``use_lookup``. Greatly increases performance if True, but could introduce bugs in certain situations. Just imagine a situation where the ``Cartesian().frame`` is changed manually. If you apply lateron a method e.g. ``to_zmat()`` that makes use of ``get_bonds()`` the dictionary of the bonds may not represent the actual situation anymore. You have two possibilities to cope with this problem. Either you just re-execute ``get_bonds`` on your specific instance, or you change the ``internally_use_lookup`` option in the settings submodule. Please note that the internal use of the lookup variable greatly improves performance. Args: modified_properties (dic): If you want to change the van der Vaals radius or valency of one or more specific atoms, pass a dictionary that looks like:: modified_properties = {index1 : {'atomic_radius' : 1.5, 'valency' : 8}, ...} For global changes use the constants.py module. maximum_edge_length (float): Maximum length of one edge of a cuboid if ``divide_et_impera`` is ``True``. difference_edge (float): use_valency (bool): If ``True`` atoms can't have more bonds than their valency. This means that the bonds, exceeding the number of valency, with lowest overlap will be cut, although the van der Waals radii overlap. use_lookup (bool): set_lookup (bool): divide_et_impera (bool): Since the calculation of overlaps or distances between atoms scale with :math:`O(n^2)`, it is recommended to split the molecule in smaller cuboids and calculate the bonds in each cuboid. The scaling becomes then :math:`O(n\log(n))`. This approach can lead to problems if ``use_valency`` is ``True``. Bonds from one cuboid to another can not be counted for the valency.. This means that in certain situations some atoms can be oversaturated, although ``use_valency`` is ``True``. atomic_radius_data (str): Defines which column of :attr:`constants.elements` is used. The default is ``atomic_radius_cc`` and can be changed with :attr:`settings.atomic_radius_data`. Compare with :func:`add_data`. Returns: dict: Dictionary mapping from an atom index to the indices of atoms bonded to. """ def preparation_of_variables(modified_properties): bond_dic = dict( zip(self.index, [set([]) for _ in range(self.n_atoms)])) molecule2 = self.add_data(['valency', atomic_radius_data]) valency_dic = dict(zip( molecule2.index, molecule2[:, 'valency'].astype('int64'))) atomic_radius_dic = dict(zip( molecule2.index, molecule2[:, atomic_radius_data])) if modified_properties is None: pass else: for key in modified_properties: valency_dic[key] = modified_properties[key]['valency'] atomic_radius_dic[key] = modified_properties[key][ 'atomic_radius'] return bond_dic, valency_dic, atomic_radius_dic def get_bonds_local( self, bond_dic, valency_dic, atomic_radius_dic, use_valency, index_of_cube=self.index): overlap_array, convert_to = self._overlap( atomic_radius_dic, index_of_cube) np.fill_diagonal(overlap_array, -1.) bin_overlap_array = overlap_array > 0 actual_valency = np.sum(bin_overlap_array, axis=1) theoretical_valency = np.array([valency_dic[key] for key in index_of_cube]) excess_valency = (actual_valency - theoretical_valency) indices_of_oversaturated_atoms = np.nonzero(excess_valency > 0)[0] oversaturated_converted = [ convert_to['frame'][index] for index in indices_of_oversaturated_atoms] if use_valency & (len(indices_of_oversaturated_atoms) > 0): if settings.show_warnings['valency']: warning_string = ( 'Warning: You specified use_valency=True ' 'and provided a geometry with over saturated ' 'atoms. This means that the bonds with lowest ' 'overlap will be cut, although the van der ' "Waals radii overlap. If you don't want to see " "this warning go to settings.py and edit the " "dictionary. Or execute " "cc.settings.show_warnings['valency'] = False." "The problematic indices are:\n") \ + oversaturated_converted.__repr__() print(warning_string) select = np.nonzero(overlap_array[ indices_of_oversaturated_atoms, :]) for index in indices_of_oversaturated_atoms: atoms_bonded_to = np.nonzero( bin_overlap_array[index, :])[0] temp_frame = pd.Series(overlap_array[ index, atoms_bonded_to], index=atoms_bonded_to) temp_frame.sort_values(inplace=True, ascending=False) cut_bonds_to = temp_frame.iloc[ (theoretical_valency[index]):].index overlap_array[index, [cut_bonds_to]] = -1 overlap_array[[cut_bonds_to], index] = -1 bin_overlap_array = overlap_array > 0 if (not use_valency) & (len(indices_of_oversaturated_atoms) > 0): if settings.show_warnings['valency']: warning_string = ( "Warning: You specified use_valency=False (or " "used the default) and provided a geometry with " "over saturated atoms. This means that bonds are " "not cut even if their number exceeds the valency. " "If you don't want to see this warning go to " "settings.py and edit the dictionary. Or execute " "cc.settings.show_warnings['valency'] = False. " "The problematic indices are:\n" ) + oversaturated_converted.__repr__() print(warning_string) def update_dic(bin_overlap_array): a, b = np.nonzero(bin_overlap_array) a, b = ( [convert_to['frame'][key] for key in a], [convert_to['frame'][key] for key in b]) for row, index in enumerate(a): bond_dic[index] |= set([b[row]]) return bond_dic update_dic(bin_overlap_array) return bond_dic def complete_calculation(divide_et_impera): bond_dic, valency_dic, atomic_radius_dic = \ preparation_of_variables(modified_properties) if divide_et_impera: cuboid_dic = self._divide_et_impera( maximum_edge_length, difference_edge) for number, key in enumerate(cuboid_dic): get_bonds_local( self, bond_dic, valency_dic, atomic_radius_dic, use_valency, index_of_cube=cuboid_dic[key][1]) else: get_bonds_local( self, bond_dic, valency_dic, atomic_radius_dic, use_valency) return bond_dic if use_lookup: try: bond_dic = self.__bond_dic except AttributeError: bond_dic = complete_calculation(divide_et_impera) if set_lookup: self.__bond_dic = bond_dic else: bond_dic = complete_calculation(divide_et_impera) if set_lookup: self.__bond_dic = bond_dic return bond_dic
[docs] def _divide_et_impera(self, maximum_edge_length=25., difference_edge=6.): """Returns a molecule split into cuboids. If your algorithm scales with :math:`O(n^2)`. You can use this function as a preprocessing step to make it scaling with :math:`O(n\log(n))`. Args: maximum_edge_length (float): Maximum length of one edge of a cuboid. difference_edge (float): Returns: dict: A dictionary mapping from a 3 tuple of integers to a 2 tuple of sets. The 3 tuple gives the integer numbered coordinates of the cuboids. The first set contains the indices of atoms lying in the cube with a maximum edge length of ``maximum_edge_length``. They are pairwise disjunct and are referred to as small cuboids. The second set contains the indices of atoms lying in the cube with ``maximum_edge_length + difference_edge``. They are a bit larger than the small cuboids and overlap with ``difference_edge / 2``. """ coordinates = ['x', 'y', 'z'] sorted_series = dict(zip( coordinates, [ self[:, axis].sort_values().copy() for axis in coordinates])) convert = dict( (axis, dict(zip(range(self.n_atoms), sorted_series[axis].index))) for axis in coordinates) sorted_arrays = dict( (key, sorted_series[key].values.astype(float)) for key in coordinates) list_of_cuboid_tuples = [] minimum = ( np.array([sorted_arrays[key][0] for key in coordinates]) - np.array([0.01, 0.01, 0.01])) maximum = ( np.array([sorted_arrays[key][-1] for key in coordinates]) + np.array([0.01, 0.01, 0.01])) extent = maximum - minimum steps = np.ceil(extent / maximum_edge_length).astype(int) cube_dic = {} if np.array_equal(steps, np.array([1, 1, 1])): small_cube_index = self.index big_cube_index = small_cube_index cube_dic[(0, 0, 0)] = [small_cube_index, big_cube_index] else: cuboid_diagonal = extent / steps steps = dict((axis, steps[number]) for number, axis in enumerate(coordinates)) edge_small = dict( (axis, cuboid_diagonal[number]) for number, axis in enumerate(coordinates)) edge_big = dict( (axis, (edge_small[axis] + difference_edge)) for axis in coordinates) origin_array = np.empty((steps['x'], steps['y'], steps['z'], 3)) for x_counter in range(steps['x']): for y_counter in range(steps['y']): for z_counter in range(steps['z']): origin_array[x_counter, y_counter, z_counter] = ( minimum + cuboid_diagonal / 2 + np.dot( np.diag([x_counter, y_counter, z_counter]), cuboid_diagonal)) origin1D = {} origin1D['x'] = dict( (counter, origin_array[counter, 0, 0, 0]) for counter in range(steps['x'])) origin1D['y'] = dict( (counter, origin_array[0, counter, 0, 1]) for counter in range(steps['y'])) origin1D['z'] = dict( (counter, origin_array[0, 0, counter, 2]) for counter in range(steps['z'])) indices = dict(zip(coordinates, [{}, {}, {}])) for axis in coordinates: for counter in range(steps[axis]): intervall_small = [ origin1D[axis][counter] - edge_small[axis] / 2, origin1D[axis][counter] + edge_small[axis] / 2] intervall_big = [ origin1D[axis][counter] - edge_big[axis] / 2, origin1D[axis][counter] + edge_big[axis] / 2] bool_vec_small = np.logical_and( intervall_small[0] <= sorted_arrays[axis], sorted_arrays[axis] < intervall_small[1]) bool_vec_big = np.logical_and( intervall_big[0] <= sorted_arrays[axis], sorted_arrays[axis] < intervall_big[1]) index_small = set(np.nonzero(bool_vec_small)[0]) index_small = set( convert[axis][index] for index in index_small) index_big = set(np.nonzero(bool_vec_big)[0]) index_big = set( convert[axis][index] for index in index_big) indices[axis][counter] = [index_small, index_big] for x_counter in range(steps['x']): for y_counter in range(steps['y']): for z_counter in range(steps['z']): small_cube_index = ( indices['x'][x_counter][0] & indices['y'][y_counter][0] & indices['z'][z_counter][0]) big_cube_index = ( indices['x'][x_counter][1] & indices['y'][y_counter][1] & indices['z'][z_counter][1]) cube_dic[(x_counter, y_counter, z_counter)] = ( small_cube_index, big_cube_index) def test_output(cube_dic): for key in cube_dic.keys(): try: assert ( cube_dic[key][0] & cube_dic[previous_key][0] == set([])), \ ('I am sorry Dave. I made a mistake.' 'Report a bug please.') except UnboundLocalError: pass finally: previous_key = key # slows down performance too much # test_output(cube_dic) return cube_dic
[docs] def connected_to( self, index_of_atom, exclude=None, give_only_index=False, follow_bonds=None): """Returns a Cartesian of atoms connected to the specified one. Connected means that a path along covalent bonds exists. Args: index_of_atom (int): exclude (list): Indices in this list are omitted. give_only_index (bool): If ``True`` a set of indices is returned. Otherwise a new Cartesian instance. follow_bonds (int): This option determines how many branches the algorithm follows. If ``None`` it stops after reaching the end in every branch. If you have a single molecule this usually means, that the whole molecule is recovered. Returns: A set of indices or a new Cartesian instance. """ bond_dic = self.get_bonds(use_lookup=True) exclude = set([]) if (exclude is None) else set(exclude) previous_atoms = ( (set([index_of_atom]) | set(bond_dic[index_of_atom])) - exclude) fixed_atoms = set([index_of_atom]) | previous_atoms def new_shell(bond_dic, previous_atoms, fixed_atoms, exclude): before_inserting = len(fixed_atoms) new_atoms = set([]) for index in previous_atoms: new_atoms = new_atoms | ( (bond_dic[index] - exclude) - fixed_atoms) fixed_atoms |= new_atoms after_inserting = len(fixed_atoms) changed = (before_inserting != after_inserting) return new_atoms, fixed_atoms, changed if follow_bonds is None: changed = True while changed: previous_atoms, fixed_atoms, changed = new_shell( bond_dic, previous_atoms, fixed_atoms, exclude) else: assert follow_bonds >= 0, 'follow_bonds has to be positive' fixed_atoms = set( [index_of_atom]) if (follow_bonds == 0) else fixed_atoms for _ in range(follow_bonds - 1): previous_atoms, fixed_atoms, changed = new_shell( bond_dic, previous_atoms, fixed_atoms, exclude) if give_only_index: to_return = fixed_atoms else: to_return = self[fixed_atoms, :] return to_return
[docs] def _preserve_bonds(self, sliced_cartesian): """Is called after cutting geometric shapes. If you want to change the rules how bonds are preserved, when applying e.g. :meth:`Cartesian.cutsphere` this is the function you have to modify. It is recommended to inherit from the Cartesian class to tailor it for your project, instead of modifying the source code of ChemCoord. Args: sliced_frame (Cartesian): Returns: Cartesian: """ included_atoms_set = set(sliced_cartesian.index) assert included_atoms_set.issubset(set(self.index)), \ 'The sliced Cartesian has to be a subset of the bigger frame' included_atoms_list = list(included_atoms_set) bond_dic = self.get_bonds(use_lookup=True) new_atoms = set([]) for atom in included_atoms_set: new_atoms = new_atoms | bond_dic[atom] new_atoms = new_atoms - included_atoms_set while not new_atoms == set([]): index_of_interest = new_atoms.pop() included_atoms_set = ( included_atoms_set | self.connected_to( index_of_interest, exclude=included_atoms_set, give_only_index=True)) new_atoms = new_atoms - included_atoms_set molecule = self[included_atoms_set, :] return molecule
[docs] def cutsphere( self, radius=15., origin=[0., 0., 0.], outside_sliced=True, preserve_bonds=False): """Cuts a sphere specified by origin and radius. Args: radius (float): origin (list): Please note that you can also pass an integer. In this case it is interpreted as the index of the atom which is taken as origin. outside_sliced (bool): Atoms outside/inside the sphere are cut out. preserve_bonds (bool): Do not cut covalent bonds. Returns: Cartesian: """ try: origin[0] except (TypeError, IndexError): origin = self.location(int(origin)) molecule = self.distance_to(origin) if outside_sliced: molecule = molecule[molecule[:, 'distance'] < radius, :] else: molecule = molecule[molecule[:, 'distance'] > radius, :] if preserve_bonds: molecule = self._preserve_bonds(molecule) return molecule
[docs] def cutcuboid( self, a=20, b=None, c=None, origin=[0, 0, 0], outside_sliced=True, preserve_bonds=False): """Cuts a cuboid specified by edge and radius. Args: a (float): Value of the a edge. b (float): Value of the b edge. Takes value of a if None. c (float): Value of the c edge. Takes value of a if None. origin (list): Please note that you can also pass an integer. In this case it is interpreted as the index of the atom which is taken as origin. outside_sliced (bool): Atoms outside/inside the sphere are cut out. preserve_bonds (bool): Do not cut covalent bonds. Returns: Cartesian: """ try: origin[0] except (TypeError, IndexError): origin = self.location(int(origin)) b = a if b is None else b c = a if c is None else c origin = dict(zip(['x', 'y', 'z'], list(origin))) boolean_vector = ( (np.abs((self[:, 'x'] - origin['x'])) < a / 2) & (np.abs((self[:, 'y'] - origin['y'])) < b / 2) & (np.abs((self[:, 'z'] - origin['z'])) < c / 2) ) if outside_sliced: molecule = self[boolean_vector, :] else: molecule = self[~boolean_vector, :] if preserve_bonds: molecule = self._preserve_bonds(molecule) return molecule
[docs] def topologic_center(self): """Returns the average location. Args: None Returns: np.array: """ location_array = self.location() return np.mean(location_array, axis=0)
[docs] def barycenter(self): """Returns the mass weighted average location. Args: None Returns: np.array: """ mass_molecule = self.add_data('mass') mass_vector = mass_molecule[:, 'mass'].values.astype('float64') location_array = mass_molecule.location() barycenter = np.mean(location_array * mass_vector[:, None], axis=0) return barycenter
[docs] def move( self, vector=[0, 0, 0], matrix=np.identity(3), indices=None, copy=False): """Move a Cartesian. The Cartesian is first rotated, mirrored... by the matrix and afterwards translated by the vector Args: vector (np.array): default is np.zeros(3) matrix (np.array): default is np.identity(3) indices (list): Indices to be moved. copy (bool): Atoms are copied or translated to the new location. Returns: Cartesian: """ output = self.copy() indices = self.index if (indices is None) else indices vectors = output[indices, ['x', 'y', 'z']] vectors = np.dot(np.array(matrix), vectors.T).T vectors = vectors + np.array(vector) if copy: max_index = self.index.max() index_for_copied_atoms = range( max_index + 1, max_index + len(indices) + 1 ) temp = self[indices, :].copy() temp.index = index_for_copied_atoms temp[index_for_copied_atoms , ['x', 'y', 'z']] = vectors output = output.append(temp) else: output[indices, ['x', 'y', 'z']] = vectors return output
[docs] def bond_lengths(self, buildlist, start_row=0): """Return the distances between given atoms. In order to know more about the buildlist, go to :func:`to_zmat`. Args: buildlist (np.array): start_row (int): Returns: list: Vector of the distances between the first and second atom of every entry in the buildlist. """ # check sanity of input buildlist = np.array(buildlist) try: buildlist.shape[1] except IndexError: buildlist = buildlist[None, :] else: pass buildlist = np.array(buildlist) own_location = self.location(buildlist[start_row:, 0]) bond_with_location = self.location(buildlist[start_row:, 1]) distance_vector = own_location - bond_with_location distance_list = np.linalg.norm(distance_vector, axis=1) return distance_list
[docs] def angle_degrees(self, buildlist, start_row=0): """Return the angles between given atoms. In order to know more about the buildlist, go to :func:`to_zmat`. Args: buildlist (list): start_row (int): Returns: list: List of the angle between the first, second and third atom of every entry in the buildlist. """ # check sanity of input buildlist = np.array(buildlist) try: buildlist.shape[1] except IndexError: buildlist = buildlist[None, :] else: pass buildlist = np.array(buildlist) own_location = self.location(buildlist[start_row:, 0]) bond_with_location = self.location(buildlist[start_row:, 1]) angle_with_location = self.location(buildlist[start_row:, 2]) BI, BA = ( own_location - bond_with_location, angle_with_location - bond_with_location) bi, ba = ( BI / np.linalg.norm(BI, axis=1)[:, None], BA / np.linalg.norm(BA, axis=1)[:, None]) dot_product = np.sum(bi * ba, axis=1) dot_product[np.isclose(dot_product, 1)] = 1 dot_product[np.isclose(dot_product, -1)] = -1 angles = np.degrees(np.arccos(dot_product)) return angles
[docs] def dihedral_degrees(self, buildlist, start_row=0): """Return the angles between given atoms. In order to know more about the buildlist, go to :func:`to_zmat`. Args: buildlist (list): start_row (int): Returns: list: List of the dihedral between the first, second, third and fourth atom of every entry in the buildlist. """ # check sanity of input buildlist = np.array(buildlist) try: buildlist.shape[1] except IndexError: buildlist = buildlist[None, :] else: pass own_location = self.location(buildlist[start_row:, 0]) bond_with_location = self.location(buildlist[start_row:, 1]) angle_with_location = self.location(buildlist[start_row:, 2]) dihedral_with_location = self.location(buildlist[start_row:, 3]) length = buildlist[start_row:].shape[0] DA = dihedral_with_location - angle_with_location AB = angle_with_location - bond_with_location BI = bond_with_location - own_location N1 = np.cross(DA, AB, axis=1) N2 = np.cross(AB, BI, axis=1) n1 = N1 / np.linalg.norm(N1, axis=1)[:, None] n2 = N2 / np.linalg.norm(N2, axis=1)[:, None] dot_product = np.sum(n1 * n2, axis=1) dot_product[np.isclose(dot_product, 1)] = 1 dot_product[np.isclose(dot_product, -1)] = -1 dihedrals = np.degrees(np.arccos(dot_product)) # the next lines are to test the direction of rotation. # is a dihedral really 90 or 270 degrees? test_where_to_modify = ( np.sum(AB * np.cross(n1, n2, axis=1), axis=1) > 0) where_to_modify = np.nonzero(test_where_to_modify)[0] sign = np.full(length, 1, dtype='float64') sign[where_to_modify] = -1 to_add = np.full(length, 0, dtype='float64') to_add[where_to_modify] = 360 dihedrals = to_add + sign * dihedrals return dihedrals
[docs] def fragmentate(self, give_only_index=False): """Get the indices of non bonded parts in the molecule. Args: give_only_index (bool): If ``True`` a set of indices is returned. Otherwise a new Cartesian instance. Returns: list: A list of sets of indices or new Cartesian instances. """ list_fragment_indices = [] still_to_check = set(self.index) while still_to_check != set([]): indices = self.connected_to( pick(still_to_check), give_only_index=True) still_to_check = still_to_check - indices list_fragment_indices.append(indices) if give_only_index: value_to_return = list_fragment_indices else: value_to_return = [self[indices, :] for indices in list_fragment_indices] return value_to_return
[docs] def get_fragment(self, list_of_indextuples, give_only_index=False): """Get the indices of the atoms in a fragment. The list_of_indextuples contains all bondings from the molecule to the fragment. ``[(1,3), (2,4)]`` means for example that the fragment is connected over two bonds. The first bond is from atom 1 in the molecule to atom 3 in the fragment. The second bond is from atom 2 in the molecule to atom 4 in the fragment. Args: list_of_indextuples (list): give_only_index (bool): If ``True`` a set of indices is returned. Otherwise a new Cartesian instance. Returns: A set of indices or a new Cartesian instance. """ exclude = [tuple[0] for tuple in list_of_indextuples] index_of_atom = list_of_indextuples[0][1] fragment_index = self.connected_to( index_of_atom, exclude=exclude, give_only_index=True) if give_only_index: value_to_return = fragment_index else: value_to_return = self[fragment_index, :] return value_to_return
[docs] def _get_buildlist(self, fixed_buildlist=None): """Create a buildlist for a Zmatrix. Args: fixed_buildlist (np.array): It is possible to provide the beginning of the buildlist. The rest is "figured" out automatically. Returns: np.array: buildlist """ buildlist = np.zeros((self.n_atoms, 4)).astype('int64') if not fixed_buildlist is None: buildlist[:fixed_buildlist.shape[0], :] = fixed_buildlist start_row = fixed_buildlist.shape[0] already_built = set(fixed_buildlist[:, 0]) to_be_built = set(self.index) - already_built convert_index = dict(zip(buildlist[:, 0], range(start_row))) else: start_row = 0 already_built, to_be_built = set([]), set(self.index) convert_index = {} bond_dic = self.get_bonds(use_lookup=True) topologic_center = self.topologic_center() distance_to_topologic_center = self.distance_to(topologic_center) def update(already_built, to_be_built, new_atoms_set): """NOT SIDEEFFECT FREE """ for index in new_atoms_set: already_built.add(index) to_be_built.remove(index) return already_built, to_be_built def from_topologic_center( self, row_in_buildlist, already_built, to_be_built, first_time=False, third_time=False ): index_of_new_atom = distance_to_topologic_center[to_be_built, 'distance'].idxmin() buildlist[row_in_buildlist, 0] = index_of_new_atom convert_index[index_of_new_atom] = row_in_buildlist if not first_time: bond_with = self.distance_to( index_of_new_atom, already_built)[:, 'distance'].idxmin() angle_with = self.distance_to( bond_with, already_built - set([bond_with]) )[:,'distance'].idxmin() buildlist[row_in_buildlist, 1:3] = [ bond_with, angle_with] if not third_time: dihedral_with = self.distance_to( bond_with, already_built - set([bond_with, angle_with]) )[:, 'distance'].idxmin() buildlist[row_in_buildlist, 1:] = [ bond_with, angle_with, dihedral_with] new_row_to_modify = row_in_buildlist + 1 already_built.add(index_of_new_atom) to_be_built.remove(index_of_new_atom) return new_row_to_modify, already_built, to_be_built def second_atom(self, already_built, to_be_built): new_atoms_set = bond_dic[buildlist[0, 0]] - already_built if new_atoms_set != set([]): index_of_new_atom = pick(new_atoms_set) convert_index[index_of_new_atom] = 1 buildlist[1, 0] = index_of_new_atom buildlist[1, 1] = pick(already_built) else: new_row_to_modify, already_built, to_be_built = \ from_topologic_center( self, row_in_buildlist, already_built, to_be_built) if len(new_atoms_set) > 1: use_index = buildlist[0, 0] else: use_index = buildlist[1, 0] new_row_to_modify = 2 already_built.add(index_of_new_atom) to_be_built.remove(index_of_new_atom) return new_row_to_modify, already_built, to_be_built, use_index # TODO (3, 1) if atoms are not connected to it # TODO find nearest atom of 90 def third_atom(self, already_built, to_be_built, use_index): new_atoms_set = bond_dic[use_index] - already_built if new_atoms_set != set([]): index_of_new_atom = pick(new_atoms_set) convert_index[index_of_new_atom] = 2 buildlist[2, 0] = index_of_new_atom buildlist[2, 1] = use_index buildlist[2, 2] = pick(already_built - set([use_index])) if ( self.angle_degrees(buildlist[2, :]) < 10 or self.angle_degrees(buildlist[2, :]) > 170): try: index_of_new_atom = pick( new_atoms_set - set([index_of_new_atom])) convert_index[index_of_new_atom] = 2 buildlist[2, 0] = index_of_new_atom buildlist[2, 1] = use_index buildlist[2, 2] = pick( already_built - set([use_index])) except KeyError: pass already_built.add(index_of_new_atom) to_be_built.remove(index_of_new_atom) else: new_row_to_modify, already_built, to_be_built = \ from_topologic_center( self, 2, already_built, to_be_built, third_time=True) # The two is hardcoded because of third atom. if len(new_atoms_set) > 1: use_index = use_index else: use_index = buildlist[2, 0] new_row_to_modify = 3 return new_row_to_modify, already_built, to_be_built, use_index def pick_new_atoms( self, row_in_buildlist, already_built, to_be_built, use_given_index=None): """Get the indices of new atoms to be put in buildlist. Tries to get the atoms bonded to the one, that was last inserted into the buildlist. If the last atom is the end of a branch, it looks for the index of an atom that is the nearest atom to the topologic center and not built in yet. .. note:: It modifies the buildlist array which is global to this function. Args: row_in_buildlist (int): The row which has to be filled at least. Returns: list: List of modified rows. """ if use_given_index is None: new_atoms_set = ( bond_dic[buildlist[row_in_buildlist-1, 0]] - already_built) if new_atoms_set != set([]): update(already_built, to_be_built, new_atoms_set) new_row_to_modify = row_in_buildlist + len(new_atoms_set) new_atoms_list = list(new_atoms_set) bond_with = buildlist[row_in_buildlist - 1, 0] angle_with = buildlist[convert_index[bond_with], 1] dihedral_with = buildlist[convert_index[bond_with], 2] buildlist[ row_in_buildlist: new_row_to_modify, 0] = new_atoms_list buildlist[ row_in_buildlist: new_row_to_modify, 1] = bond_with buildlist[ row_in_buildlist: new_row_to_modify, 2] = angle_with buildlist[ row_in_buildlist: new_row_to_modify, 3] = dihedral_with for key, value in zip( new_atoms_list, range(row_in_buildlist, new_row_to_modify)): convert_index[key] = value else: new_row_to_modify, already_built, to_be_built = \ from_topologic_center( self, row_in_buildlist, already_built, to_be_built) else: new_atoms_set = bond_dic[use_given_index] - already_built new_row_to_modify = row_in_buildlist + len(new_atoms_set) new_atoms_list = list(new_atoms_set) bond_with = use_given_index angle_with, dihedral_with = ( already_built - set([use_given_index])) buildlist[ row_in_buildlist: new_row_to_modify, 0] = new_atoms_list buildlist[ row_in_buildlist: new_row_to_modify, 1] = bond_with buildlist[ row_in_buildlist: new_row_to_modify, 2] = angle_with buildlist[ row_in_buildlist: new_row_to_modify, 3] = dihedral_with update(already_built, to_be_built, new_atoms_set) for key, value in zip( new_atoms_list, range(row_in_buildlist, new_row_to_modify)): convert_index[key] = value return new_row_to_modify, already_built, to_be_built row = start_row if 0 <= row <= 2: if row == 0 & 0 < buildlist.shape[0]: row, already_built, to_be_built = from_topologic_center( self, row, already_built, to_be_built, first_time=True) if row == 1 & 1 < buildlist.shape[0]: row, already_built, to_be_built, use_index = second_atom( self, already_built, to_be_built) if row == 2 & 2 < buildlist.shape[0]: row, already_built, to_be_built, use_index = third_atom( self, already_built, to_be_built, use_index) if row < buildlist.shape[0]: row, already_built, to_be_built = pick_new_atoms( self, row, already_built, to_be_built, use_index) while row < buildlist.shape[0]: row, already_built, to_be_built = pick_new_atoms( self, row, already_built, to_be_built) return buildlist
[docs] def _clean_dihedral(self, buildlist_to_check): """Reindexes the dihedral defining atom if colinear. Args: buildlist (np.array): Returns: np.array: modified_buildlist """ buildlist = buildlist_to_check.copy() bond_dic = self.get_bonds(use_lookup=True) angles = self.angle_degrees(buildlist[3:, 1:]) test_vector = np.logical_or(170 < angles, angles < 10) problematic_indices = np.nonzero(test_vector)[0] converged = True if len(problematic_indices) == 0 else False # look for index + 3 because index start directly at dihedrals for index in problematic_indices: try: already_tested = set([]) found = False while not found: new_dihedral = pick( (bond_dic[buildlist[index + 3, 2]] - set(buildlist[index + 3, [0, 1, 3]])) - set(buildlist[(index + 3):, 0]) - already_tested) already_tested.add(new_dihedral) temp_buildlist = buildlist[index + 3] temp_buildlist[3] = new_dihedral angle = self.angle_degrees(temp_buildlist[1:]) found = True if 10 < angle < 170 else False except KeyError: origin = buildlist[index + 3, 2] could_be_reference = set(buildlist[: index + 3, 0]) sorted_Cartesian = self.distance_to( origin, indices_of_other_atoms=could_be_reference) sorted_Cartesian.frame = \ sorted_Cartesian.frame.sort_values(by='distance') buildlist_for_new_dihedral_with = np.empty( (len(could_be_reference), 3), dtype='int64') bond_with, angle_with = buildlist[index + 3, [1, 2]] buildlist_for_new_dihedral_with[:, 0] = bond_with buildlist_for_new_dihedral_with[:, 1] = angle_with dihedral_with = sorted_Cartesian.index buildlist_for_new_dihedral_with[:, 2] = dihedral_with angles = self.angle_degrees(buildlist_for_new_dihedral_with) test_vector = np.logical_and(170 > angles, angles > 10) new_dihedral = dihedral_with[np.nonzero(test_vector)[0][0]] buildlist[index + 3, 3] = new_dihedral return buildlist
[docs] def _build_zmat(self, buildlist): """Creates the zmatrix from a buildlist. Args: buildlist (np.array): Returns: Zmat: A new instance of ``Zmat``. """ indexlist = buildlist[:, 0] default_columns = [ 'atom', 'bond_with', 'bond', 'angle_with', 'angle', 'dihedral_with', 'dihedral'] additional_columns = list( set(self.columns) - set(['atom', 'x', 'y', 'z'])) zmat_frame = pd.DataFrame( columns=default_columns + additional_columns, dtype='float', index=indexlist) zmat_frame.loc[:, additional_columns] = self[indexlist, additional_columns] bonds = self.bond_lengths(buildlist, start_row=1) angles = self.angle_degrees(buildlist, start_row=2) dihedrals = self.dihedral_degrees(buildlist, start_row=3) zmat_frame.loc[indexlist, 'atom'] = self[indexlist, 'atom'] zmat_frame.loc[indexlist[1:], 'bond_with'] = buildlist[1:, 1] zmat_frame.loc[indexlist[1:], 'bond'] = bonds zmat_frame.loc[indexlist[2:], 'angle_with'] = buildlist[2:, 2] zmat_frame.loc[indexlist[2:], 'angle'] = angles zmat_frame.loc[indexlist[3:], 'dihedral_with'] = buildlist[3:, 3] zmat_frame.loc[indexlist[3:], 'dihedral'] = dihedrals return zmat_functions.Zmat(zmat_frame)
# TODO docstring
[docs] def to_zmat( self, buildlist=None, fragment_list=None, check_linearity=True): """Transform to internal coordinates. Transforming to internal coordinates involves basically three steps: 1. Define an order of how to build. 2. Check for problematic local linearity. In this algorithm an angle with ``170 < angle < 10`` is assumed to be linear. This is not the mathematical definition, but makes it safer against "floating point noise" 3. Calculate the bond lengths, angles and dihedrals using the references defined in step 1 and 2. In the first two steps a so called ``buildlist`` is created. This is basically a ``np.array`` of shape ``(n_atoms, 4)`` and integer type. The four columns are ``['own_index', 'bond_with', 'angle_with', 'dihedral_with']``. This means that usually the upper right triangle can be any number, because for example the first atom has no other atom as reference. It is important to know, that getting the buildlist is a very costly step since the algoritym tries to make some guesses based on the connectivity to create a "chemical" zmatrix. If you create several zmatrices based on the same references you can save the buildlist of a zmatrix with :meth:`Zmat.build_list`. If you then pass the buildlist as argument to ``to_zmat``, then the algorithm directly starts with step 3. Another thing is that you can specify fragments. For this purpose the function :meth:`Cartesian.get_fragment` is quite handy. An element of fragment_list looks like:: (fragment, connections) Fragment is a ``Cartesian`` instance and connections is a ``(3, 4)`` numpy integer array, that defines how the fragment is connected to the molecule. Args: buildlist (np.array): fragment_list (list): check_linearity (bool): Returns: Zmat: A new instance of ``Zmat``. """ if buildlist is None: if fragment_list is None: buildlist = self._get_buildlist() else: def create_big_molecule(self, fragment_list): def prepare_variables(self, fragment_list): buildlist = np.empty((self.n_atoms, 4), dtype='int64') fragment_index = set([]) for fragment_tpl in fragment_list: fragment_index |= set( fragment_tpl[0].index) big_molecule_index = ( set(self.index) - fragment_index) return buildlist, big_molecule_index buildlist, big_molecule_index = prepare_variables( self, fragment_list) big_molecule = self[big_molecule_index, :] row = big_molecule.n_atoms buildlist[: row, :] = big_molecule._get_buildlist() return buildlist, big_molecule, row def add_fragment( self, fragment_tpl, big_molecule, buildlist, row): next_row = row + fragment_tpl[0].n_atoms buildlist[row: next_row, :] = \ fragment_tpl[0]._get_buildlist( fragment_tpl[1]) return buildlist, big_molecule, row buildlist, big_molecule, row = create_big_molecule( self, fragment_list) for fragment_tpl in fragment_list: buildlist, big_molecule, row = add_fragment( self, fragment_tpl, big_molecule, buildlist, row) if check_linearity: buildlist = self._clean_dihedral(buildlist) zmat = self._build_zmat(buildlist) return zmat
[docs] def inertia(self): """Calculates the inertia tensor and transforms along rotation axes. This function calculates the inertia tensor and returns a 4-tuple. Args: None Returns: dict: The returned dictionary has four possible keys: ``transformed_Cartesian``: A frame that is transformed to the basis spanned by the eigenvectors of the inertia tensor. The x-axis is the axis with the lowest inertia moment, the z-axis the one with the highest. Contains also a column for the mass ``diag_inertia_tensor``: A vector containing the sorted inertia moments after diagonalization. ``inertia_tensor``: The inertia tensor in the old basis. ``eigenvectors``: The eigenvectors of the inertia tensor in the old basis. """ Cartesian_mass = self.add_data('mass') Cartesian_mass = Cartesian_mass.move(vector=-self.barycenter()) frame_mass = Cartesian_mass.frame coordinates = ['x', 'y', 'z'] def kronecker(i, j): """Please note, that it also compares e.g. strings. """ if i == j: return 1 else: return 0 inertia_tensor = np.zeros([3, 3]) for row_index, row_coordinate in enumerate(coordinates): for column_index, column_coordinate in enumerate(coordinates): inertia_tensor[row_index, column_index] = ( frame_mass.loc[:, 'mass'] * ( kronecker(row_index, column_index) * (frame_mass.loc[:, coordinates]**2).sum(axis=1) - (frame_mass.loc[:, row_coordinate] * frame_mass.loc[:, column_coordinate]) )).sum() diag_inertia_tensor, eigenvectors = np.linalg.eig(inertia_tensor) # Sort ascending sorted_index = np.argsort(diag_inertia_tensor) diag_inertia_tensor = diag_inertia_tensor[sorted_index] eigenvectors = eigenvectors[:, sorted_index] new_basis = eigenvectors new_basis = utilities.orthormalize(new_basis) old_basis = np.identity(3) Cartesian_mass = self.basistransform(new_basis, old_basis) dic_of_values = dict(zip([ 'transformed_Cartesian', 'diag_inertia_tensor', 'inertia_tensor', 'eigenvectors'], [Cartesian_mass, diag_inertia_tensor, inertia_tensor, eigenvectors] )) return dic_of_values
[docs] def basistransform( self, new_basis, old_basis=np.identity(3), rotate_only=True): """Transforms the frame to a new basis. This function transforms the cartesian coordinates from an old basis to a new one. Please note that old_basis and new_basis are supposed to have full Rank and consist of three linear independent vectors. If rotate_only is True, it is asserted, that both bases are orthonormal and right handed. Besides all involved matrices are transposed instead of inverted. In some applications this may require the function :func:`utilities.orthonormalize` as a previous step. Args: old_basis (np.array): new_basis (np.array): rotate_only (bool): Returns: Cartesian: The transformed molecule. """ frame = self.frame.copy() old_basis = np.array(old_basis) new_basis = np.array(new_basis) # tuples are extracted row wise # For this reason you need to transpose e.g. ex is the first column # from new_basis if rotate_only: ex, ey, ez = np.transpose(old_basis) v1, v2, v3 = np.transpose(new_basis) assert np.allclose( np.dot(old_basis, np.transpose(old_basis)), np.identity(3)), 'old basis not orthonormal' assert np.allclose( np.dot(new_basis, np.transpose(new_basis)), np.identity(3)), 'new_basis not orthonormal' assert np.allclose( np.cross(ex, ey), ez), 'old_basis not righthanded' assert np.allclose( np.cross(v1, v2), v3), 'new_basis not righthanded' basistransformation = np.dot(new_basis, np.transpose(old_basis)) test_basis = np.dot(np.transpose(basistransformation), new_basis) new_cartesian = self.move(matrix=np.transpose(basistransformation)) else: basistransformation = np.dot(new_basis, np.linalg.inv(old_basis)) test_basis = np.dot(np.linalg.inv(basistransformation), new_basis) new_cartesian = self.move( matrix=np.linalg.inv(basistransformation)) assert np.isclose( test_basis, old_basis).all(), 'transformation did not work' return new_cartesian
[docs] def location(self, indexlist=None): """Returns the location of an atom. You can pass an indexlist or an index. Args: frame (pd.dataframe): indexlist (list): If indexlist is None, the complete index is used. Returns: np.array: A matrix of 3D rowvectors of the location of the atoms specified by indexlist. In the case of one index given a 3D vector is returned one index. """ indexlist = self.index if indexlist is None else indexlist array = self[indexlist, ['x', 'y', 'z']].values.astype(float) return array
[docs] def distance_to(self, origin=[0,0,0], indices_of_other_atoms=None, sort=False): """Returns a Cartesian with a column for the distance from origin. """ try: origin[0] except (TypeError, IndexError): origin = self.location(int(origin)) if indices_of_other_atoms is None: indices_of_other_atoms = self.index origin = np.array(origin, dtype=float) output = self[indices_of_other_atoms, :].copy() other_locations = output.location() output[:, 'distance'] = np.linalg.norm(other_locations - origin, axis=1) if sort: output.sort_values(by='distance', inplace=True) return output
[docs] def change_numbering(self, rename_dict, inplace=False): """Returns the reindexed version of Cartesian. Args: rename_dict (dict): A dictionary mapping integers on integers. Returns: Cartesian: A renamed copy according to the dictionary passed. """ output = self if inplace else self.copy() replace_list = list(rename_dict.keys()) with_list = [rename_dict[key] for key in replace_list] output[:, 'temporary_column'] = output.index output[:, 'temporary_column'].replace(replace_list, with_list, inplace=True) output.set_index('temporary_column', drop=True, inplace=True) output.sort_index(inplace=True) output.index.name = None if not inplace: return output
[docs] def partition_chem_env(self, follow_bonds=4): """This function partitions the molecule into subsets of the same chemical environment. A chemical environment is specified by the number of surrounding atoms of a certain kind around an atom with a certain atomic number represented by a tuple of a string and a frozenset of tuples. The ``follow_bonds`` option determines how many branches the algorithm follows to determine the chemical environment. Example: A carbon atom in ethane has bonds with three hydrogen (atomic number 1) and one carbon atom (atomic number 6). If ``follow_bonds=1`` these are the only atoms we are interested in and the chemical environment is:: ('C', frozenset([('H', 3), ('C', 1)])) If ``follow_bonds=2`` we follow every atom in the chemical enviromment of ``follow_bonds=1`` to their direct neighbours. In the case of ethane this gives:: ('C', frozenset([('H', 6), ('C', 1)])) In the special case of ethane this is the whole molecule; in other cases you can apply this operation recursively and stop after ``follow_bonds`` or after reaching the end of branches. Args: follow_bonds (int): Returns: dict: The output will look like this:: { (element_symbol, frozenset([tuples])) : set([indices]) } A dictionary mapping from a chemical environment to the set of indices of atoms in this environment. """ env_dict = {} def get_chem_env(self, atomseries, index, follow_bonds): indices_of_env_atoms = self.connected_to( index, follow_bonds=follow_bonds, give_only_index=True) indices_of_env_atoms.remove(index) own_symbol, atoms = ( atomseries[index], atomseries[indices_of_env_atoms]) environment = collections.Counter(atoms).most_common() environment = frozenset(environment) return (own_symbol, environment) atomseries = self[:, 'atom'] for index in self.index: chem_env = get_chem_env(self, atomseries, index, follow_bonds) try: env_dict[chem_env].add(index) except KeyError: env_dict[chem_env] = set([index]) return env_dict
[docs] def align(self, Cartesian2, ignore_hydrogens=False): """Aligns two Cartesians. Searches for the optimal rotation matrix that minimizes the RMSD (root mean squared deviation) of ``self`` to Cartesian2. Returns a tuple of copies of ``self`` and ``Cartesian2`` where both are centered around their topologic center and ``Cartesian2`` is aligned along ``self``. Uses the Kabsch algorithm implemented with :func:`utilities.kabsch` Args: Cartesian2 (Cartesian): ignore_hydrogens (bool): Hydrogens are ignored for the RMSD. Returns: tuple: """ molecule1 = self.sort_index() molecule2 = Cartesian2.sort_index() molecule1[:, 'x':'z'] = molecule1[:, 'x':'z'] - molecule1.topologic_center() molecule2[:, 'x':'z'] = molecule2[:, 'x':'z'] - molecule2.topologic_center() if ignore_hydrogens: location1 = molecule1[molecule1[:, 'atom'] != 'H', :].location() location2 = molecule2[molecule2[:, 'atom'] != 'H', :].location() else: location1 = molecule1.location() location2 = molecule2.location() # TODO still to rewrite # U = utilities.kabsch(location2, location1) molecule2[:, ['x', 'y', 'z']] = utilities.rotate(location2, location1) return molecule1, molecule2
[docs] def make_similar(self, Cartesian2, follow_bonds=4, prealign=True): """Similarizes two Cartesians. Returns a reindexed copy of ``Cartesian2`` that minimizes the distance for each atom in the same chemical environemt from ``self`` to ``Cartesian2``. Read more about the definition of the chemical environment in :func:`Cartesian.partition_chem_env` .. warning:: Please check the result with e.g. :func:`Cartesian.move_to()` It is probably necessary to use the function :func:`Cartesian.change_numbering()`. Args: Cartesian2 (Cartesian): max_follow_bonds (int): prealign (bool): The method :func:`Cartesian.align()` is applied before reindexing. Returns: tuple: Aligned copy of ``self`` and aligned + reindexed version of ``Cartesian2`` """ if prealign: molecule1, molecule2_new = self.align(Cartesian2) else: molecule1 = self.copy() # Copy ?? molecule2_new = Cartesian2.copy() partition1 = molecule1.partition_chem_env(follow_bonds) partition2 = molecule2_new.partition_chem_env(follow_bonds) index_dic = {} def make_subset_similar( molecule1, subset1, molecule2, subset2, index_dic): indexlist1 = list(subset1) for index_on_molecule1 in indexlist1: distances_to_atom_on_molecule1 = molecule2_new.distance_to( molecule1.location(index_on_molecule1), subset2, sort=True) index_on_molecule2 = \ distances_to_atom_on_molecule1.frame.iloc[0].name distance_new = distances_to_atom_on_molecule1[index_on_molecule2, 'distance'] location_of_atom2 = distances_to_atom_on_molecule1.location( index_on_molecule2) i = 1 while True: if index_on_molecule2 in index_dic.keys(): location_of_old_atom1 = molecule1.location( index_dic[index_on_molecule2]) distance_old = utilities.distance( location_of_old_atom1, location_of_atom2) if distance_new < distance_old: indexlist1.append(index_dic[index_on_molecule2]) index_dic[index_on_molecule2] = index_on_molecule1 break else: index_on_molecule2 = \ distances_to_atom_on_molecule1.frame.iloc[i].name distance_new = \ distances_to_atom_on_molecule1[index_on_molecule2, 'distance'] location_of_atom2 = \ distances_to_atom_on_molecule1.location( index_on_molecule2) i = i + 1 else: index_dic[index_on_molecule2] = index_on_molecule1 break return index_dic for key in partition1.keys(): assert len(partition1[key]) == len(partition2[key]), \ ( 'You have chemically different molecules, regarding the' 'topology of their connectivity. Perhaps' ' get_bonds(use_valency=False) helps.') index_dic = make_subset_similar( molecule1, partition1[key], molecule2_new, partition2[key], index_dic) new_index = [ index_dic[old_index2] for old_index2 in molecule2_new.index] molecule2_new.index = new_index molecule2_new.sort_index(inplace=True) return molecule1, molecule2_new
[docs] def move_to(self, Cartesian2, step=5, extrapolate=(0, 0)): """Returns list of Cartesians for the movement from self to Cartesian2. Args: Cartesian2 (Cartesian): step (int): extrapolate (tuple): Returns: list: The list contains ``self`` as first and ``Cartesian2`` as last element. The number of intermediate frames is defined by step. Please note, that for this reason: len(list) = (step + 1). The numbers in extrapolate define how many frames are appended to the left and right of the list continuing the movement. """ difference = Cartesian2[:, ['x', 'y', 'z']] - self[:, ['x', 'y', 'z']] step_frame = difference.copy() / step Cartesian_list = [] temp_Cartesian = self.copy() for t in range(-extrapolate[0], step + 1 + extrapolate[1]): temp_Cartesian[:, ['x', 'y', 'z']] = ( self[:, ['x', 'y', 'z']] + step_frame.loc[:, ['x', 'y', 'z']] * t ) Cartesian_list.append(temp_Cartesian) return list_of_cartesians
[docs] def write(self, outputfile, sort_index=True): """Writes the Cartesian into a file. If sort_index is true, the frame is sorted by the index before writing. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The frame to be written is of course not changed. Args: outputfile (str): sort_index (bool): Returns: None: None """ frame = self.frame[['atom', 'x', 'y', 'z']].copy() if sort_index: frame = frame.sort_index() n_atoms = frame.shape[0] with open(outputfile, mode='w') as f: f.write(str(n_atoms) + 2 * '\n') frame.to_csv( outputfile, sep=str(' '), index=False, header=False, mode='a' ) else: frame = frame.sort_values(by='atom') n_atoms = frame.shape[0] with open(outputfile, mode='w') as f: f.write(str(n_atoms) + 2 * '\n') frame.to_csv( outputfile, sep=str(' '), # https://github.com/pydata/pandas/issues/6035 index=False, header=False, mode='a' )
@classmethod
[docs] def read_xyz(cls, inputfile, pythonic_index=False, get_bonds=True): """Reads a xyz file. Args: inputfile (str): pythonic_index (bool): Returns: Cartesian: """ frame = pd.read_table( inputfile, skiprows=2, comment='#', delim_whitespace=True, names=['atom', 'x', 'y', 'z']) if not pythonic_index: n_atoms = frame.shape[0] frame.index = range(1, n_atoms+1) molecule = cls(frame) if get_bonds: previous_warnings_bool = settings.show_warnings['valency'] settings.show_warnings['valency'] = False molecule.get_bonds( use_lookup=False, set_lookup=True, use_valency=False) settings.show_warnings['valency'] = previous_warnings_bool return molecule
@classmethod
[docs] def read_molden(cls, inputfile, pythonic_index=False, get_bonds=True): """Reads a molden file. Args: inputfile (str): pythonic_index (bool): Returns: list: A list containing Cartesian is returned. """ f = open(inputfile, 'r') found = False while not found: line = f.readline() if line.strip() == '[N_GEO]': found = True number_of_molecules = int(f.readline().strip()) found = False while not found: line = f.readline() if line.strip() == '[GEOMETRIES] (XYZ)': found = True current_line = f.tell() number_of_atoms = int(f.readline().strip()) f.seek(current_line) for i in range(number_of_molecules): molecule_in = [f.readline() for j in range(number_of_atoms + 2)] molecule_in = ''.join(molecule_in) molecule_in = io.StringIO(molecule_in) molecule = cls.read_xyz(molecule_in, pythonic_index=pythonic_index, get_bonds=get_bonds) try: list_of_cartesians.append(molecule) except NameError: list_of_cartesians = [molecule] f.close() return list_of_cartesians
@staticmethod def _write_molden(cartesian_list, outputfile): """Writes a list of Cartesians into a molden file. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The frame to be written is of course not changed. Args: cartesian_list (list): outputfile (str): Returns: None: """ framelist = [molecule.frame for molecule in cartesian_list] n_frames = len(framelist) n_atoms = framelist[0].shape[0] string = """[MOLDEN FORMAT] [N_GEO] """ values = n_frames * '1\n' string = ( string + str(n_frames) + '\n[GEOCONV]\nenergy\n' + values + 'max-force\n' + values + 'rms-force\n' + values + '[GEOMETRIES] (XYZ)\n') with open(outputfile, mode='w') as f: f.write(string) for frame in framelist: frame = frame.sort_index() n_atoms = frame.shape[0] with open(outputfile, mode='a') as f: f.write(str(n_atoms) + 2 * '\n') frame.to_csv( outputfile, sep=str(' '), index=False, header=False, mode='a')
[docs]def write_molden(cartesian_list, outputfile): """Writes a list of Cartesians into a molden file. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The frame to be written is of course not changed. Args: cartesian_list (list): outputfile (str): Returns: None: """ cartesian_list[0]._write_molden(cartesian_list, outputfile)
[docs]def read_xyz(inputfile, pythonic_index=False, get_bonds=True): """Reads a xyz file. .. note:: This function calls in the background :func:`Cartesian.read_xyz`. If you inherited from :class:`Cartesian` to tailor it for your project, you have to use this method as a constructor. Otherwise you can choose. Args: inputfile (str): pythonic_index (bool): Returns: Cartesian: """ molecule = Cartesian.read_xyz( inputfile, pythonic_index=pythonic_index, get_bonds=get_bonds) return molecule
[docs]def read_molden(inputfile, pythonic_index=False, get_bonds=True): """Reads a molden file. Args: inputfile (str): pythonic_index (bool): Returns: list: A list containing Cartesian is returned. """ list_of_cartesians = Cartesian.read_molden( inputfile, pythonic_index=pythonic_index, get_bonds=get_bonds) return list_of_cartesians