from __future__ import with_statement
from __future__ import division
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
try:
# import itertools.imap as map
import itertools.izip as zip
except ImportError:
pass
import numpy as np
import pandas as pd
import copy
import collections
from . import _common_class
from . import test
from ._exceptions import PhysicalMeaningError
from . import constants
from . import utilities
# from . import zmat_functions
from . import export
from . import settings
import io
from io import open
def pick(my_set):
"""Returns one element from a set.
"""
assert type(my_set) is set, 'Pick can be applied only on sets.'
x = my_set.pop()
my_set.add(x)
return x
@export
[docs]class Cartesian(_common_class.common_methods):
"""The main class for dealing with cartesian Coordinates.
"""
[docs] def __init__(self, init):
"""How to initialize a Cartesian instance.
Args:
frame (pd.DataFrame): A Dataframe with at least the
columns ``['atom', 'x', 'y', 'z']``.
Where ``'atom'`` is a string for the elementsymbol.
Returns:
Cartesian: A new cartesian instance.
"""
try:
tmp = init._to_Cartesian()
self.frame = tmp.frame.copy()
self.shape = self.frame.shape
self.n_atoms = self.shape[0]
try:
self.__bond_dic = tmp.__bond_dic
except AttributeError:
pass
except AttributeError:
# Create from pd.DataFrame
if not self._is_physical(init.columns):
raise PhysicalMeaningError('There are columns missing for a \
meaningful description of a molecule')
self.frame = init.copy()
self.shape = self.frame.shape
self.n_atoms = self.shape[0]
def copy(self):
molecule = self.__class__(self.frame)
try:
molecule.__bond_dic = self.__bond_dic
except AttributeError:
pass
return molecule
def _to_Cartesian(self):
return self.copy()
def _to_ase_Atoms(self):
import ase
atoms = ''.join(self[:, 'atom'])
positions = self.location()
return ase.Atoms(atoms, positions)
def _give_distance_array(self):
"""Returns a frame with a column for the distance from origin.
"""
convert_to = {
'frame': dict(zip(range(self.n_atoms), self.index)),
'array': dict(zip(self.index, range(self.n_atoms)))}
location_array = self[:, ['x', 'y', 'z']].values.astype(float)
A = np.expand_dims(location_array, axis=1)
B = np.expand_dims(location_array, axis=0)
C = A - B
return_array = np.linalg.norm(C, axis=2)
return return_array, convert_to
def _overlap(self, bond_size_dic, include=None):
"""Calculates the overlap of van der Vaals radii.
Do not confuse it with overlap of atomic orbitals.
This is just a "poor man's overlap" of van der Vaals radii.
Args:
bond_size_dic (dic): A dictionary mapping from the
indices of atoms (integers) to their van der Vaals
radius.
include (list): The indices between which the overlap
should be calculated. If ``None``, the whole index is
taken.
Returns:
tuple: **First element**: overlap_array:
A (n_atoms, n_atoms) array that contains the overlap
between every atom given in the frame.
**Second element**: convert_to: A nested dictionary
that gives the possibility to convert the indices
from the frame to the overlap_array and back.
"""
include = self.index if (include is None) else include
def summed_bond_size_array(bond_size_dic):
bond_size_vector = np.array([bond_size_dic[i] for i in include])
A = np.expand_dims(bond_size_vector, axis=1)
B = np.expand_dims(bond_size_vector, axis=0)
C = A + B
return C
bond_size_array = summed_bond_size_array(bond_size_dic)
distance_array, convert_to = self[include, :]._give_distance_array()
overlap_array = bond_size_array - distance_array
return overlap_array, convert_to
[docs] def get_bonds(
self,
modified_properties=None,
maximum_edge_length=25,
difference_edge=6,
use_valency=False,
use_lookup=False,
set_lookup=True,
divide_et_impera=True,
atomic_radius_data=settings.atomic_radius_data):
"""Returns a dictionary representing the bonds.
.. warning:: This function is **not sideeffect free**, since it
assigns the output to a variable ``self.__bond_dic`` if
``set_lookup`` is ``True`` (which is the default). This is
necessary for performance reasons.
The Cartesian().get_bonds() method will use or not use a lookup
depending on ``use_lookup``. Greatly increases performance if
True, but could introduce bugs in certain situations.
Just imagine a situation where the ``Cartesian().frame`` is
changed manually. If you apply lateron a method e.g. ``to_zmat()``
that makes use of ``get_bonds()`` the dictionary of the bonds
may not represent the actual situation anymore.
You have two possibilities to cope with this problem.
Either you just re-execute ``get_bonds`` on your specific instance,
or you change the ``internally_use_lookup`` option in the settings
submodule. Please note that the internal use of the lookup variable
greatly improves performance.
Args:
modified_properties (dic): If you want to change the van der
Vaals radius or valency of one or more specific atoms, pass a
dictionary that looks like::
modified_properties = {index1 :
{'atomic_radius' : 1.5, 'valency' : 8}, ...}
For global changes use the constants.py module.
maximum_edge_length (float): Maximum length of one edge of a
cuboid if ``divide_et_impera`` is ``True``.
difference_edge (float):
use_valency (bool): If ``True`` atoms can't have more bonds than
their valency. This means that the bonds, exceeding the number
of valency, with lowest overlap will be cut, although the
van der Waals radii overlap.
use_lookup (bool):
set_lookup (bool):
divide_et_impera (bool): Since the calculation of overlaps or
distances between atoms scale with :math:`O(n^2)`, it is
recommended to split the molecule in smaller cuboids and
calculate the bonds in each cuboid. The scaling becomes
then :math:`O(n\log(n))`. This approach can lead to problems
if ``use_valency`` is ``True``. Bonds from one cuboid to
another can not be counted for the valency.. This means that
in certain situations some atoms can be oversaturated, although
``use_valency`` is ``True``.
atomic_radius_data (str): Defines which column of
:attr:`constants.elements` is used. The default is
``atomic_radius_cc`` and can be changed with
:attr:`settings.atomic_radius_data`. Compare with
:func:`add_data`.
Returns:
dict: Dictionary mapping from an atom index to the indices of atoms
bonded to.
"""
def preparation_of_variables(modified_properties):
bond_dic = dict(
zip(self.index, [set([]) for _ in range(self.n_atoms)]))
molecule2 = self.add_data(['valency', atomic_radius_data])
valency_dic = dict(zip(
molecule2.index, molecule2[:, 'valency'].astype('int64')))
atomic_radius_dic = dict(zip(
molecule2.index, molecule2[:, atomic_radius_data]))
if modified_properties is None:
pass
else:
for key in modified_properties:
valency_dic[key] = modified_properties[key]['valency']
atomic_radius_dic[key] = modified_properties[key][
'atomic_radius']
return bond_dic, valency_dic, atomic_radius_dic
def get_bonds_local(
self,
bond_dic,
valency_dic,
atomic_radius_dic,
use_valency,
index_of_cube=self.index):
overlap_array, convert_to = self._overlap(
atomic_radius_dic, index_of_cube)
np.fill_diagonal(overlap_array, -1.)
bin_overlap_array = overlap_array > 0
actual_valency = np.sum(bin_overlap_array, axis=1)
theoretical_valency = np.array([valency_dic[key]
for key in index_of_cube])
excess_valency = (actual_valency - theoretical_valency)
indices_of_oversaturated_atoms = np.nonzero(excess_valency > 0)[0]
oversaturated_converted = [
convert_to['frame'][index]
for index in indices_of_oversaturated_atoms]
if use_valency & (len(indices_of_oversaturated_atoms) > 0):
if settings.show_warnings['valency']:
warning_string = (
'Warning: You specified use_valency=True '
'and provided a geometry with over saturated '
'atoms. This means that the bonds with lowest '
'overlap will be cut, although the van der '
"Waals radii overlap. If you don't want to see "
"this warning go to settings.py and edit the "
"dictionary. Or execute "
"cc.settings.show_warnings['valency'] = False."
"The problematic indices are:\n") \
+ oversaturated_converted.__repr__()
print(warning_string)
select = np.nonzero(overlap_array[
indices_of_oversaturated_atoms, :])
for index in indices_of_oversaturated_atoms:
atoms_bonded_to = np.nonzero(
bin_overlap_array[index, :])[0]
temp_frame = pd.Series(overlap_array[
index, atoms_bonded_to], index=atoms_bonded_to)
temp_frame.sort_values(inplace=True, ascending=False)
cut_bonds_to = temp_frame.iloc[
(theoretical_valency[index]):].index
overlap_array[index, [cut_bonds_to]] = -1
overlap_array[[cut_bonds_to], index] = -1
bin_overlap_array = overlap_array > 0
if (not use_valency) & (len(indices_of_oversaturated_atoms) > 0):
if settings.show_warnings['valency']:
warning_string = (
"Warning: You specified use_valency=False (or "
"used the default) and provided a geometry with "
"over saturated atoms. This means that bonds are "
"not cut even if their number exceeds the valency. "
"If you don't want to see this warning go to "
"settings.py and edit the dictionary. Or execute "
"cc.settings.show_warnings['valency'] = False. "
"The problematic indices are:\n"
) + oversaturated_converted.__repr__()
print(warning_string)
def update_dic(bin_overlap_array):
a, b = np.nonzero(bin_overlap_array)
a, b = (
[convert_to['frame'][key] for key in a],
[convert_to['frame'][key] for key in b])
for row, index in enumerate(a):
bond_dic[index] |= set([b[row]])
return bond_dic
update_dic(bin_overlap_array)
return bond_dic
def complete_calculation(divide_et_impera):
bond_dic, valency_dic, atomic_radius_dic = \
preparation_of_variables(modified_properties)
if divide_et_impera:
cuboid_dic = self._divide_et_impera(
maximum_edge_length, difference_edge)
for number, key in enumerate(cuboid_dic):
get_bonds_local(
self, bond_dic, valency_dic,
atomic_radius_dic, use_valency,
index_of_cube=cuboid_dic[key][1])
else:
get_bonds_local(
self, bond_dic, valency_dic,
atomic_radius_dic, use_valency)
return bond_dic
if use_lookup:
try:
bond_dic = self.__bond_dic
except AttributeError:
bond_dic = complete_calculation(divide_et_impera)
if set_lookup:
self.__bond_dic = bond_dic
else:
bond_dic = complete_calculation(divide_et_impera)
if set_lookup:
self.__bond_dic = bond_dic
return bond_dic
[docs] def _divide_et_impera(self, maximum_edge_length=25., difference_edge=6.):
"""Returns a molecule split into cuboids.
If your algorithm scales with :math:`O(n^2)`.
You can use this function as a preprocessing step to make it
scaling with :math:`O(n\log(n))`.
Args:
maximum_edge_length (float): Maximum length of one edge
of a cuboid. difference_edge (float):
Returns:
dict: A dictionary mapping from a 3 tuple of integers
to a 2 tuple of sets. The 3 tuple gives the integer
numbered coordinates of the cuboids. The first set
contains the indices of atoms lying in the cube with
a maximum edge length of ``maximum_edge_length``. They
are pairwise disjunct and are referred to as small
cuboids. The second set contains the indices of atoms
lying in the cube with ``maximum_edge_length +
difference_edge``. They are a bit larger than the small
cuboids and overlap with ``difference_edge / 2``.
"""
coordinates = ['x', 'y', 'z']
sorted_series = dict(zip(
coordinates, [
self[:, axis].sort_values().copy()
for axis in coordinates]))
convert = dict(
(axis, dict(zip(range(self.n_atoms), sorted_series[axis].index)))
for axis in coordinates)
sorted_arrays = dict(
(key, sorted_series[key].values.astype(float))
for key in coordinates)
list_of_cuboid_tuples = []
minimum = (
np.array([sorted_arrays[key][0] for key in coordinates])
- np.array([0.01, 0.01, 0.01]))
maximum = (
np.array([sorted_arrays[key][-1] for key in coordinates])
+ np.array([0.01, 0.01, 0.01]))
extent = maximum - minimum
steps = np.ceil(extent / maximum_edge_length).astype(int)
cube_dic = {}
if np.array_equal(steps, np.array([1, 1, 1])):
small_cube_index = self.index
big_cube_index = small_cube_index
cube_dic[(0, 0, 0)] = [small_cube_index, big_cube_index]
else:
cuboid_diagonal = extent / steps
steps = dict((axis, steps[number])
for number, axis in enumerate(coordinates))
edge_small = dict(
(axis, cuboid_diagonal[number])
for number, axis in enumerate(coordinates))
edge_big = dict(
(axis, (edge_small[axis] + difference_edge))
for axis in coordinates)
origin_array = np.empty((steps['x'], steps['y'], steps['z'], 3))
for x_counter in range(steps['x']):
for y_counter in range(steps['y']):
for z_counter in range(steps['z']):
origin_array[x_counter, y_counter, z_counter] = (
minimum + cuboid_diagonal / 2
+ np.dot(
np.diag([x_counter, y_counter, z_counter]),
cuboid_diagonal))
origin1D = {}
origin1D['x'] = dict(
(counter, origin_array[counter, 0, 0, 0])
for counter in range(steps['x']))
origin1D['y'] = dict(
(counter, origin_array[0, counter, 0, 1])
for counter in range(steps['y']))
origin1D['z'] = dict(
(counter, origin_array[0, 0, counter, 2])
for counter in range(steps['z']))
indices = dict(zip(coordinates, [{}, {}, {}]))
for axis in coordinates:
for counter in range(steps[axis]):
intervall_small = [
origin1D[axis][counter] - edge_small[axis] / 2,
origin1D[axis][counter] + edge_small[axis] / 2]
intervall_big = [
origin1D[axis][counter] - edge_big[axis] / 2,
origin1D[axis][counter] + edge_big[axis] / 2]
bool_vec_small = np.logical_and(
intervall_small[0] <= sorted_arrays[axis],
sorted_arrays[axis] < intervall_small[1])
bool_vec_big = np.logical_and(
intervall_big[0] <= sorted_arrays[axis],
sorted_arrays[axis] < intervall_big[1])
index_small = set(np.nonzero(bool_vec_small)[0])
index_small = set(
convert[axis][index] for index in index_small)
index_big = set(np.nonzero(bool_vec_big)[0])
index_big = set(
convert[axis][index] for index in index_big)
indices[axis][counter] = [index_small, index_big]
for x_counter in range(steps['x']):
for y_counter in range(steps['y']):
for z_counter in range(steps['z']):
small_cube_index = (
indices['x'][x_counter][0]
& indices['y'][y_counter][0]
& indices['z'][z_counter][0])
big_cube_index = (
indices['x'][x_counter][1]
& indices['y'][y_counter][1]
& indices['z'][z_counter][1])
cube_dic[(x_counter, y_counter, z_counter)] = (
small_cube_index, big_cube_index)
def test_output(cube_dic):
for key in cube_dic.keys():
try:
assert (
cube_dic[key][0]
& cube_dic[previous_key][0] == set([])), \
('I am sorry Dave. I made a mistake.'
'Report a bug please.')
except UnboundLocalError:
pass
finally:
previous_key = key
# slows down performance too much
# test_output(cube_dic)
return cube_dic
[docs] def connected_to(
self, index_of_atom,
exclude=None,
give_only_index=False,
follow_bonds=None):
"""Returns a Cartesian of atoms connected to the specified
one.
Connected means that a path along covalent bonds exists.
Args:
index_of_atom (int):
exclude (list): Indices in this list are omitted.
give_only_index (bool): If ``True`` a set of indices is
returned. Otherwise a new Cartesian instance.
follow_bonds (int): This option determines how many
branches the algorithm follows. If ``None`` it stops
after reaching the end in every branch. If you have a
single molecule this usually means, that the whole
molecule is recovered.
Returns:
A set of indices or a new Cartesian instance.
"""
bond_dic = self.get_bonds(use_lookup=True)
exclude = set([]) if (exclude is None) else set(exclude)
previous_atoms = (
(set([index_of_atom]) | set(bond_dic[index_of_atom])) - exclude)
fixed_atoms = set([index_of_atom]) | previous_atoms
def new_shell(bond_dic, previous_atoms, fixed_atoms, exclude):
before_inserting = len(fixed_atoms)
new_atoms = set([])
for index in previous_atoms:
new_atoms = new_atoms | (
(bond_dic[index] - exclude) - fixed_atoms)
fixed_atoms |= new_atoms
after_inserting = len(fixed_atoms)
changed = (before_inserting != after_inserting)
return new_atoms, fixed_atoms, changed
if follow_bonds is None:
changed = True
while changed:
previous_atoms, fixed_atoms, changed = new_shell(
bond_dic, previous_atoms, fixed_atoms, exclude)
else:
assert follow_bonds >= 0, 'follow_bonds has to be positive'
fixed_atoms = set(
[index_of_atom]) if (follow_bonds == 0) else fixed_atoms
for _ in range(follow_bonds - 1):
previous_atoms, fixed_atoms, changed = new_shell(
bond_dic, previous_atoms, fixed_atoms, exclude)
if give_only_index:
to_return = fixed_atoms
else:
to_return = self[fixed_atoms, :]
return to_return
[docs] def _preserve_bonds(self, sliced_cartesian):
"""Is called after cutting geometric shapes.
If you want to change the rules how bonds are preserved, when
applying e.g. :meth:`Cartesian.cutsphere` this is the
function you have to modify.
It is recommended to inherit from the Cartesian class to
tailor it for your project, instead of modifying the
source code of ChemCoord.
Args:
sliced_frame (Cartesian):
Returns:
Cartesian:
"""
included_atoms_set = set(sliced_cartesian.index)
assert included_atoms_set.issubset(set(self.index)), \
'The sliced Cartesian has to be a subset of the bigger frame'
included_atoms_list = list(included_atoms_set)
bond_dic = self.get_bonds(use_lookup=True)
new_atoms = set([])
for atom in included_atoms_set:
new_atoms = new_atoms | bond_dic[atom]
new_atoms = new_atoms - included_atoms_set
while not new_atoms == set([]):
index_of_interest = new_atoms.pop()
included_atoms_set = (
included_atoms_set |
self.connected_to(
index_of_interest,
exclude=included_atoms_set,
give_only_index=True))
new_atoms = new_atoms - included_atoms_set
molecule = self[included_atoms_set, :]
return molecule
[docs] def cutsphere(
self,
radius=15.,
origin=[0., 0., 0.],
outside_sliced=True,
preserve_bonds=False):
"""Cuts a sphere specified by origin and radius.
Args:
radius (float):
origin (list): Please note that you can also pass an
integer. In this case it is interpreted as the
index of the atom which is taken as origin.
outside_sliced (bool): Atoms outside/inside the sphere
are cut out.
preserve_bonds (bool): Do not cut covalent bonds.
Returns:
Cartesian:
"""
try:
origin[0]
except (TypeError, IndexError):
origin = self.location(int(origin))
molecule = self.distance_to(origin)
if outside_sliced:
molecule = molecule[molecule[:, 'distance'] < radius, :]
else:
molecule = molecule[molecule[:, 'distance'] > radius, :]
if preserve_bonds:
molecule = self._preserve_bonds(molecule)
return molecule
[docs] def cutcuboid(
self,
a=20,
b=None,
c=None,
origin=[0, 0, 0],
outside_sliced=True,
preserve_bonds=False):
"""Cuts a cuboid specified by edge and radius.
Args:
a (float): Value of the a edge.
b (float): Value of the b edge. Takes value of a if None.
c (float): Value of the c edge. Takes value of a if None.
origin (list): Please note that you can also pass an
integer. In this case it is interpreted as the index
of the atom which is taken as origin.
outside_sliced (bool): Atoms outside/inside the sphere are
cut out.
preserve_bonds (bool): Do not cut covalent bonds.
Returns:
Cartesian:
"""
try:
origin[0]
except (TypeError, IndexError):
origin = self.location(int(origin))
b = a if b is None else b
c = a if c is None else c
origin = dict(zip(['x', 'y', 'z'], list(origin)))
boolean_vector = (
(np.abs((self[:, 'x'] - origin['x'])) < a / 2)
& (np.abs((self[:, 'y'] - origin['y'])) < b / 2)
& (np.abs((self[:, 'z'] - origin['z'])) < c / 2)
)
if outside_sliced:
molecule = self[boolean_vector, :]
else:
molecule = self[~boolean_vector, :]
if preserve_bonds:
molecule = self._preserve_bonds(molecule)
return molecule
[docs] def topologic_center(self):
"""Returns the average location.
Args:
None
Returns:
np.array:
"""
location_array = self.location()
return np.mean(location_array, axis=0)
[docs] def barycenter(self):
"""Returns the mass weighted average location.
Args:
None
Returns:
np.array:
"""
mass_molecule = self.add_data('mass')
mass_vector = mass_molecule[:, 'mass'].values.astype('float64')
location_array = mass_molecule.location()
barycenter = np.mean(location_array * mass_vector[:, None], axis=0)
return barycenter
[docs] def move(
self,
vector=[0, 0, 0],
matrix=np.identity(3),
indices=None,
copy=False):
"""Move a Cartesian.
The Cartesian is first rotated, mirrored... by the matrix
and afterwards translated by the vector
Args:
vector (np.array): default is np.zeros(3)
matrix (np.array): default is np.identity(3)
indices (list): Indices to be moved.
copy (bool): Atoms are copied or translated to the new location.
Returns:
Cartesian:
"""
output = self.copy()
indices = self.index if (indices is None) else indices
vectors = output[indices, ['x', 'y', 'z']]
vectors = np.dot(np.array(matrix), vectors.T).T
vectors = vectors + np.array(vector)
if copy:
max_index = self.index.max()
index_for_copied_atoms = range(
max_index + 1, max_index + len(indices) + 1
)
temp = self[indices, :].copy()
temp.index = index_for_copied_atoms
temp[index_for_copied_atoms , ['x', 'y', 'z']] = vectors
output = output.append(temp)
else:
output[indices, ['x', 'y', 'z']] = vectors
return output
[docs] def bond_lengths(self, buildlist, start_row=0):
"""Return the distances between given atoms.
In order to know more about the buildlist, go to
:func:`to_zmat`.
Args:
buildlist (np.array):
start_row (int):
Returns:
list: Vector of the distances between the first and second
atom of every entry in the buildlist.
"""
# check sanity of input
buildlist = np.array(buildlist)
try:
buildlist.shape[1]
except IndexError:
buildlist = buildlist[None, :]
else:
pass
buildlist = np.array(buildlist)
own_location = self.location(buildlist[start_row:, 0])
bond_with_location = self.location(buildlist[start_row:, 1])
distance_vector = own_location - bond_with_location
distance_list = np.linalg.norm(distance_vector, axis=1)
return distance_list
[docs] def angle_degrees(self, buildlist, start_row=0):
"""Return the angles between given atoms.
In order to know more about the buildlist, go to :func:`to_zmat`.
Args:
buildlist (list):
start_row (int):
Returns:
list: List of the angle between the first, second and
third atom of every entry in the buildlist.
"""
# check sanity of input
buildlist = np.array(buildlist)
try:
buildlist.shape[1]
except IndexError:
buildlist = buildlist[None, :]
else:
pass
buildlist = np.array(buildlist)
own_location = self.location(buildlist[start_row:, 0])
bond_with_location = self.location(buildlist[start_row:, 1])
angle_with_location = self.location(buildlist[start_row:, 2])
BI, BA = (
own_location - bond_with_location,
angle_with_location - bond_with_location)
bi, ba = (
BI / np.linalg.norm(BI, axis=1)[:, None],
BA / np.linalg.norm(BA, axis=1)[:, None])
dot_product = np.sum(bi * ba, axis=1)
dot_product[np.isclose(dot_product, 1)] = 1
dot_product[np.isclose(dot_product, -1)] = -1
angles = np.degrees(np.arccos(dot_product))
return angles
[docs] def dihedral_degrees(self, buildlist, start_row=0):
"""Return the angles between given atoms.
In order to know more about the buildlist, go to :func:`to_zmat`.
Args:
buildlist (list):
start_row (int):
Returns:
list: List of the dihedral between the first, second,
third and fourth atom of every entry in the buildlist.
"""
# check sanity of input
buildlist = np.array(buildlist)
try:
buildlist.shape[1]
except IndexError:
buildlist = buildlist[None, :]
else:
pass
own_location = self.location(buildlist[start_row:, 0])
bond_with_location = self.location(buildlist[start_row:, 1])
angle_with_location = self.location(buildlist[start_row:, 2])
dihedral_with_location = self.location(buildlist[start_row:, 3])
length = buildlist[start_row:].shape[0]
DA = dihedral_with_location - angle_with_location
AB = angle_with_location - bond_with_location
BI = bond_with_location - own_location
N1 = np.cross(DA, AB, axis=1)
N2 = np.cross(AB, BI, axis=1)
n1 = N1 / np.linalg.norm(N1, axis=1)[:, None]
n2 = N2 / np.linalg.norm(N2, axis=1)[:, None]
dot_product = np.sum(n1 * n2, axis=1)
dot_product[np.isclose(dot_product, 1)] = 1
dot_product[np.isclose(dot_product, -1)] = -1
dihedrals = np.degrees(np.arccos(dot_product))
# the next lines are to test the direction of rotation.
# is a dihedral really 90 or 270 degrees?
test_where_to_modify = (
np.sum(AB * np.cross(n1, n2, axis=1), axis=1) > 0)
where_to_modify = np.nonzero(test_where_to_modify)[0]
sign = np.full(length, 1, dtype='float64')
sign[where_to_modify] = -1
to_add = np.full(length, 0, dtype='float64')
to_add[where_to_modify] = 360
dihedrals = to_add + sign * dihedrals
return dihedrals
[docs] def fragmentate(self, give_only_index=False):
"""Get the indices of non bonded parts in the molecule.
Args:
give_only_index (bool): If ``True`` a set of indices is returned.
Otherwise a new Cartesian instance.
Returns:
list: A list of sets of indices or new Cartesian instances.
"""
list_fragment_indices = []
still_to_check = set(self.index)
while still_to_check != set([]):
indices = self.connected_to(
pick(still_to_check),
give_only_index=True)
still_to_check = still_to_check - indices
list_fragment_indices.append(indices)
if give_only_index:
value_to_return = list_fragment_indices
else:
value_to_return = [self[indices, :] for indices in list_fragment_indices]
return value_to_return
[docs] def get_fragment(self, list_of_indextuples, give_only_index=False):
"""Get the indices of the atoms in a fragment.
The list_of_indextuples contains all bondings from the
molecule to the fragment. ``[(1,3), (2,4)]`` means
for example that the fragment is connected over two
bonds. The first bond is from atom 1 in the molecule
to atom 3 in the fragment. The second bond is from atom
2 in the molecule to atom 4 in the fragment.
Args:
list_of_indextuples (list):
give_only_index (bool): If ``True`` a set of indices
is returned. Otherwise a new Cartesian instance.
Returns:
A set of indices or a new Cartesian instance.
"""
exclude = [tuple[0] for tuple in list_of_indextuples]
index_of_atom = list_of_indextuples[0][1]
fragment_index = self.connected_to(
index_of_atom, exclude=exclude, give_only_index=True)
if give_only_index:
value_to_return = fragment_index
else:
value_to_return = self[fragment_index, :]
return value_to_return
[docs] def _get_buildlist(self, fixed_buildlist=None):
"""Create a buildlist for a Zmatrix.
Args:
fixed_buildlist (np.array): It is possible to provide the
beginning of the buildlist. The rest is "figured" out
automatically.
Returns:
np.array: buildlist
"""
buildlist = np.zeros((self.n_atoms, 4)).astype('int64')
if not fixed_buildlist is None:
buildlist[:fixed_buildlist.shape[0], :] = fixed_buildlist
start_row = fixed_buildlist.shape[0]
already_built = set(fixed_buildlist[:, 0])
to_be_built = set(self.index) - already_built
convert_index = dict(zip(buildlist[:, 0], range(start_row)))
else:
start_row = 0
already_built, to_be_built = set([]), set(self.index)
convert_index = {}
bond_dic = self.get_bonds(use_lookup=True)
topologic_center = self.topologic_center()
distance_to_topologic_center = self.distance_to(topologic_center)
def update(already_built, to_be_built, new_atoms_set):
"""NOT SIDEEFFECT FREE
"""
for index in new_atoms_set:
already_built.add(index)
to_be_built.remove(index)
return already_built, to_be_built
def from_topologic_center(
self,
row_in_buildlist,
already_built,
to_be_built,
first_time=False,
third_time=False
):
index_of_new_atom = distance_to_topologic_center[to_be_built, 'distance'].idxmin()
buildlist[row_in_buildlist, 0] = index_of_new_atom
convert_index[index_of_new_atom] = row_in_buildlist
if not first_time:
bond_with = self.distance_to(
index_of_new_atom,
already_built)[:, 'distance'].idxmin()
angle_with = self.distance_to(
bond_with,
already_built - set([bond_with])
)[:,'distance'].idxmin()
buildlist[row_in_buildlist, 1:3] = [
bond_with, angle_with]
if not third_time:
dihedral_with = self.distance_to(
bond_with,
already_built - set([bond_with, angle_with])
)[:, 'distance'].idxmin()
buildlist[row_in_buildlist, 1:] = [
bond_with, angle_with, dihedral_with]
new_row_to_modify = row_in_buildlist + 1
already_built.add(index_of_new_atom)
to_be_built.remove(index_of_new_atom)
return new_row_to_modify, already_built, to_be_built
def second_atom(self, already_built, to_be_built):
new_atoms_set = bond_dic[buildlist[0, 0]] - already_built
if new_atoms_set != set([]):
index_of_new_atom = pick(new_atoms_set)
convert_index[index_of_new_atom] = 1
buildlist[1, 0] = index_of_new_atom
buildlist[1, 1] = pick(already_built)
else:
new_row_to_modify, already_built, to_be_built = \
from_topologic_center(
self, row_in_buildlist, already_built, to_be_built)
if len(new_atoms_set) > 1:
use_index = buildlist[0, 0]
else:
use_index = buildlist[1, 0]
new_row_to_modify = 2
already_built.add(index_of_new_atom)
to_be_built.remove(index_of_new_atom)
return new_row_to_modify, already_built, to_be_built, use_index
# TODO (3, 1) if atoms are not connected to it
# TODO find nearest atom of 90
def third_atom(self, already_built, to_be_built, use_index):
new_atoms_set = bond_dic[use_index] - already_built
if new_atoms_set != set([]):
index_of_new_atom = pick(new_atoms_set)
convert_index[index_of_new_atom] = 2
buildlist[2, 0] = index_of_new_atom
buildlist[2, 1] = use_index
buildlist[2, 2] = pick(already_built - set([use_index]))
if (
self.angle_degrees(buildlist[2, :]) < 10
or self.angle_degrees(buildlist[2, :]) > 170):
try:
index_of_new_atom = pick(
new_atoms_set - set([index_of_new_atom]))
convert_index[index_of_new_atom] = 2
buildlist[2, 0] = index_of_new_atom
buildlist[2, 1] = use_index
buildlist[2, 2] = pick(
already_built - set([use_index]))
except KeyError:
pass
already_built.add(index_of_new_atom)
to_be_built.remove(index_of_new_atom)
else:
new_row_to_modify, already_built, to_be_built = \
from_topologic_center(
self, 2, already_built, to_be_built, third_time=True)
# The two is hardcoded because of third atom.
if len(new_atoms_set) > 1:
use_index = use_index
else:
use_index = buildlist[2, 0]
new_row_to_modify = 3
return new_row_to_modify, already_built, to_be_built, use_index
def pick_new_atoms(
self,
row_in_buildlist,
already_built,
to_be_built,
use_given_index=None):
"""Get the indices of new atoms to be put in buildlist.
Tries to get the atoms bonded to the one, that was last
inserted into the buildlist. If the last atom is the
end of a branch, it looks for the index of an atom
that is the nearest atom to the topologic center and
not built in yet.
.. note:: It modifies the buildlist array which is global
to this function.
Args:
row_in_buildlist (int): The row which has to be filled
at least.
Returns:
list: List of modified rows.
"""
if use_given_index is None:
new_atoms_set = (
bond_dic[buildlist[row_in_buildlist-1, 0]]
- already_built)
if new_atoms_set != set([]):
update(already_built, to_be_built, new_atoms_set)
new_row_to_modify = row_in_buildlist + len(new_atoms_set)
new_atoms_list = list(new_atoms_set)
bond_with = buildlist[row_in_buildlist - 1, 0]
angle_with = buildlist[convert_index[bond_with], 1]
dihedral_with = buildlist[convert_index[bond_with], 2]
buildlist[
row_in_buildlist: new_row_to_modify,
0] = new_atoms_list
buildlist[
row_in_buildlist: new_row_to_modify,
1] = bond_with
buildlist[
row_in_buildlist: new_row_to_modify,
2] = angle_with
buildlist[
row_in_buildlist: new_row_to_modify,
3] = dihedral_with
for key, value in zip(
new_atoms_list,
range(row_in_buildlist, new_row_to_modify)):
convert_index[key] = value
else:
new_row_to_modify, already_built, to_be_built = \
from_topologic_center(
self, row_in_buildlist, already_built, to_be_built)
else:
new_atoms_set = bond_dic[use_given_index] - already_built
new_row_to_modify = row_in_buildlist + len(new_atoms_set)
new_atoms_list = list(new_atoms_set)
bond_with = use_given_index
angle_with, dihedral_with = (
already_built - set([use_given_index]))
buildlist[
row_in_buildlist: new_row_to_modify, 0] = new_atoms_list
buildlist[
row_in_buildlist: new_row_to_modify, 1] = bond_with
buildlist[
row_in_buildlist: new_row_to_modify, 2] = angle_with
buildlist[
row_in_buildlist: new_row_to_modify, 3] = dihedral_with
update(already_built, to_be_built, new_atoms_set)
for key, value in zip(
new_atoms_list,
range(row_in_buildlist, new_row_to_modify)):
convert_index[key] = value
return new_row_to_modify, already_built, to_be_built
row = start_row
if 0 <= row <= 2:
if row == 0 & 0 < buildlist.shape[0]:
row, already_built, to_be_built = from_topologic_center(
self,
row,
already_built,
to_be_built,
first_time=True)
if row == 1 & 1 < buildlist.shape[0]:
row, already_built, to_be_built, use_index = second_atom(
self, already_built, to_be_built)
if row == 2 & 2 < buildlist.shape[0]:
row, already_built, to_be_built, use_index = third_atom(
self, already_built, to_be_built, use_index)
if row < buildlist.shape[0]:
row, already_built, to_be_built = pick_new_atoms(
self, row, already_built, to_be_built, use_index)
while row < buildlist.shape[0]:
row, already_built, to_be_built = pick_new_atoms(
self, row, already_built, to_be_built)
return buildlist
[docs] def _clean_dihedral(self, buildlist_to_check):
"""Reindexes the dihedral defining atom if colinear.
Args:
buildlist (np.array):
Returns:
np.array: modified_buildlist
"""
buildlist = buildlist_to_check.copy()
bond_dic = self.get_bonds(use_lookup=True)
angles = self.angle_degrees(buildlist[3:, 1:])
test_vector = np.logical_or(170 < angles, angles < 10)
problematic_indices = np.nonzero(test_vector)[0]
converged = True if len(problematic_indices) == 0 else False
# look for index + 3 because index start directly at dihedrals
for index in problematic_indices:
try:
already_tested = set([])
found = False
while not found:
new_dihedral = pick(
(bond_dic[buildlist[index + 3, 2]]
- set(buildlist[index + 3, [0, 1, 3]]))
- set(buildlist[(index + 3):, 0]) - already_tested)
already_tested.add(new_dihedral)
temp_buildlist = buildlist[index + 3]
temp_buildlist[3] = new_dihedral
angle = self.angle_degrees(temp_buildlist[1:])
found = True if 10 < angle < 170 else False
except KeyError:
origin = buildlist[index + 3, 2]
could_be_reference = set(buildlist[: index + 3, 0])
sorted_Cartesian = self.distance_to(
origin,
indices_of_other_atoms=could_be_reference)
sorted_Cartesian.frame = \
sorted_Cartesian.frame.sort_values(by='distance')
buildlist_for_new_dihedral_with = np.empty(
(len(could_be_reference), 3), dtype='int64')
bond_with, angle_with = buildlist[index + 3, [1, 2]]
buildlist_for_new_dihedral_with[:, 0] = bond_with
buildlist_for_new_dihedral_with[:, 1] = angle_with
dihedral_with = sorted_Cartesian.index
buildlist_for_new_dihedral_with[:, 2] = dihedral_with
angles = self.angle_degrees(buildlist_for_new_dihedral_with)
test_vector = np.logical_and(170 > angles, angles > 10)
new_dihedral = dihedral_with[np.nonzero(test_vector)[0][0]]
buildlist[index + 3, 3] = new_dihedral
return buildlist
[docs] def _build_zmat(self, buildlist):
"""Creates the zmatrix from a buildlist.
Args:
buildlist (np.array):
Returns:
Zmat: A new instance of ``Zmat``.
"""
indexlist = buildlist[:, 0]
default_columns = [
'atom', 'bond_with', 'bond', 'angle_with',
'angle', 'dihedral_with', 'dihedral']
additional_columns = list(
set(self.columns)
- set(['atom', 'x', 'y', 'z']))
zmat_frame = pd.DataFrame(
columns=default_columns + additional_columns,
dtype='float',
index=indexlist)
zmat_frame.loc[:, additional_columns] = self[indexlist, additional_columns]
bonds = self.bond_lengths(buildlist, start_row=1)
angles = self.angle_degrees(buildlist, start_row=2)
dihedrals = self.dihedral_degrees(buildlist, start_row=3)
zmat_frame.loc[indexlist, 'atom'] = self[indexlist, 'atom']
zmat_frame.loc[indexlist[1:], 'bond_with'] = buildlist[1:, 1]
zmat_frame.loc[indexlist[1:], 'bond'] = bonds
zmat_frame.loc[indexlist[2:], 'angle_with'] = buildlist[2:, 2]
zmat_frame.loc[indexlist[2:], 'angle'] = angles
zmat_frame.loc[indexlist[3:], 'dihedral_with'] = buildlist[3:, 3]
zmat_frame.loc[indexlist[3:], 'dihedral'] = dihedrals
return zmat_functions.Zmat(zmat_frame)
# TODO docstring
[docs] def to_zmat(
self,
buildlist=None,
fragment_list=None,
check_linearity=True):
"""Transform to internal coordinates.
Transforming to internal coordinates involves basically three
steps:
1. Define an order of how to build.
2. Check for problematic local linearity. In this algorithm an
angle with ``170 < angle < 10`` is assumed to be linear.
This is not the mathematical definition, but makes it safer
against "floating point noise"
3. Calculate the bond lengths, angles and dihedrals using the
references defined in step 1 and 2.
In the first two steps a so called ``buildlist`` is created.
This is basically a ``np.array`` of shape ``(n_atoms, 4)`` and
integer type.
The four columns are ``['own_index', 'bond_with', 'angle_with',
'dihedral_with']``.
This means that usually the upper right triangle can be any
number, because for example the first atom has no other
atom as reference.
It is important to know, that getting the buildlist is a very
costly step since the algoritym tries to make some guesses
based on the connectivity to create a "chemical" zmatrix.
If you create several zmatrices based on the same references
you can save the buildlist of a zmatrix with
:meth:`Zmat.build_list`.
If you then pass the buildlist as argument to ``to_zmat``,
then the algorithm directly starts with step 3.
Another thing is that you can specify fragments.
For this purpose the function :meth:`Cartesian.get_fragment`
is quite handy.
An element of fragment_list looks like::
(fragment, connections)
Fragment is a ``Cartesian`` instance and connections is a
``(3, 4)`` numpy integer array, that defines how the
fragment is connected to the molecule.
Args:
buildlist (np.array):
fragment_list (list):
check_linearity (bool):
Returns:
Zmat: A new instance of ``Zmat``.
"""
if buildlist is None:
if fragment_list is None:
buildlist = self._get_buildlist()
else:
def create_big_molecule(self, fragment_list):
def prepare_variables(self, fragment_list):
buildlist = np.empty((self.n_atoms, 4), dtype='int64')
fragment_index = set([])
for fragment_tpl in fragment_list:
fragment_index |= set(
fragment_tpl[0].index)
big_molecule_index = (
set(self.index) - fragment_index)
return buildlist, big_molecule_index
buildlist, big_molecule_index = prepare_variables(
self, fragment_list)
big_molecule = self[big_molecule_index, :]
row = big_molecule.n_atoms
buildlist[: row, :] = big_molecule._get_buildlist()
return buildlist, big_molecule, row
def add_fragment(
self, fragment_tpl, big_molecule, buildlist, row):
next_row = row + fragment_tpl[0].n_atoms
buildlist[row: next_row, :] = \
fragment_tpl[0]._get_buildlist(
fragment_tpl[1])
return buildlist, big_molecule, row
buildlist, big_molecule, row = create_big_molecule(
self, fragment_list)
for fragment_tpl in fragment_list:
buildlist, big_molecule, row = add_fragment(
self, fragment_tpl, big_molecule, buildlist, row)
if check_linearity:
buildlist = self._clean_dihedral(buildlist)
zmat = self._build_zmat(buildlist)
return zmat
[docs] def inertia(self):
"""Calculates the inertia tensor and transforms along
rotation axes.
This function calculates the inertia tensor and returns
a 4-tuple.
Args:
None
Returns:
dict: The returned dictionary has four possible keys:
``transformed_Cartesian``:
A frame that is transformed to the basis spanned by
the eigenvectors of the inertia tensor. The x-axis
is the axis with the lowest inertia moment, the
z-axis the one with the highest. Contains also a
column for the mass
``diag_inertia_tensor``:
A vector containing the sorted inertia moments after
diagonalization.
``inertia_tensor``:
The inertia tensor in the old basis.
``eigenvectors``:
The eigenvectors of the inertia tensor in the old basis.
"""
Cartesian_mass = self.add_data('mass')
Cartesian_mass = Cartesian_mass.move(vector=-self.barycenter())
frame_mass = Cartesian_mass.frame
coordinates = ['x', 'y', 'z']
def kronecker(i, j):
"""Please note, that it also compares e.g. strings.
"""
if i == j:
return 1
else:
return 0
inertia_tensor = np.zeros([3, 3])
for row_index, row_coordinate in enumerate(coordinates):
for column_index, column_coordinate in enumerate(coordinates):
inertia_tensor[row_index, column_index] = (
frame_mass.loc[:, 'mass'] * (
kronecker(row_index, column_index)
* (frame_mass.loc[:, coordinates]**2).sum(axis=1)
- (frame_mass.loc[:, row_coordinate]
* frame_mass.loc[:, column_coordinate])
)).sum()
diag_inertia_tensor, eigenvectors = np.linalg.eig(inertia_tensor)
# Sort ascending
sorted_index = np.argsort(diag_inertia_tensor)
diag_inertia_tensor = diag_inertia_tensor[sorted_index]
eigenvectors = eigenvectors[:, sorted_index]
new_basis = eigenvectors
new_basis = utilities.orthormalize(new_basis)
old_basis = np.identity(3)
Cartesian_mass = self.basistransform(new_basis, old_basis)
dic_of_values = dict(zip([
'transformed_Cartesian', 'diag_inertia_tensor',
'inertia_tensor', 'eigenvectors'],
[Cartesian_mass, diag_inertia_tensor, inertia_tensor, eigenvectors]
))
return dic_of_values
[docs] def location(self, indexlist=None):
"""Returns the location of an atom.
You can pass an indexlist or an index.
Args:
frame (pd.dataframe):
indexlist (list): If indexlist is None, the complete index
is used.
Returns:
np.array: A matrix of 3D rowvectors of the location of the
atoms specified by indexlist. In the case of one index
given a 3D vector is returned one index.
"""
indexlist = self.index if indexlist is None else indexlist
array = self[indexlist, ['x', 'y', 'z']].values.astype(float)
return array
[docs] def distance_to(self, origin=[0,0,0], indices_of_other_atoms=None, sort=False):
"""Returns a Cartesian with a column for the distance from origin.
"""
try:
origin[0]
except (TypeError, IndexError):
origin = self.location(int(origin))
if indices_of_other_atoms is None:
indices_of_other_atoms = self.index
origin = np.array(origin, dtype=float)
output = self[indices_of_other_atoms, :].copy()
other_locations = output.location()
output[:, 'distance'] = np.linalg.norm(other_locations - origin, axis=1)
if sort:
output.sort_values(by='distance', inplace=True)
return output
[docs] def change_numbering(self, rename_dict, inplace=False):
"""Returns the reindexed version of Cartesian.
Args:
rename_dict (dict): A dictionary mapping integers on integers.
Returns:
Cartesian: A renamed copy according to the dictionary passed.
"""
output = self if inplace else self.copy()
replace_list = list(rename_dict.keys())
with_list = [rename_dict[key] for key in replace_list]
output[:, 'temporary_column'] = output.index
output[:, 'temporary_column'].replace(replace_list, with_list, inplace=True)
output.set_index('temporary_column', drop=True, inplace=True)
output.sort_index(inplace=True)
output.index.name = None
if not inplace:
return output
[docs] def partition_chem_env(self, follow_bonds=4):
"""This function partitions the molecule into subsets of the
same chemical environment.
A chemical environment is specified by the number of
surrounding atoms of a certain kind around an atom with a
certain atomic number represented by a tuple of a string
and a frozenset of tuples.
The ``follow_bonds`` option determines how many branches the
algorithm follows to determine the chemical environment.
Example:
A carbon atom in ethane has bonds with three hydrogen (atomic
number 1) and one carbon atom (atomic number 6).
If ``follow_bonds=1`` these are the only atoms we are
interested in and the chemical environment is::
('C', frozenset([('H', 3), ('C', 1)]))
If ``follow_bonds=2`` we follow every atom in the chemical
enviromment of ``follow_bonds=1`` to their direct neighbours.
In the case of ethane this gives::
('C', frozenset([('H', 6), ('C', 1)]))
In the special case of ethane this is the whole molecule;
in other cases you can apply this operation recursively and
stop after ``follow_bonds`` or after reaching the end of
branches.
Args:
follow_bonds (int):
Returns:
dict: The output will look like this::
{ (element_symbol, frozenset([tuples])) :
set([indices]) }
A dictionary mapping from a chemical environment to
the set of indices of atoms in this environment.
"""
env_dict = {}
def get_chem_env(self, atomseries, index, follow_bonds):
indices_of_env_atoms = self.connected_to(
index, follow_bonds=follow_bonds, give_only_index=True)
indices_of_env_atoms.remove(index)
own_symbol, atoms = (
atomseries[index], atomseries[indices_of_env_atoms])
environment = collections.Counter(atoms).most_common()
environment = frozenset(environment)
return (own_symbol, environment)
atomseries = self[:, 'atom']
for index in self.index:
chem_env = get_chem_env(self, atomseries, index, follow_bonds)
try:
env_dict[chem_env].add(index)
except KeyError:
env_dict[chem_env] = set([index])
return env_dict
[docs] def align(self, Cartesian2, ignore_hydrogens=False):
"""Aligns two Cartesians.
Searches for the optimal rotation matrix that minimizes
the RMSD (root mean squared deviation) of ``self`` to
Cartesian2.
Returns a tuple of copies of ``self`` and ``Cartesian2`` where
both are centered around their topologic center and
``Cartesian2`` is aligned along ``self``.
Uses the Kabsch algorithm implemented with
:func:`utilities.kabsch`
Args:
Cartesian2 (Cartesian):
ignore_hydrogens (bool): Hydrogens are ignored for the
RMSD.
Returns:
tuple:
"""
molecule1 = self.sort_index()
molecule2 = Cartesian2.sort_index()
molecule1[:, 'x':'z'] = molecule1[:, 'x':'z'] - molecule1.topologic_center()
molecule2[:, 'x':'z'] = molecule2[:, 'x':'z'] - molecule2.topologic_center()
if ignore_hydrogens:
location1 = molecule1[molecule1[:, 'atom'] != 'H', :].location()
location2 = molecule2[molecule2[:, 'atom'] != 'H', :].location()
else:
location1 = molecule1.location()
location2 = molecule2.location()
# TODO still to rewrite
# U = utilities.kabsch(location2, location1)
molecule2[:, ['x', 'y', 'z']] = utilities.rotate(location2, location1)
return molecule1, molecule2
[docs] def make_similar(self, Cartesian2, follow_bonds=4, prealign=True):
"""Similarizes two Cartesians.
Returns a reindexed copy of ``Cartesian2`` that minimizes the
distance for each atom in the same chemical environemt
from ``self`` to ``Cartesian2``.
Read more about the definition of the chemical environment in
:func:`Cartesian.partition_chem_env`
.. warning:: Please check the result with e.g.
:func:`Cartesian.move_to()`
It is probably necessary to use the function
:func:`Cartesian.change_numbering()`.
Args:
Cartesian2 (Cartesian):
max_follow_bonds (int):
prealign (bool): The method :func:`Cartesian.align()`
is applied before reindexing.
Returns:
tuple: Aligned copy of ``self`` and aligned + reindexed
version of ``Cartesian2``
"""
if prealign:
molecule1, molecule2_new = self.align(Cartesian2)
else:
molecule1 = self.copy()
# Copy ??
molecule2_new = Cartesian2.copy()
partition1 = molecule1.partition_chem_env(follow_bonds)
partition2 = molecule2_new.partition_chem_env(follow_bonds)
index_dic = {}
def make_subset_similar(
molecule1, subset1, molecule2, subset2, index_dic):
indexlist1 = list(subset1)
for index_on_molecule1 in indexlist1:
distances_to_atom_on_molecule1 = molecule2_new.distance_to(
molecule1.location(index_on_molecule1), subset2, sort=True)
index_on_molecule2 = \
distances_to_atom_on_molecule1.frame.iloc[0].name
distance_new = distances_to_atom_on_molecule1[index_on_molecule2, 'distance']
location_of_atom2 = distances_to_atom_on_molecule1.location(
index_on_molecule2)
i = 1
while True:
if index_on_molecule2 in index_dic.keys():
location_of_old_atom1 = molecule1.location(
index_dic[index_on_molecule2])
distance_old = utilities.distance(
location_of_old_atom1, location_of_atom2)
if distance_new < distance_old:
indexlist1.append(index_dic[index_on_molecule2])
index_dic[index_on_molecule2] = index_on_molecule1
break
else:
index_on_molecule2 = \
distances_to_atom_on_molecule1.frame.iloc[i].name
distance_new = \
distances_to_atom_on_molecule1[index_on_molecule2, 'distance']
location_of_atom2 = \
distances_to_atom_on_molecule1.location(
index_on_molecule2)
i = i + 1
else:
index_dic[index_on_molecule2] = index_on_molecule1
break
return index_dic
for key in partition1.keys():
assert len(partition1[key]) == len(partition2[key]), \
(
'You have chemically different molecules, regarding the'
'topology of their connectivity. Perhaps'
' get_bonds(use_valency=False) helps.')
index_dic = make_subset_similar(
molecule1, partition1[key], molecule2_new,
partition2[key], index_dic)
new_index = [
index_dic[old_index2]
for old_index2 in molecule2_new.index]
molecule2_new.index = new_index
molecule2_new.sort_index(inplace=True)
return molecule1, molecule2_new
[docs] def move_to(self, Cartesian2, step=5, extrapolate=(0, 0)):
"""Returns list of Cartesians for the movement from
self to Cartesian2.
Args:
Cartesian2 (Cartesian):
step (int):
extrapolate (tuple):
Returns:
list: The list contains ``self`` as first and ``Cartesian2``
as last element.
The number of intermediate frames is defined by step.
Please note, that for this reason: len(list) = (step + 1).
The numbers in extrapolate define how many frames are
appended to the left and right of the list continuing
the movement.
"""
difference = Cartesian2[:, ['x', 'y', 'z']] - self[:, ['x', 'y', 'z']]
step_frame = difference.copy() / step
Cartesian_list = []
temp_Cartesian = self.copy()
for t in range(-extrapolate[0], step + 1 + extrapolate[1]):
temp_Cartesian[:, ['x', 'y', 'z']] = (
self[:, ['x', 'y', 'z']] + step_frame.loc[:, ['x', 'y', 'z']] * t
)
Cartesian_list.append(temp_Cartesian)
return list_of_cartesians
[docs] def write(self, outputfile, sort_index=True):
"""Writes the Cartesian into a file.
If sort_index is true, the frame is sorted by the index before writing.
.. note:: Since it permamently writes a file, this function
is strictly speaking **not sideeffect free**.
The frame to be written is of course not changed.
Args:
outputfile (str):
sort_index (bool):
Returns:
None: None
"""
frame = self.frame[['atom', 'x', 'y', 'z']].copy()
if sort_index:
frame = frame.sort_index()
n_atoms = frame.shape[0]
with open(outputfile, mode='w') as f:
f.write(str(n_atoms) + 2 * '\n')
frame.to_csv(
outputfile,
sep=str(' '),
index=False,
header=False,
mode='a'
)
else:
frame = frame.sort_values(by='atom')
n_atoms = frame.shape[0]
with open(outputfile, mode='w') as f:
f.write(str(n_atoms) + 2 * '\n')
frame.to_csv(
outputfile,
sep=str(' '), # https://github.com/pydata/pandas/issues/6035
index=False,
header=False,
mode='a'
)
@classmethod
[docs] def read_xyz(cls, inputfile, pythonic_index=False, get_bonds=True):
"""Reads a xyz file.
Args:
inputfile (str):
pythonic_index (bool):
Returns:
Cartesian:
"""
frame = pd.read_table(
inputfile,
skiprows=2,
comment='#',
delim_whitespace=True,
names=['atom', 'x', 'y', 'z'])
if not pythonic_index:
n_atoms = frame.shape[0]
frame.index = range(1, n_atoms+1)
molecule = cls(frame)
if get_bonds:
previous_warnings_bool = settings.show_warnings['valency']
settings.show_warnings['valency'] = False
molecule.get_bonds(
use_lookup=False, set_lookup=True, use_valency=False)
settings.show_warnings['valency'] = previous_warnings_bool
return molecule
@classmethod
[docs] def read_molden(cls, inputfile, pythonic_index=False, get_bonds=True):
"""Reads a molden file.
Args:
inputfile (str):
pythonic_index (bool):
Returns:
list: A list containing Cartesian is returned.
"""
f = open(inputfile, 'r')
found = False
while not found:
line = f.readline()
if line.strip() == '[N_GEO]':
found = True
number_of_molecules = int(f.readline().strip())
found = False
while not found:
line = f.readline()
if line.strip() == '[GEOMETRIES] (XYZ)':
found = True
current_line = f.tell()
number_of_atoms = int(f.readline().strip())
f.seek(current_line)
for i in range(number_of_molecules):
molecule_in = [f.readline() for j in range(number_of_atoms + 2)]
molecule_in = ''.join(molecule_in)
molecule_in = io.StringIO(molecule_in)
molecule = cls.read_xyz(molecule_in, pythonic_index=pythonic_index, get_bonds=get_bonds)
try:
list_of_cartesians.append(molecule)
except NameError:
list_of_cartesians = [molecule]
f.close()
return list_of_cartesians
@staticmethod
def _write_molden(cartesian_list, outputfile):
"""Writes a list of Cartesians into a molden file.
.. note:: Since it permamently writes a file, this function
is strictly speaking **not sideeffect free**.
The frame to be written is of course not changed.
Args:
cartesian_list (list):
outputfile (str):
Returns:
None:
"""
framelist = [molecule.frame for molecule in cartesian_list]
n_frames = len(framelist)
n_atoms = framelist[0].shape[0]
string = """[MOLDEN FORMAT]
[N_GEO]
"""
values = n_frames * '1\n'
string = (
string +
str(n_frames) + '\n[GEOCONV]\nenergy\n' +
values + 'max-force\n' +
values + 'rms-force\n' +
values + '[GEOMETRIES] (XYZ)\n')
with open(outputfile, mode='w') as f:
f.write(string)
for frame in framelist:
frame = frame.sort_index()
n_atoms = frame.shape[0]
with open(outputfile, mode='a') as f:
f.write(str(n_atoms) + 2 * '\n')
frame.to_csv(
outputfile,
sep=str(' '),
index=False,
header=False,
mode='a')
[docs]def write_molden(cartesian_list, outputfile):
"""Writes a list of Cartesians into a molden file.
.. note:: Since it permamently writes a file, this function is
strictly speaking **not sideeffect free**.
The frame to be written is of course not changed.
Args:
cartesian_list (list):
outputfile (str):
Returns:
None:
"""
cartesian_list[0]._write_molden(cartesian_list, outputfile)
[docs]def read_xyz(inputfile, pythonic_index=False, get_bonds=True):
"""Reads a xyz file.
.. note:: This function calls in the background :func:`Cartesian.read_xyz`.
If you inherited from :class:`Cartesian` to tailor it for your project,
you have to use this method as a constructor.
Otherwise you can choose.
Args:
inputfile (str):
pythonic_index (bool):
Returns:
Cartesian:
"""
molecule = Cartesian.read_xyz(
inputfile, pythonic_index=pythonic_index, get_bonds=get_bonds)
return molecule
[docs]def read_molden(inputfile, pythonic_index=False, get_bonds=True):
"""Reads a molden file.
Args:
inputfile (str):
pythonic_index (bool):
Returns:
list: A list containing Cartesian is returned.
"""
list_of_cartesians = Cartesian.read_molden(
inputfile, pythonic_index=pythonic_index, get_bonds=get_bonds)
return list_of_cartesians