Source code for chemcoord._cartesian_coordinates.xyz_functions

import math as m
import os
import subprocess
import tempfile
import warnings
from collections.abc import Callable, Iterable, Sequence
from io import StringIO
from pathlib import Path
from threading import Thread
from typing import Literal, overload

import numpy as np
import pandas as pd
import sympy
from pandas.core.frame import DataFrame
from typing_extensions import assert_never

from chemcoord._cartesian_coordinates._cart_transformation import (
    _jit_normalize,
    normalize,
)
from chemcoord._cartesian_coordinates._cartesian_class_core import COORDS
from chemcoord._cartesian_coordinates.cartesian_class_main import Cartesian
from chemcoord._internal_coordinates.zmat_class_main import Zmat
from chemcoord._internal_coordinates.zmat_functions import _zmat_interpolate
from chemcoord._utilities._decorators import njit
from chemcoord.configuration import settings
from chemcoord.typing import Matrix, PathLike, Real, Tensor4D, Vector


[docs] def view( molecule: Cartesian | Iterable[Cartesian], viewer: PathLike | None = None, list_viewer_file: Literal["molden", "xyz"] | None = None, use_curr_dir: bool = False, ) -> None: """View your molecule or list of molecules. .. note:: This function writes a temporary file and opens it with an external viewer. If you modify your molecule afterwards you have to recall view in order to see the changes. Args: molecule: A cartesian or a list of cartesians. viewer: The external viewer to use. The default is specified in ``settings.viewer``. use_curr_dir: If True, the temporary file is written to the current directory. Otherwise, it is written to the OS-dependent temporary directory. """ if viewer is None: viewer = settings.defaults.viewer if list_viewer_file is None: list_viewer_file = settings.defaults.list_viewer_file if isinstance(molecule, Cartesian): molecule.view(viewer=viewer, use_curr_dir=use_curr_dir) elif isinstance(molecule, Iterable): cartesians = molecule if use_curr_dir: TEMP_DIR = Path(os.path.curdir) else: TEMP_DIR = Path(tempfile.gettempdir()) def give_filename(i: int, suffix: Literal["molden", "xyz"]) -> Path: return TEMP_DIR / f"ChemCoord_list_{i}.{suffix}" i = 1 while give_filename(i, list_viewer_file).exists(): i = i + 1 if list_viewer_file == "molden": to_molden(cartesians, buf=give_filename(i, list_viewer_file)) elif list_viewer_file == "xyz": to_multiple_xyz(cartesians, buf=give_filename(i, list_viewer_file)) else: assert_never("Invalid list_viewer_file.") def open_file(i: int) -> None: """Open file and close after being finished.""" try: subprocess.check_call([viewer, give_filename(i, list_viewer_file)]) except (subprocess.CalledProcessError, FileNotFoundError): raise finally: if use_curr_dir: pass else: give_filename(i, list_viewer_file).unlink() Thread(target=open_file, args=(i,)).start() else: raise ValueError( "Argument is neither iterable of Cartesians nor Cartesian " f"but instead: {type(molecule)}" )
@overload def to_multiple_xyz( cartesian_list: Iterable[Cartesian], buf: None = None, sort_index: bool = ..., overwrite: bool = ..., float_format: Callable[[float], str] = ..., ) -> str: ... @overload def to_multiple_xyz( cartesian_list: Iterable[Cartesian], buf: PathLike, sort_index: bool = ..., overwrite: bool = ..., float_format: Callable[[float], str] = ..., ) -> None: ...
[docs] def to_multiple_xyz( cartesian_list: Iterable[Cartesian], buf: PathLike | None = None, sort_index: bool = True, overwrite: bool = True, float_format: Callable[[float], str] = "{:.6f}".format, ) -> str | None: """Write a list of Cartesians into an xyz file. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The list to be written is of course not changed. Args: cartesian_list: List of Cartesian objects. buf: Optional buffer or file path to write to. sort_index: If True, the Cartesian is sorted by the index before writing. overwrite: May overwrite existing files. float_format: Formatter function to apply to columns if they are floats. """ if sort_index: cartesian_list = [molecule.sort_index() for molecule in cartesian_list] output = "" for struct in cartesian_list: output += struct.to_xyz(float_format=float_format) + "\n" if buf is not None: with open(buf, mode="w" if overwrite else "x") as f: f.write(output) return None else: return output
[docs] def read_multiple_xyz(inputfile: PathLike, start_index: int = 0) -> list[Cartesian]: """Read a multiple-xyz file. Args: inputfile: The file to read from. start_index: Index to start from. Returns: A list of Cartesian objects. """ with open(inputfile, "r") as f: strings = f.readlines() cartesians = [] finished = False current_line = 0 while not finished: molecule_len = int(strings[current_line]) cartesians.append( Cartesian.read_xyz( StringIO( "".join(strings[current_line : current_line + molecule_len + 2]) ), start_index=start_index, nrows=molecule_len, engine="python", ) ) current_line += 2 + molecule_len finished = current_line == len(strings) return cartesians
@overload def to_molden( cartesians: Iterable[Cartesian], buf: None = None, sort_index: bool = ..., overwrite: bool = ..., float_format: Callable[[float], str] = ..., ) -> str: ... @overload def to_molden( cartesians: Iterable[Cartesian], buf: PathLike, sort_index: bool = ..., overwrite: bool = ..., float_format: Callable[[float], str] = ..., ) -> None: ...
[docs] def to_molden( cartesians: Iterable[Cartesian], buf: PathLike | None = None, sort_index: bool = True, overwrite: bool = True, float_format: Callable[[float], str] = "{:.6f}".format, ) -> str | None: """Write a list of Cartesians into a molden file. .. note:: Since it permamently writes a file, this function is strictly speaking **not sideeffect free**. The list to be written is of course not changed. Args: cartesians: List of Cartesian objects. buf: Optional buffer or file path to write to. sort_index: If True, the Cartesian is sorted by the index before writing. overwrite: May overwrite existing files. float_format: Formatter function to apply to columns if they are floats. """ if sort_index: cartesian_list = [molecule.sort_index() for molecule in cartesians] else: cartesian_list = list(cartesian_list) if not all(isinstance(molecule, Cartesian) for molecule in cartesian_list): raise TypeError("All elements in cartesians must be Cartesians.") give_header = ( "[MOLDEN FORMAT]\n" + "[N_GEO]\n" + str(len(cartesian_list)) + "\n" + "[GEOCONV]\n" + "energy\n{energy}" + "max-force\n{max_force}" + "rms-force\n{rms_force}" + "[GEOMETRIES] (XYZ)\n" ).format values = len(cartesian_list) * "1\n" energy = ( "\n".join([str(m.metadata.get("energy", 1)) for m in cartesian_list]) + "\n" ) header = give_header(energy=energy, max_force=values, rms_force=values) coordinates = [ x.to_xyz(sort_index=sort_index, float_format=float_format) for x in cartesian_list ] output = header + "\n".join(coordinates) if buf is not None: with open(buf, mode="w" if overwrite else "x") as f: f.write(output) return None else: return output
[docs] def write_molden(*args, **kwargs): # type: ignore[no-untyped-def] """Deprecated, use :func:`~chemcoord.xyz_functions.to_molden`""" message = "Will be removed in the future. Please use to_molden()." with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn(message, DeprecationWarning) return to_molden(*args, **kwargs)
[docs] def read_molden(inputfile: PathLike, start_index: int = 0) -> list[Cartesian]: """Read a molden file. Args: inputfile: The file to read from. start_index: Index to start from. Returns: A list of Cartesian objects. """ with open(inputfile) as f: found = False while not found: line = f.readline() if "[N_GEO]" in line: found = True number_of_molecules = int(f.readline().strip()) energies = [] found = False while not found: line = f.readline() if "energy" in line: found = True for _ in range(number_of_molecules): energies.append(float(f.readline().strip())) found = False while not found: line = f.readline() if "[GEOMETRIES] (XYZ)" in line: found = True current_line = f.tell() number_of_atoms = int(f.readline().strip()) f.seek(current_line) cartesians = [] for energy in energies: cartesian = Cartesian.read_xyz( f, start_index=start_index, nrows=number_of_atoms, engine="python", ) cartesian.metadata["energy"] = energy cartesians.append(cartesian) return cartesians
[docs] def isclose( a: Cartesian, b: Cartesian, align: bool = False, rtol: float = 1.0e-5, atol: float = 1.0e-8, ) -> DataFrame: """Compare two molecules for numerical equality. Args: a: First molecule. b: Second molecule. align: If True, a and b are prealigned along their principal axes of inertia and moved to their barycenters before comparing. rtol: Relative tolerance for the numerical equality comparison (see ``numpy.isclose``). atol: Absolute tolerance for the numerical equality comparison (see ``numpy.isclose``). """ # The pandas documentation says about the arguments to all(axis=...) # None : reduce all axes, return a scalar # https://pandas.pydata.org/docs/reference/api/pandas.Series.all.html # but the stubs don't have it. if not ( set(a.index) == set(b.index) and (a.loc[:, "atom"] == b.loc[a.index, "atom"]).all(axis=None) # type: ignore[arg-type] ): message = "Can only compare molecules with the same atoms and labels" raise ValueError(message) if align: a = a.get_inertia()["transformed_Cartesian"] b = b.get_inertia()["transformed_Cartesian"] A, B = a.loc[:, COORDS].values, b.loc[a.index, COORDS].values out = pd.DataFrame(index=a.index, columns=["atom"] + COORDS, dtype=bool) out.loc[:, "atom"] = True out.loc[:, COORDS] = np.isclose(A, B, rtol=rtol, atol=atol) return out
[docs] def allclose( a: Cartesian, b: Cartesian, align: bool = False, rtol: float = 1.0e-5, atol: float = 1.0e-8, ) -> bool: """Compare two molecules for numerical equality. Args: a: First molecule. b: Second molecule. align: If True, a and b are prealigned along their principal axes of inertia and moved to their barycenters before comparing. rtol: Relative tolerance for the numerical equality comparison (see ``numpy.allclose``). atol: Absolute tolerance for the numerical equality comparison (see ``numpy.allclose``). Returns: True if all coordinates are close, False otherwise. """ return isclose(a, b, align=align, rtol=rtol, atol=atol).all(axis=None)
[docs] def concat( cartesians: Sequence[Cartesian], ignore_index: bool = False, keys: Iterable | None = None, ) -> Cartesian: """Join list of cartesians into one molecule. Wrapper around the :func:`pandas.concat` function. Default values are the same as in the pandas function except for ``verify_integrity`` which is set to true in case of this library. Args: cartesians: A sequence of Cartesian objects to be concatenated. ignore_index: Behaves like in :func:`pandas.concat`. keys: If multiple levels passed, should contain tuples. Construct hierarchical index using the passed keys as the outermost level. Returns: A concatenated Cartesian object. """ frames = [molecule._frame for molecule in cartesians] if keys is None: new = pd.concat(frames, ignore_index=ignore_index, verify_integrity=True) else: new = pd.concat( frames, ignore_index=ignore_index, keys=keys, verify_integrity=True ) return cartesians[0].__class__(new)
[docs] def get_rotation_matrix(axis: Sequence[float], angle: float) -> Matrix[np.float64]: """Returns the rotation matrix. This function returns a matrix for the counterclockwise rotation around the given axis. The input angle is in radians. Args: axis: The axis to rotate around. angle: The angle in radians. Returns: The rotation matrix as a numpy array. """ vaxis = normalize(np.asarray(axis)) if not (vaxis.shape) == (3,): raise ValueError("axis.shape has to be 3") return _jit_get_rotation_matrix(vaxis, angle)
@njit def _jit_get_rotation_matrix( axis: Vector[np.float64], angle: Real ) -> Matrix[np.float64]: """Returns the rotation matrix. This function returns a matrix for the counterclockwise rotation around the given axis. The input angle is in radians. Args: axis: The axis to rotate around. angle: The angle in radians. Returns: The rotation matrix as a numpy array. """ axis = _jit_normalize(axis) a = m.cos(angle / 2) b, c, d = axis * m.sin(angle / 2) rot_matrix = np.empty((3, 3)) rot_matrix[0, 0] = a**2 + b**2 - c**2 - d**2 rot_matrix[0, 1] = 2.0 * (b * c - a * d) rot_matrix[0, 2] = 2.0 * (b * d + a * c) rot_matrix[1, 0] = 2.0 * (b * c + a * d) rot_matrix[1, 1] = a**2 + c**2 - b**2 - d**2 rot_matrix[1, 2] = 2.0 * (c * d - a * b) rot_matrix[2, 0] = 2.0 * (b * d - a * c) rot_matrix[2, 1] = 2.0 * (c * d + a * b) rot_matrix[2, 2] = a**2 + d**2 - b**2 - c**2 return rot_matrix
[docs] def orthonormalize_righthanded(basis: Matrix[np.floating]) -> Matrix[np.float64]: """Orthonormalizes righthandedly a given 3D basis. This function returns a right handed orthonormalized basis. Since only the first two vectors in the basis are used, it does not matter if you give two or three vectors. Right handed means, that: .. math:: \vec{e_1} \times \vec{e_2} &= \vec{e_3} \\ \vec{e_2} \times \vec{e_3} &= \vec{e_1} \\ \vec{e_3} \times \vec{e_1} &= \vec{e_2} \\ Args: basis: An array of shape (3,2) or (3,3). Returns: A right handed orthonormalized basis as a numpy array. """ v1, v2 = basis[:, 0], basis[:, 1] e1 = normalize(v1) e3 = normalize(np.cross(e1, v2)) e2 = normalize(np.cross(e3, e1)) return np.array([e1, e2, e3]).T
[docs] def get_kabsch_rotation( Q: Matrix[np.floating], P: Matrix[np.floating], weights: Vector[np.floating] | None = None, ) -> Matrix[np.float64]: """Calculate the optimal rotation from ``P`` unto ``Q``. Using the Kabsch algorithm the optimal rotation matrix for the rotation of ``other`` unto ``self`` is calculated. :cite:`kabsch_solution_1976` The algorithm is described very well in `wikipedia <http://en.wikipedia.org/wiki/Kabsch_algorithm>`_. Args: Q: Target coordinates. P: Source coordinates. weights: Optional weights for the points. Returns: The optimal rotation matrix as a numpy array. """ # The general problem with weights is decribed in # https://en.wikipedia.org/wiki/Wahba%27s_problem # The problem with equal weights is described # https://en.wikipedia.org/wiki/Kabsch_algorithm # Naming of variables follows wikipedia article about the Kabsch algorithm if weights is None: A = P.T @ Q else: A = P.T @ np.diag(weights) @ Q # One can't initialize an array over its transposed V, S, W = np.linalg.svd(A) # noqa: F841 W = W.T d = np.linalg.det(W @ V.T) return W @ np.diag([1.0, 1.0, d]) @ V.T
[docs] def apply_grad_zmat_tensor( grad_C: Tensor4D, construction_table: DataFrame, cart_dist: Cartesian ) -> Zmat: """Apply the gradient for transformation to Zmatrix space onto cart_dist. Args: grad_C: A (3, n, n, 3) array. The index layout is explained in :meth:`~chemcoord.Cartesian.get_grad_zmat`. construction_table: Explained in :meth:`~chemcoord.Cartesian.get_construction_table`. cart_dist: Distortions in cartesian space. Returns: Distortions in Zmatrix space as a Zmat object. """ if (construction_table.index != cart_dist.index).any(): message = "construction_table and cart_dist must use the same index" raise ValueError(message) dtypes = [ ("atom", str), ("b", str), ("bond", float), ("a", str), ("angle", float), ("d", str), ("dihedral", float), ] new = pd.DataFrame( np.empty(len(construction_table), dtype=dtypes), index=cart_dist.index ) X_dist = cart_dist.loc[:, COORDS].values.T C_dist = np.tensordot(grad_C, X_dist, axes=([3, 2], [0, 1])).T if C_dist.dtype == np.dtype("i8"): C_dist = C_dist.astype("f8") try: C_dist[:, [1, 2]] = np.rad2deg(C_dist[:, [1, 2]]) # Unevaluated symbolic expressions are remaining. # catches AttributeError as well, because this was # the raised exception before https://github.com/numpy/numpy/issues/13666 except (AttributeError, TypeError): C_dist[:, [1, 2]] = sympy.deg(C_dist[:, [1, 2]]) new = new.astype({k: "O" for k in ["bond", "angle", "dihedral"]}) new.loc[:, ["b", "a", "d"]] = construction_table new.loc[:, "atom"] = cart_dist.loc[:, "atom"] new.loc[:, ["bond", "angle", "dihedral"]] = C_dist return Zmat(new, _metadata={"last_valid_cartesian": cart_dist})
def _cart_interpolate(start: Cartesian, end: Cartesian, N: int) -> list[Cartesian]: Delta = (end - start) / (N - 1) return [start + i * Delta for i in range(N)]
[docs] def interpolate( start: Cartesian, end: Cartesian, N: int, coord: Literal["cart", "zmat"] = "zmat" ) -> list[Cartesian]: """Interpolate between start and end structure. Args: start: Starting structure. end: Ending structure. N: Number of structures to interpolate between. coord: Interpolate either in cartesian or zmatrix space. """ if coord == "cart": return _cart_interpolate(start, end, N) elif coord == "zmat": return _zmat_interpolate(start, end, N) else: assert_never(f"coord must be either 'cart' or 'zmat', not {coord}")