Source code for pygromos.files.trajectory.tre

"""
File:            Class for tre files in pandas
Description:
    The pandas trajectory TRE class offers a easy method to process GROMOS's .tre files in python
    The tre files are parsed into an easy to use pandas dataframe.

    This class should be a alternative for the data post processing with ene_ana in gromos++

Author: Marc Thierry Lehner

TODO: add stride option to all member functions
TODO: add support for periodic boundary condition

TODO: add ene_ana functions

"""

# imports
import pandas as pd
import numpy as np

import pygromos.files.trajectory._general_trajectory as traj
from pygromos.files.trajectory.tre_field_libs.ene_fields import (
    gromos_2021_tre_block_names_table,
    gromos_tre_block_names_table,
)
from pygromos.utils.typing import Tuple, Dict, Tre_Type
from pygromos.analysis import energy_analysis as ea


[docs]class Tre(traj._General_Trajectory): """ The Tre files are results from Gromos simulations, that store all the calculated energies and properties during the simulation. """ _gromos_file_ending: str = "tre" _contributions_nonbonded_names: Tuple[str] = ( "Lennard-Jones", "Coulomb/RF", "lattice sum real", "lattice sum reciproc", ) _contributions_bonded_names: Tuple[str] = ("bond", "angle", "improper", "dihedral", "crossdihedral") _contributions_baths: Tuple[str] = ("kinetic total", "centre of mass", "internal/rotational") _contributions_special: Tuple[str] = ( "constraints", "pos. restraints", "dist. restraints", "disfield res", "dihe. restr.", "SASA", "SASA volume", "jvalue", "rdc", "local elevation", "path integral", "angle restraint", ) _contributions_temperature: Tuple[str] = ("total", "com", "ir", "scaling factor")
[docs] def __init__( self, input_value: str, auto_save: bool = True, stride: int = 1, skip: int = 0, _ene_ana_names: gromos_tre_block_names_table = gromos_2021_tre_block_names_table, ): """ Build a Gromos energy trajectory file (.tre) Parameters ---------- input_value : str,None The input value can be None, or a string path to the .tre/.tre.gz file. auto_save : bool, optional automatically save the file, by default True stride : int, optional only read every x value, by default 1 skip : int, optional skip the first x timesteps, by default 0 _ene_ana_names : gromos_tre_block_names_table, optional get the field names after the provided standard., by default gromos_2020_tre_block_names_table """ super().__init__(input_value, auto_save=auto_save, stride=stride, skip=skip) self.tre_block_name_table = _ene_ana_names if "time" in self.database.columns: self.time = self.database.time if "step" in self.database.columns: self.step = self.database.step
"""-------------------------------------------------------------- Basic getters for Subblocks and Subsubblocks ----------------------------------------------------------------- The following functions, return well formated values from the tre trajectories. ENERGY03 - fields: """
[docs] def get_totals(self) -> pd.DataFrame: """ get all totals of the system """ # print(self.database["totals"][0].shape, self.database["totals"][0]) if not hasattr(self, "totals"): totals_data = np.stack(self.database["totals"].to_numpy()) self.totals = pd.DataFrame( data=totals_data, index=self.database.time, columns=self.tre_block_name_table.totals_subblock_names[: totals_data.shape[1]], ) else: pass return self.totals
[docs] def get_totene(self) -> pd.DataFrame: """ get the total System energy / per time """ self.totene = self.get_totals()["totene"] return self.totene
[docs] def get_totkin(self) -> pd.DataFrame: """ get the total kinetic Energy / per time """ self.totkin = self.get_totals()["totkin"] return self.totkin
[docs] def get_totpot(self) -> pd.DataFrame: """ get the total potential Energy / per time """ self.totpot = self.get_totals()["totpot"] return self.totpot
[docs] def get_totcov(self) -> pd.DataFrame: """ get the total covalent contribution/ per time """ self.totcov = self.get_totals()["totcov"] return self.totcov
[docs] def get_totbonded(self) -> pd.DataFrame: """ get the total bonded contribution/ per time """ self.totbonded = self.get_totals()["totbond"] return self.totbonded
[docs] def get_totangle(self) -> pd.DataFrame: """ get the total angle contribution/ per time """ self.totangle = self.get_totals()["totangle"] return self.totangle
[docs] def get_totdihedral(self) -> pd.DataFrame: """ get the total dihedral contribution/ per time """ self.totdihedral = self.get_totals()["totdihedral"] return self.totdihedral
[docs] def get_totnonbonded(self) -> pd.DataFrame: """ get the total nonbonded contribution/ per time """ self.totnonbonded = self.get_totals()["totnonbonded"] return self.totnonbonded
[docs] def get_totlj(self) -> pd.DataFrame: """ get the total lennard jones contribution/ per time """ self.totlj = self.get_totals()["totlj"] return self.totlj
[docs] def get_totcrf(self) -> pd.DataFrame: """ get the total columbic reactionfield contribution/ per time """ self.totcrf = self.get_totals()["totcrf"] return self.totcrf
[docs] def get_baths(self) -> pd.DataFrame: """ extract data of the baths block """ return self._set_data(attibute_name="baths", rows_name="baths", field_names=self._contributions_baths)
[docs] def get_bondedContributions(self) -> Dict[int, pd.DataFrame]: """ extract data of the bonded block """ return self._set_data( attibute_name="bondedContributions", rows_name="bonded", field_names=self._contributions_bonded_names )
[docs] def get_nonbondedContributions(self) -> Dict[int, Dict[int, pd.DataFrame]]: """ This function returns a nice formatted dictionary for the nonbonded Contributions according to the Force groups of the tre file. Returns ------- Dict[int, Dict[int, pd.DataFrame]] The dictionary containing the nonbonded contributions of the single ForceGroups with each other. Dict[ForceGroupI, Dict[ForceGroupJ, NonbondedEnergyContribs]] Raises ------ ValueError returns Value error, if the dimensionality of the different contributions does not fit to the _nonbonded_contribution_names. """ if not hasattr(self, "forceGroupNonbondedContributions"): # Check contibution_dimensionalities nFFContributions = self.database.nonbonded[0].shape[1] if nFFContributions != len(self._contributions_nonbonded_names): raise ValueError( "The dimensionality of the NonbondedContributions is not corresponding to the expected dimensionality.\n expected: " + str(len(self._contributions_nonbonded_names)) + " \t found: " + str(nFFContributions) ) # Get the number of force groups: nForceGroup = self._get_numberOfForceGroupsFromNonbondeds() # Generate dictionary for the different contributions t = 0 forceGroupNonbondedContributions = {} for i in range(1, 1 + nForceGroup): forceGroupNonbondedContributions[i] = {} for j in range(1, 1 + nForceGroup): forceGroupNonbondedContributions[i][j] = pd.DataFrame( list(self.database.nonbonded.apply(lambda x: x[i + j - 2])), columns=self._contributions_nonbonded_names, ) t += 1 self.forceGroupNonbondedContributions = forceGroupNonbondedContributions return self.forceGroupNonbondedContributions
[docs] def get_specialContributions(self) -> Dict[int, pd.DataFrame]: # CHECK THIS """ extract data of the special block """ return self._set_data( attibute_name="specialContributions", rows_name="special", field_names=self._contributions_special )
[docs] def get_eds(self) -> pd.DataFrame: """ Get EDS energies if present. Returns ------- pd.DataFrame returns datafrae with columns for each endstate. """ if not hasattr(self, "eds"): num_states = self.database["eds"][0][0] if num_states > 0: state_strings = [ [ column + "_" + str(state_row) for column in self.tre_block_name_table.eds_subblock_names_singleState ] for state_row in range(1, 1 + int(num_states)) ] self.tre_block_name_table.eds_subblock_names = ["numstates"] self.tre_block_name_table.eds_subblock_names.extend(list(np.concatenate(state_strings))) else: self.tre_block_name_table.eds_subblock_names = ["numstates"] self.eds = pd.DataFrame( data=np.stack(self.database["eds"].to_numpy()), index=self.database.time, columns=self.tre_block_name_table.eds_subblock_names, ) else: pass return self.eds
[docs] def get_precalclam(self) -> pd.DataFrame: """ Get the energies calculated for the different defined lambda values in a trajectory. Returns ------- pd.DataFrame return the energies calculated for the different lambda values. """ if not hasattr(self, "precalclam"): num_lam = self.database["precalclam"][0][0] if num_lam > 0: state_strings = [ [column + "_" + str(state_row) for column in self.tre_block_name_table.lam_subblock_names_singleLam] for state_row in range(1, 1 + int(num_lam)) ] self.tre_block_name_table.lam_subblock_names = ["nr_lambdas"] self.tre_block_name_table.lam_subblock_names.extend(list(np.concatenate(state_strings))) self.tre_block_name_table.lam_subblock_names.extend(["A_dihedral", "B_dihedral"]) else: self.tre_block_name_table.lam_subblock_names = ["nr_lambdas", "A_dihedral", "B_dihedral"] self.precalclam = pd.DataFrame( data=np.stack(self.database["precalclam"].to_numpy()), index=self.database.time, columns=self.tre_block_name_table.lam_subblock_names, ) else: pass return self.precalclam
""" VOLUMEPRESSURE03 - fields: """
[docs] def get_mass(self) -> pd.Series: """ returns the systems mass per timestep Returns ------- pd.Series series of mass per time """ return pd.Series(map(float, self.database.mass), name="mass", index=self.database.time)
[docs] def get_temperature_Info(self) -> Dict[int, pd.DataFrame]: """ temperature baths Returns ------- Dict[int,pd.DataFrame] returns the full info of the temperature baths per bath """ return self._set_data( attibute_name="temperatureInfo", rows_name="temperature", field_names=self._contributions_temperature )
[docs] def get_temperature( self, ) -> pd.DataFrame: # CHECK THIS - tempcontrib:total com ir scaling factor """ Get the temperature in Kelvin for all temperature baths for every time step Returns ------- pd.DataFrame pandas dataframe with all temperatures """ tmps = [] for i, row in self.database.iterrows(): row_d = {"bath" + str(i + 1): temp for i, temp in enumerate(row["temperature"][:, 0])} tmps.append(row_d) return pd.DataFrame(tmps, index=self.database.time)
""" UTILS """
[docs] def _set_data(self, attibute_name: str, rows_name: str, field_names: Tuple[str]) -> pd.DataFrame: """_summary_ This function extracts generially the information of a column per time Parameters ---------- attibute_name : str name of the target attribute rows_name : str name of the block, that shall be extracted field_names : Tuple[str] name of the fields in each row Returns ------- pd.DataFrame contains the extracted information """ if not hasattr(self, attibute_name): nDimGroups = self.database[rows_name][0].shape[0] setattr(self, attibute_name, {}) for i in range(nDimGroups): getattr(self, attibute_name).update( { i: pd.DataFrame( list(map(lambda x: list(x[i]), list(self.database[rows_name]))), columns=field_names, index=self.database.time, ) } ) return getattr(self, attibute_name)
[docs] def _get_numberOfForceGroupsFromNonbondeds(self) -> int: """ This function gets the number of Force groups in the simulation from the nonbonded block. Returns ------- int number of ForceGroups used for this tre. """ nForceGroupDim = self.database.nonbonded[0].shape[0] r = 0 for nForceGroup in range(1, nForceGroupDim + 1): r += nForceGroup if r == nForceGroupDim: return nForceGroup elif r > nForceGroupDim: raise Exception("That should not happen!")
"""-------------------------------------------------------------- Additional predefined function for common analysis ----------------------------------------------------------------- """
[docs] def get_density(self) -> pd.DataFrame: """ Calculate the density for every frame. (Uses mass and the first volume entry) Returns ------- pd.DataFrame Dataframe with the densities for all time steps """ return pd.Series( list( self.database[["mass", "volume"]].apply(lambda x: ea.get_density(mass=x[0][0], volume=x[1][0]), axis=1) ), index=self.database.time, name="density", )
[docs] def get_Hvap(self, gas_traj: Tre_Type, nMolecules: int = 1, temperature: float = None) -> float: gas_totpot_energy = 0 if type(gas_traj) == type(self): gas_totpot_energy = gas_traj.get_totpot().mean() elif type(gas_traj) == float: gas_totpot_energy = gas_traj else: raise TypeError("Did not understand the type of gas. Allowed are float (E_gas) or Tre (gas_trajectory)") liq_totpot_energy = self.get_totpot().mean() # get temperature from liq trajectory if not given if temperature is None: temperature = float(self.get_temperature().mean()[0]) # calculate heat of vaporization self.heat_vap = ea.get_Hvap( liq_totpot_energy=liq_totpot_energy, gas_totpot_energy=gas_totpot_energy, temperature=temperature, nMolecules=nMolecules, ) return self.heat_vap