Source code for pygromos.files.coord.cnf

import copy
import mdtraj
import tempfile
import numpy as np
import nglview as nj
from scipy.spatial.transform import Rotation
from collections import namedtuple, defaultdict

from rdkit import Chem
from rdkit.Chem import AllChem

from pygromos.files._basics import parser
from pygromos.files._basics._general_gromos_file import _general_gromos_file
from pygromos.files.blocks import coord_blocks as blocks
from pygromos.files.blocks._general_blocks import TITLE
from pygromos.utils import amino_acids as aa
from pygromos.utils.utils import _cartesian_distance
from pygromos.utils.typing import Union, List, Tuple, Dict, Reference_Position_Type, Position_Restraints_Type
from pygromos.files.blocks.coord_blocks import GENBOX
from pygromos.analysis import coordinate_analysis as ca
from pygromos.visualization.coordinates_visualization import visualize_system


# Constructs describing the system
solute_infos = namedtuple("solute_info", ["names", "number", "positions", "number_of_atoms"])
protein_infos = namedtuple(
    "protein_info",
    ["name", "residues", "number_of_residues", "position", "start_position", "end_position", "number_of_atoms"],
)
non_ligand_infos = namedtuple("ligands_info", ["names", "number", "positions", "number_of_atoms"])
solvent_infos = namedtuple("solvent_info", ["name", "number", "atoms_per_residue", "positions", "number_of_atoms"])


# make infos pickle-able


[docs]class Cnf(_general_gromos_file): """ This class is a representation of the gromos .cnf coordinate files. It allows reading, analysis and modifying of the coordinate files. is a child of general_gromos_file """ # general _orig_file_path: str _gromos_file_ending: str = "cnf" residues: Dict[str, Dict[str, int]] # Standard Gromos blocks TITLE: blocks.TITLE # required POSITION: blocks.POSITION # required TIMESTEP: blocks.TIMESTEP LATTICESHIFTS: blocks.LATTICESHIFTS VELOCITY: blocks.VELOCITY GENBOX: blocks.GENBOX PERTDATA: blocks.PERTDATA atom_ref_pos_block: blocks.REFPOSITION # private _block_order: List[str] = ["TITLE", "TIMESTEP", "POSITION", "LATTICESHIFTS", "VELOCITY", "REFPOSITION"] _required_blocks: List[str] = ["TITLE", "POSITION"] _main_block: str = "POSITION" def __init__( self, in_value: Union[str, dict, None], clean_resiNumbers_by_Name: bool = False, verbose: bool = False, _future_file: bool = False, ): # import for rdkit molecules if type(in_value) == Chem.rdchem.Mol: super().__init__(in_value=None, _future_file=_future_file) self.createRDKITconf(mol=in_value) self.residues = self.get_residues(verbose=verbose) # general import else: super().__init__(in_value=in_value, _future_file=_future_file) if hasattr(self, "POSITION"): if clean_resiNumbers_by_Name: self.clean_posiResNums() # carefull! if two resis same name after an another than, here is a problem. self.residues = self.get_residues(verbose=verbose) """ Manage coordinates """
[docs] def add_empty_box(self): self.add_block(block=GENBOX())
[docs] def read_file(self) -> Dict[str, any]: """ Parameters ---------- path : Returns ------- """ return parser.read_cnf(self._orig_file_path)
""" manipulate/analysis of coordinates """
[docs] def add_residue_positions(self, coords: object): """This function adds all residues of an coords file to @DEVELOP This is a very crude functio at the moment! It only takes positions of a residue and merges them! if there are residues with the same name, this might lead to problems, as clean_posiresnumbyname function is not sensitive for that! todo: make more robust! bschroed Parameters ---------- coords :function_libs.gromos.files.coord.Cnf obj object - CNF object Returns ------- """ positions = coords.POSITION.content self.POSITION.content.extend(positions) self.clean_posiResNums() self.get_residues(verbose=True)
[docs] def get_system_information( self, not_ligand_residues: List[str] = [], ligand_resn_prefix: Union[str, List[str]] = None, solvent_name: str = "SOLV", ) -> Tuple[Dict[str, Dict[int, int]], namedtuple, namedtuple, namedtuple, namedtuple]: """get_system_information This function utilizes a dictionary containing all residues and atom numbers (e.g. cnf.get_residues()) and modifies them such, that the result can be used to set up a standard REEDS gromos_simulation Parameters ---------- residues : Dict[str, Dict[int,int]] input a cnf.residues:dict that shall be cleaned and return a reduced form for parameter file. not_ligand_residues :List[str] here all molecules, that are not considered as ligand or protein. ligand_resn_prefix :List[str] here all molecules, that are considered as ligand are listed. Returns ------- Dict[str, Dict[int,int]] cleaned_residue dict NamedTuple ligands NamedTuple protein NamedTuple non_ligands """ residues = self.get_residues() # Build Up new list # Criterium, when a ligand is considered if isinstance(ligand_resn_prefix, str): ligand_resn_prefix = [ligand_resn_prefix] # print(ligand_resn_prefix) ligand_residue = lambda res: ( (res != "SOLV" and (res not in aa.three_letter_aa_lib and res != "prot")) and res not in not_ligand_residues ) or (ligand_resn_prefix is not None and res in ligand_resn_prefix) # print(ligand_residue) # get ligand parameters # Avoid multi ligands with same resi name! ligand_names = [res for res in residues if ligand_residue(res)] if any([len(residues[name]) > 1 for name in ligand_names]): multi_res_ligands = [name for name in ligand_names if (len(residues[name]) > 1)] clean_residues = {} for name in ligand_names: if name in multi_res_ligands: for resi in residues[name].keys(): new_name = name[:-1] + str(resi) clean_residues.update({new_name: {resi: sum(list([residues[name][resi]]))}}) else: clean_residues.update({name: {min(residues[name].keys()): sum(list(residues[name].values()))}}) else: clean_residues = { name: {min(residues[name].keys()): sum(list(residues[name].values()))} for name in ligand_names } # ligands as resi # Update ligand_names = [res for res in clean_residues if ligand_residue(res)] number_of_ligands_atoms = sum([sum(list(clean_residues[res].values())) for res in ligand_names]) number_of_ligands = len(ligand_names) ligand_positions = [min(clean_residues[res]) for res in ligand_names] ligands = solute_infos( names=ligand_names, number=number_of_ligands, positions=ligand_positions, number_of_atoms=number_of_ligands_atoms, ) # get protein parameters if present protein_residues = {res: val for res, val in residues.items() if (res in aa.three_letter_aa_lib)} if len(protein_residues) > 0: protein_name = "protein" number_of_protein_residues = sum([len(list(residues[res].keys())) for res in protein_residues]) number_of_protein_atoms = sum([sum(list(residues[res].values())) for res in protein_residues]) protein_start_position = min([min(val) for res, val in protein_residues.items()]) protein_end_position = max([max(val) for res, val in protein_residues.items()]) clean_residues.update( {protein_name: {len(clean_residues) + 1: number_of_protein_atoms}} ) # protein Atoms as one residue protein = protein_infos( name=protein_name, residues=protein_residues, number_of_residues=number_of_protein_residues, position=protein_start_position, start_position=protein_start_position, end_position=protein_end_position, number_of_atoms=number_of_protein_atoms, ) else: protein = protein_infos( name="", residues=0, number_of_residues=0, position=0, number_of_atoms=0, start_position=-1, end_position=-1, ) # get non_Ligand_residues, e.g.: Cofactors excluded_resi_names = [res for res in residues if (res in not_ligand_residues)] if len(excluded_resi_names) > 0: number_of_non_ligands_atoms = sum([sum(list(residues[res].values())) for res in excluded_resi_names]) number_of_non_ligands = len(ligand_names) non_ligand_positions = 0 # min([min(clean_residues[res]) for res in ligand_names]) #TODO: Fix clean_residues.update( {name: {min(residues[name].keys()): sum(list(residues[name].values()))} for name in excluded_resi_names} ) # protein Atoms as one residue non_ligands = non_ligand_infos( names=excluded_resi_names, number=number_of_non_ligands, positions=non_ligand_positions, number_of_atoms=number_of_non_ligands_atoms, ) else: non_ligands = non_ligand_infos(names=[], number=0, positions=0, number_of_atoms=0) # get Solvent if solvent_name in residues: clean_residues.update({solvent_name: residues[solvent_name]}) # solvent positions = list(residues[solvent_name].keys()) all_atoms = sum(list(residues[solvent_name].values())) natoms_per_resi = int(residues[solvent_name][positions[0]]) solvent = solvent_infos( name=solvent_name, number=len(positions), atoms_per_residue=natoms_per_resi, positions=positions, number_of_atoms=all_atoms, ) else: solvent = solvent_infos(name=[], number=0, atoms_per_residue=0, positions=[], number_of_atoms=0) return clean_residues, ligands, protein, non_ligands, solvent
[docs] def rename_residue(self, new_resName: str, resID: int = False, resName: str = False, verbose: bool = False) -> int: """rename_residue this function is renaming residues from a cnf file with taking following _blocks into account: "POSITION", "VELOCITY" it additionally recounts all atomIds and residueIDs afterwards. you can provide a residue ID or a residue Name or both (than only exact match will be deleted). Parameters ---------- new_resName : str new Name of the residue resID : int, optional Id of the residue to be renamed resName : str, optional Name of the residue to be renamed verbose : bool, optional Text... lots of it! Returns ------- int 0 if succesfull """ check_blocks = ["POSITION", "VELOCITY"] if (not resID or not resName) and verbose: print("WARNING giving only resID or resName can be ambigous!") if resID or resName: # deletable with resID for block in check_blocks[:2]: result_sub = [] # build up new data list if block in dir(self): subblock = getattr(self, block) for i, x in enumerate(subblock.content): # go through block if (int(x.resID) == resID or not resID) and ( x.resName == resName or not resName ): # found correct residue to be changed x.resName = new_resName result_sub.append(x) setattr(subblock, "content", result_sub) setattr(self, block, subblock) elif verbose: print("Skip block: " + block + ", as was not found in file.\n") else: raise IOError("No residue number or resName given.") return 0
[docs] def clean_posiResNums(self): """clean_posiResNums This function recount the Residue number with respect to residue name and residue number. Warnings -------- only in "Position_BLOCK!@development Returns ------- None """ position_copy = self.POSITION pos = position_copy.content tmpN = "" tmpID = 0 tmpOldID = pos[0].resID for p in pos: # print(p) # print(tmpN,tmpID) if p.resName == tmpN and p.resID == tmpOldID: # same residue as before p.resID = tmpID elif p.resName == tmpN and p.resID != tmpOldID: # same resiname but diff ID (double? - this is a problem!) tmpOldID = p.resID tmpID += 1 p.resID = tmpID else: # next name and residue id tmpID += 1 tmpN = p.resName tmpOldID = p.resID p.resID = tmpID self.POSITION.content = pos
[docs] def delete_residue(self, resID: int = False, resName: str = False, verbose=False) -> int: """delete_residue this function is deleting residues from a cnf file with taking following _blocks into account: "POSITION", "VELOCITY", "LATTICESHIFTS", "REFPOSITION" it additionally recounts all atomIds and residueIDs afterwards. you can provide a residue ID or a residue Name or both (than only exact match will be deleted). Parameters ---------- resID : int Id of the residue to be deleted resName : str Name of the residue to be deleted verbose : bool Text... lots of it! Returns ------- int 0 if succesfull """ check_blocks = ["POSITION", "VELOCITY", "LATTICESHIFTS", "REFPOSITION"] atom_nums = [] if (not resID or not resName) and verbose: print("WARNING giving only resID or resName can be ambigous!") if resID or resName: # deletable with resID for block in check_blocks[:2]: result_sub = [] if block in dir(self): subblock = getattr(self, block) if isinstance(subblock, type(None)): check_blocks.remove(block) continue offset_atomID = 0 # recount atomids res_off = 0 # recount residues tmp_del_resID = -10 for i, x in enumerate(subblock.content): if (int(x.resID) == resID or not resID) and (x.resName == resName or not resName): atom_nums.append(x.atomID) # append to list of being deleted offset_atomID += 1 res_off = 1 # catch multiple to be deleted residues after each other in a residue offset : for recounting if tmp_del_resID != x.resID - 1 and tmp_del_resID != x.resID: res_off += 1 else: res_off = 1 tmp_del_resID = x.resID continue else: x.atomID -= offset_atomID if x.resID - res_off <= 0: res_off = -1 * res_off - 1 x.resID -= res_off else: x.resID -= res_off result_sub.append(x) setattr(subblock, "content", result_sub) setattr(self, block, subblock) elif verbose: print("Skip block: " + block + ", as was not found in file.\n") # delete by atomnum for block in check_blocks[2:]: if hasattr(self, block): for x in self.__getattribute__(check_blocks[2]).content: if x.atomID in atom_nums: list(self.__getattribute__(check_blocks[2]).content).remove(x) elif verbose: print("Skip block: " + block + ", as was not found in file.\n") for x in self.__getattribute__(check_blocks[2]).content: if x.atomID in atom_nums: list(self.__getattribute__(check_blocks[2]).content).remove(x) else: raise IOError("No residue number or resName given.") return 0
[docs] def delete_atom(self, resID: int = False, resName: str = False, atomID: int = False, atomType: str = False): check_blocks = ["POSITION", "VELOCITY", "LATTICESHIFTS", "REFPOSITION"] atom_nums = [] if not resID or not resName: print("WARNING giving only resID or resName can be ambigous!") if resID or resName: # deletable with resID for block in check_blocks[:2]: result_sub = [] if block in dir(self): subblock = getattr(self, block) offset_atomID = 0 res_off = 0 for i, x in enumerate(subblock.content): if ( (x.resID == resID or not resID) and (x.resName == resName or not resName) and (x.atomID == atomID or not atomID) and (x.atomType == atomType or not atomType) ): atom_nums.append(x.atomID) offset_atomID += 1 res_off = 1 continue else: x.atomID -= offset_atomID if x.resID - res_off == 0 and x.resName != "Solv": res_off = 0 else: x.resID -= res_off result_sub.append(x) setattr(subblock, "content", result_sub) setattr(self, block, subblock) # deletable with atomnum for block in check_blocks[2:]: for x in self._blocks[check_blocks[2]].content: if x.atomID in atom_nums: list(self._blocks[block].content).remove(x) else: raise IOError("No residue number or resName given.") return 0
[docs] def count_residue_atoms(self, resID: int = False, resName: str = False, verbose=False) -> (int or dict): if resName and resID: print("\t" + resName + "\t" + str(resID) + "\t" + str(self.residues[resName][resID])) return self.residues[resName][resID] if resName and not resID: print( resName + "\n".join(["\t" + str(x) + "\t" + str(self.residues[resName][x]) for x in self.residues[resName]]) ) return self.residues[resName] if not resName and resID: result = [] for x in self.residues: if resID in self.residues[x]: result.update({x: self.residues[x]}) else: continue if verbose: # make string if verbose: out_string = "#\tresName\tresID\tcounts\n" # generate string for resn in result: out_string += "\t" + resn for id in result[resn]: out_string += "\t" + str(id) + "\t" + str(result[resn][id]) + "\n" print(out_string) return result else: raise ValueError("Please verify at least resName or resId for residue atom count ")
[docs] def get_residues(self, verbose: bool = False) -> Dict[str, Dict[str, int]]: """get_residues This function is getting all residues of the used cnf file. it gives back, Parameters ---------- verbose : bool, optional texty? Returns ------- Dict[str, Dict[str, Any]] returns dict containing all residues, numbers and atoms. """ if "POSITION" in dir(self): counts = {} subblock = self.POSITION.content # find Residues and Count them. for i, x in enumerate(subblock): # # TREAT SOLVENT # if (x.resName == "SOLV" or x.resName == "SOL"): # if not (x.resName in counts): # counts.update({x.resName: 1}) # else: # counts[x.resName] += 1 # TREAT Unknown Molecules if x.resName in counts and x.resID in counts[x.resName]: counts[x.resName][x.resID] += 1 elif x.resName in counts and x.resID not in counts[x.resName]: counts[x.resName].update({x.resID: 1}) else: counts.update({x.resName: {x.resID: 1}}) if verbose: print("COUNTS", counts) # make string if verbose: out_string = "#\tresName\tresID:counts\n" # generate string for resn in counts: out_string += "\t" + resn out_string += "\t \t" + str(counts[resn]) + "\n" print(out_string) return counts else: raise ValueError("NO POSITION block in cnf-Object: " + self.path)
[docs] def get_atomP(self, atomID: int = None, atomType: str = None, resID: int = None, resName: str = False) -> list: if "POSITION" in dir(self): atoms = [] for x in self.POSITION.content: if ( (x.resID == resID or not resID) and (x.resName == resName or not resName) and (x.atomID == atomID or not atomID) and (x.atomType == atomType or not atomType) ): atoms.append(x) if len(atoms) > 0: return atoms else: raise ValueError("COULD not find Atom with ID: " + str(atomID) + " in CNF object.") else: raise ValueError("NO POSITION block in cnf-Object: " + self.path)
[docs] def get_atom_coordinates(self) -> np.array: """ This function returns a np.array containing all system coordinates. Returns ------- np.array dims are atoms[x,y,z] """ return np.array([(a.xp, a.yp, a.zp) for a in self.POSITION])
[docs] def get_atoms_distance( self, atomI: Union[int, List[int]] = None, atomJ: int = None, atoms: Union[List[int], Dict[int, int]] = None ) -> Union[float, List[float]]: if "POSITION" in dir(self): # one if (atomI is not None and atomJ is not None) and atoms is None: ai = self.get_atomP(atomI)[0] aj = self.get_atomP(atomJ)[0] distance = _cartesian_distance(x1=ai.xp, x2=aj.xp, y1=ai.yp, y2=aj.yp, z1=ai.zp, z2=aj.zp) return distance # one atom coord set to all atoms of cnf elif atomI is not None and atomJ is None and atoms is None and type(atomI) is tuple: if len(atomI) != 3: raise ValueError("atomI-Coordinate tuple have more or less than 3 dims in get_atoms_distance") distances = {} for x in self._blocks["POSITION"].content: aj = x distance = _cartesian_distance(x1=atomI[0], x2=aj.xp, y1=atomI[1], y2=aj.yp, z1=atomI[2], z2=aj.zp) distances.update({x.atomID: distance}) return distances # one coord set + list of atoms elif atomI is not None and atomJ is None and atoms is not None and type(atomI) is tuple: if len(atomI) != 3: raise ValueError("atomI-Coordinate tuple have more or less than 3 dims in get_atoms_distance") distances = {} for x in atoms: aj = self.get_atomP(x)[0] distance = _cartesian_distance(x1=atomI[0], x2=aj.xp, y1=atomI[1], y2=aj.yp, z1=atomI[2], z2=aj.zp) distances.update({aj.atomID: distance}) return distances elif atomI is not None and atomJ is None and atoms is not None and type(atomI) is int: ai = self.get_atomP(atomI)[0] distances = {} for x in atoms: aj = self.get_atomP(x)[0] distance = _cartesian_distance(x1=ai.xp, x2=aj.xp, y1=ai.yp, y2=aj.yp, z1=ai.zp, z2=aj.zp) distances.update({x: distance}) return distances elif atomI is None and atomJ is None and atoms is not None and atoms is list: distances_all = [] for x in atoms: ai = self.get_atomP(x)[0] distances = [] for y in atoms[atoms.index(x) :]: aj = self.get_atomP(y)[0] distance = _cartesian_distance(x1=ai.xp, x2=aj.xp, y1=ai.yp, y2=aj.yp, z1=ai.zp, z2=aj.zp) distances.append(distance) distances_all.append(distances) return distances_all elif atomI is not None and atomJ is not None and atoms is not None and atoms is dict: distances_all = {} for x in atoms: ai = self.get_atomP(x) distances = {} for y in atoms[atoms.index(x) :]: aj = self.get_atomP(y) distance = _cartesian_distance(x1=ai.xp, x2=aj.xp, y1=ai.yp, y2=aj.yp, z1=ai.zp, z2=aj.zp) distances.update({y: distance}) distances_all.append({x: distances}) return distances_all else: raise ValueError( "Combination of given parameters for get_atoms_distance in cnf class is unknown!\n Please give: int int or int list or list" ) else: raise ValueError("NO POSITION block in cnf-Object: " + self.path)
[docs] def get_last_atomID(self) -> int: """get_last atom A very simple convenience function that returns the last atom Returns ------- int Returns the last atom of the system. """ return self.POSITION.content[-1].atomID
[docs] def center_of_geometry(self, selectedAtoms: List[int] = None) -> list: """calculates the center of geometry for asingle molecule or the selected Atoms Returns ------- list cog """ if "POSITION" in dir(self): cogx = 0.0 cogy = 0.0 cogz = 0.0 if selectedAtoms is None: iterator = self.POSITION.content else: iterator = [] for i in selectedAtoms: iterator.append(self.POSITION.content[i]) for i in iterator: cogx += i.xp cogy += i.yp cogz += i.zp n = len(self.POSITION.content) return [cogx / n, cogy / n, cogz / n] else: raise ValueError("NO POSITION block in cnf-Object: " + self.path)
[docs] def rotate( self, rotationCenter: np.array = None, selectedAtoms=None, alpha: float = 0, beta: float = 0, gamma: float = 0 ): # define rotation center if rotationCenter is None: rotationCenter = np.array(self.center_of_geometry()) # select atoms to rotate if selectedAtoms is None: iterator = range(len(self.POSITION.content)) # define Rotation rotation = Rotation.from_euler("XYZ", [alpha, beta, gamma], degrees=True).as_matrix() # apply rotation for i in iterator: # get atom atom = self.POSITION.content[i] vector = np.array([atom.xp, atom.yp, atom.zp]) # do rotation around rotation center new_vec = np.dot((vector - rotationCenter), rotation) + rotationCenter # rewrite atom atom.xp, atom.yp, atom.zp = new_vec self.POSITION.content[i] = atom
[docs] def supress_atomPosition_singulrarities(self) -> None: """supress_atomPosition_singulrarities This function adds a very small deviation to the position of an atom, dependent on the atom number. This might be needed to avoid singularities in gromosXX. Returns ------- None """ if "POSITION" in dir(self): for ind, atom in enumerate(self.POSITION.content): atom.xp = atom.xp + 10 ** (-7) * ind atom.yp = atom.yp - 10 ** (-7) * ind atom.zp = atom.zp - 10 ** (-7) * ind
[docs] def get_volume(self) -> float: """ This function calculates the volume of the cnf. Returns ------- float volume of the cnf. """ length = self.GENBOX.length return length[0] * length[1] * length[2]
[docs] def get_density(self, mass: float = 1, autoCalcMass: bool = False) -> float: """ This function calculates the density of the cnf. Returns ------- float density of the cnf. """ if autoCalcMass: mass = self.get_mass() return 1.66056 / self.get_volume() * mass
[docs] def get_mass(self) -> float: """ This function calculates the mass of the cnf. Returns ------- float mass of the cnf. """ mass = 0 elemMassDict = { "H": 1.0079, "C": 12.0107, "N": 14.0067, "O": 15.9994, "P": 30.9738, "S": 32.065, "F": 18.998403163, } # TODO: add all elements for atom in self.POSITION: mass += elemMassDict[atom.atomType[0]] return mass
[docs] def shift_periodic_boundary(self): """ This function is shifting the coordinates such that the solute molecule is centered, if it is placed in the corners. However, this function might break down with more complicated situations. Careful the function is not fail safe ;) """ grid = np.array(self.GENBOX.length) tmp_pos = [] for aP in self.POSITION: new_pos = ca.periodic_shift([aP.xp, aP.yp, aP.zp], grid) tmp_pos.append(new_pos) # shift total coords to minimal 0 tmp_pos = np.array(tmp_pos) - min(tmp_pos) # write new pos: result_pos = [] for new_pos, aP in zip(tmp_pos, self.POSITION): result_pos.append( blocks.atomP( xp=new_pos[0], yp=new_pos[1], zp=new_pos[2], resID=aP.resID, resName=aP.resName, atomType=aP.atomType, atomID=aP.atomID, ) ) self.POSITION = result_pos
""" generate further file """
[docs] def generate_position_restraints( self, out_path_prefix: str, residues: Union[Dict[str, List[int]], List[int]], verbose: bool = False ) -> Tuple[str, str]: """ This function generates position restraints for the selected residues. Parameters ---------- out_path_prefix : str target path prefix for the out files. residues : dict or list residues to be restrained (dict containing resname:[res ids] or list of resnames or residue IDs) verbose : bool, optional Loud and noisy, by default False Returns ------- Tuple[str, str] return the two resulting paths to the generated files. """ posres = self.write_possrespec(out_path=out_path_prefix + ".por", residues=residues, verbose=verbose) refpos = self.write_refpos(out_path=out_path_prefix + ".rpf") return posres, refpos
[docs] def gen_possrespec( self, residues: Union[Dict[str, List[int]], List[int]], keep_residues: bool = True, verbose: bool = False ) -> Position_Restraints_Type: """ This function writes out a gromos file, containing a atom list. that is to be position restrained! Raises not Implemented error, if a input variant of the residues Parameters ---------- residues : dict, list residues to be restrained (dict containing resname:[res ids] or list of resnames or residue IDs) keep_residues : bool, optional should the passed residues be kept or deleted? verbose : bool, optional loud and noisy? Returns ------- str out_path Position_Restraints posrespec-file-obj """ from pygromos.files.coord import posres delete_res = {} # dict for the atoms, that should be deleted. cnf = copy.deepcopy(self) # deepcopied cnf for output # Get all atoms not included to posres restrain and store them in delete_res try: if type(residues) == dict: # if adict was given - todo: not tested for res in cnf.residues: if res not in residues: delete_res.update({res: [-1]}) else: ids = [resi for resi in list(cnf.residues[res].keys()) if (resi not in residues[res])] delete_res.update({res: ids}) if type(residues) == list: # if a list of resIDS was given if all([type(x) == str for x in residues]): # for resn for res in cnf.residues: if keep_residues and res not in residues: delete_res.update({res: [-1]}) elif not keep_residues and res in residues: delete_res.update({res: [-1]}) elif all([type(x) == int for x in residues]): # for resids for res in cnf.residues: res_ids = cnf.residues[res] if type(res_ids) == dict: for resi in res_ids: if resi not in residues: if res in delete_res: delete_res[res].append(res_ids) else: delete_res.update({res: [res_ids]}) else: delete_res.update({res: [res_ids]}) else: raise Exception("I will be catched and translated in the except >)") except Exception as err: raise NotImplementedError("Posres _file input arg combination : Not implemented! " + "\n".join(err.args)) # Remove all not to constrain atoms: if verbose: print("delete residues: ", delete_res) for resn, resi_list in delete_res.items(): if type(resi_list) == dict: for resi in resi_list: cnf.delete_residue(resName=resn, resID=resi) else: cnf.delete_residue(resName=resn) if verbose: print("remaining: ", cnf.get_residues()) return posres.Position_Restraints(cnf)
[docs] def write_possrespec(self, out_path: str, residues: dict or list, verbose: bool = False) -> str: """write_possrespec This function writes out a gromos file, containing a atom list. that is to be position restrained! Raises not Implemented error, if a input variant of the residues Parameters ---------- out_path : str path to the outputfile residues : dict, list residues to be restrained (dict containing resname:[res ids] or list of resnames or residue IDs) verbose : bool, optional loud and noisy? Raises ------ NotImplementedError Returns ------- str out_path Position_Restraints posrespec-file-obj """ posres_class = self.gen_possrespec(residues=residues, verbose=verbose) posres_class.write(out_path) return out_path
[docs] def gen_refpos(self) -> Reference_Position_Type: from pygromos.files.coord import refpos return refpos.Reference_Position(self)
[docs] def write_refpos(self, out_path: str) -> str: refpos_class = self.gen_refpos() refpos_class.write(out_path) return out_path
""" convert file: """
[docs] def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1): """creates a PyGromosTools CNF type from a rdkit molecule. If a conformation exists the first one will be used. Parameters ---------- mol : Chem.rdchem.Mol Molecule, possibly with a conformation conversionFactor : float the factor used to convert length from rdkit to Gromos (default: angstrom -> nano meter = 0.1) """ inchi = Chem.MolToInchi(mol).split("/") if len(inchi) >= 2: name = inchi[1] else: name = "XXX" self.__setattr__("TITLE", TITLE("\t" + name + " created from RDKit")) # check if conformations exist else create a new one if mol.GetNumConformers() < 1: mol = Chem.AddHs(mol) AllChem.EmbedMolecule(mol) AllChem.UFFOptimizeMolecule(mol) conf = mol.GetConformer(0) # fill a list with atomP types from RDKit data atomList = [] for i in range(mol.GetNumAtoms()): x = conversionFactor * conf.GetAtomPosition(i).x y = conversionFactor * conf.GetAtomPosition(i).y z = conversionFactor * conf.GetAtomPosition(i).z atomType = mol.GetAtomWithIdx(i).GetSymbol() atomList.append(blocks.atomP(resID=1, resName=name, atomType=atomType, atomID=i + 1, xp=x, yp=y, zp=z)) # set POSITION attribute self.__setattr__("POSITION", blocks.POSITION(atomList)) # Defaults set for GENBOX - for liquid sim adjust manually self.__setattr__("GENBOX", blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90]))
[docs] def get_pdb(self, rdkit_ready: bool = False, connectivity_top=None) -> str: """ translate cnf to pdb. Parameters ---------- rdkit_ready: bool, optional str output was tested with RDKIT (default: False) connectivity_top: top.Top, optional if the pygromos top class is provided (containing a BOND block), then the pdb gets a connect block. Returns ------- str pdb str. """ dummy_occupancy = dummy_bfactor = dummy_charge = dummy_mass = 0.0 dummy_chain = "" if rdkit_ready: format_str = ( "HETATM {:>4} {:>4} {:>3} {:>3} {:>6.3f} {:>6.3f} {:>6.3f} {:>2.2f} {:>2.2f} {:>2}" ) frame_positions = [] for pos in self.POSITION: frame_positions.append( format_str.format( pos.atomID, pos.atomType, pos.resName[:3], pos.resID, pos.xp * 10, pos.yp * 10, pos.zp * 10, dummy_mass, int(dummy_charge), pos.atomType[0], ) ) pdb_str = "TITLE " + str(self.TITLE).replace("END", "") + "\n" pdb_str += "\n".join(frame_positions) else: # 2) CONSTUCT PDB BLOCKS # ref: https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html pdb_format = ( "ATOM {:>5d} {:>4} {:<3} {:1}{:>4d} {:>8.3f}{:>8.3f}{:>8.3f} {:>3.2f} {:>5} {:>4}{:>2}" ) # pdb_format = "ATOM %5d %-4s %3s %1s%4d %s%s%s 1.00 %5s %-4s%2s " frame_positions = [] for ind, atom in enumerate(self.POSITION): frame_positions.append( pdb_format.format( atom.atomID % 100000, atom.atomType, atom.resName, dummy_chain, atom.resID % 10000, atom.xp * 10, atom.yp * 10, atom.zp * 10, dummy_occupancy, dummy_bfactor, int(dummy_charge), atom.atomType[0], ) ) # times *10 because pdb is in A pdb_str = "TITLE " + str(self.TITLE).replace("END", "") + "\n" pdb_str += "\n".join(frame_positions) # future: # add bond block with top optionally if connectivity_top is not None and hasattr(connectivity_top, "BOND") and connectivity_top.BOND is not None: connection_block = defaultdict(list) for bond in connectivity_top.BOND: connection_block[bond.IB].append(bond.JB) out_str = "" for atomI in sorted(connection_block): connections = connection_block[atomI] substr = "".join([" {:>4}" for i in range(len(connections))]) out_str += ("CONECT {:>4}" + substr).format(atomI, *connections) + "\n" pdb_str += out_str pdb_str += "\nEND" return pdb_str
[docs] def get_xyz(self) -> str: """ translate cnf to xyz Returns ------- str in xyz format """ xyz_str = str(len(self.POSITION)) + "\n" xyz_str += "# " + str(self.TITLE.content[0]).strip() + "\t >> exported wit PyGromosTools <<\n" xyz_format = " {:<3} {:> 3.9f} {:> 3.9f} {:> 3.9f}\n" for position in self.POSITION: xyz_line = xyz_format.format(position.atomType[0], position.xp * 10, position.yp * 10, position.zp * 10) xyz_str += xyz_line return xyz_str
[docs] def write_xyz(self, out_path: str) -> str: """ This function converts the atom POS db of the traj into a xyz structure. Parameters ---------- out_path : str path, were the file should be written to. Returns ------- str outpath of the file """ return self._write_to_file(out_path=out_path, content_str=self.get_xyz())
[docs] def write_pdb(self, out_path: str) -> str: """ This function converts the atom POS db of the traj into a pdb traj. Parameters ---------- out_path : str path, were the file should be written to. Returns ------- str outpath of the file """ return self._write_to_file(out_path=out_path, content_str=self.get_pdb())
""" visualize """ @property def view(self) -> nj.NGLWidget: if not hasattr(self, "_view") or not (hasattr(self, "_view") and isinstance(self._view, nj.NGLWidget)): return self.recreate_view() else: return self._view
[docs] def get_mdtraj(self): tmpFile = tempfile.NamedTemporaryFile(suffix="_tmp.pdb") self.write_pdb(tmpFile.name) self._mdtraj = mdtraj.load_pdb(tmpFile.name) if hasattr(self, "GENBOX"): self._mdtraj.unitcell_lengths = self.GENBOX.length self._mdtraj.unitcell_angles = self.GENBOX.angles # print(tmpFile.name) #for debbuging and checking if temp file is really deleted. tmpFile.close() return self._mdtraj
[docs] def recreate_view(self) -> nj.NGLWidget: # Topo tmp file self._mdtraj = self.get_mdtraj() self._view = visualize_system(traj=self._mdtraj) return self._view
[docs] def recenter_pbc(self): """ This function is shifting the coordinates such that the solute molecule is centered, if it is placed in the corners. However, this function might break down with more complicated situations. Careful the function is not fail safe ;) """ self.get_mdtraj() self._mdtraj.image_molecules() # write new pos: result_pos = [] for new_pos, aP in zip(self._mdtraj.xyz[0], self.POSITION): result_pos.append( blocks.atomP( xp=new_pos[0], yp=new_pos[1], zp=new_pos[2], resID=aP.resID, resName=aP.resName, atomType=aP.atomType, atomID=aP.atomID, ) ) self.POSITION = result_pos