From e4f8ab36b851d99ff554e8db5348659425134b8c Mon Sep 17 00:00:00 2001 From: jessbade Date: Mon, 28 Aug 2023 09:19:10 -0700 Subject: [PATCH 1/5] Fix SMARTS conversion error --- isicle/md.py | 515 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 322 insertions(+), 193 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 1fa0a18..9fc6e50 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -10,14 +10,14 @@ from isicle.interfaces import WrapperInterface from isicle.parse import XTBParser -''' +""" Files resulting from an xtb job always run in the same directory that the command is issued in, no matter where the input is. Can direct the .log file, but no other files. -''' +""" def _program_selector(program): - ''' + """ Selects a supported molecular dynamics program for associated simulation. Currently only NWChem has been implemented. @@ -32,19 +32,20 @@ def _program_selector(program): Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.MDWrapperInterface`. - ''' + """ - program_map = {'xtb': XTBWrapper, 'rdkit': RDKitWrapper, 'tinker': TINKERWrapper} + program_map = {"xtb": XTBWrapper, "rdkit": RDKitWrapper, "tinker": TINKERWrapper} if program.lower() in program_map.keys(): return program_map[program.lower()]() else: raise ValueError( - '{} not a supported molecular dynamics program.'.format(program)) + "{} not a supported molecular dynamics program.".format(program) + ) -def md(geom, program='xtb', **kwargs): - ''' +def md(geom, program="xtb", **kwargs): + """ Optimize geometry via molecular dyanmics using supplied forcefield and basis set. @@ -58,14 +59,14 @@ def md(geom, program='xtb', **kwargs): result Object containing relevant outputs from the simulation. - ''' + """ # Select program return _program_selector(program).run(geom, **kwargs) class XTBWrapper(XYZGeometry, WrapperInterface): - ''' + """ Wrapper for xtb functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -84,18 +85,17 @@ class XTBWrapper(XYZGeometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ - _defaults = ['geom'] + _defaults = ["geom"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -103,7 +103,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -112,8 +112,8 @@ def set_geometry(self, geom): # Save geometry self.save_geometry() - def save_geometry(self, fmt='xyz'): - ''' + def save_geometry(self, fmt="xyz"): + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -122,20 +122,22 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() - geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - self.fmt.lower())) + geomfile = os.path.join( + self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower()) + ) # All other formats isicle.io.save(geomfile, self.geom) self.geom.path = geomfile - def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solvation=None): - ''' + def _configure_xtb( + self, forcefield="gfn2", optlevel="normal", charge=None, solvation=None + ): + """ Set command line for xtb simulations. Parameters @@ -152,39 +154,50 @@ def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solv Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Add base command - s = 'xtb ' + s = "xtb " # Add geometry - s += '{}.{}'.format(self.basename, self.fmt.lower()) + s += "{}.{}".format(self.basename, self.fmt.lower()) # Add optimize tag - s += ' --opt ' + optlevel + ' ' + s += " --opt " + optlevel + " " # Add forcefield - s += '--' + forcefield + ' ' + s += "--" + forcefield + " " # Add optional charge if charge is not None: - s += '--chrg ' + charge + ' ' + s += "--chrg " + charge + " " # Add optional implicit solvation if solvation is not None: - s += '--alpb ' + solvation + ' ' + s += "--alpb " + solvation + " " # Add output - s += '&>' + ' ' + s += "&>" + " " - s += '{}.{}'.format(self.basename, "out") + s += "{}.{}".format(self.basename, "out") return s - def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', - protonate=False, deprotonate=False, tautomerize=False, - ion=None, charge=None, dryrun=False, processes=1, - solvation=None, ignore_topology=False): - ''' + def _configure_crest( + self, + ewin=6, + optlevel="Normal", + forcefield="gfn2", + protonate=False, + deprotonate=False, + tautomerize=False, + ion=None, + charge=None, + dryrun=False, + processes=1, + solvation=None, + ignore_topology=False, + ): + """ Set command line for crest simulations. Parameters @@ -215,67 +228,75 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Start base command - s = 'crest ' + s = "crest " # Add geometry - s += str(os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - self.fmt.lower()))) + s += str( + os.path.join(self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower())) + ) - s += ' ' + s += " " # Add optional tag if protonate: - s += '-protonate ' + s += "-protonate " elif deprotonate: - s += '-deprotonate ' + s += "-deprotonate " elif tautomerize: - s += '-tautomerize ' + s += "-tautomerize " if ion is not None: - s += '-swel ' + ion + ' ' + s += "-swel " + ion + " " if charge is not None: - s += '-chrg ' + str(charge) + ' ' + s += "-chrg " + str(charge) + " " # Add dryrun option if dryrun: - s += '--dryrun ' + s += "--dryrun " # Add energy window - s += '--ewin ' + str(ewin) + ' ' + s += "--ewin " + str(ewin) + " " # Add optlevel - s += '--optlevel ' + optlevel + ' ' + s += "--optlevel " + optlevel + " " # Add forcefield - s += '-' + forcefield + ' ' + s += "-" + forcefield + " " # Add optional solvation if solvation is not None: - s += '--alpb ' + solvation + ' ' + s += "--alpb " + solvation + " " # Number of processes - s += '-T ' + str(processes) + ' ' + s += "-T " + str(processes) + " " if ignore_topology: - s += '--noreftopo ' + s += "--noreftopo " # Add output - s += '&>' + ' ' + s += "&>" + " " - s += os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - "out")) + s += os.path.join(self.temp_dir, "{}.{}".format(self.basename, "out")) return s - def configure(self, task='optimize', forcefield='gfn2', charge=None, - ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, - solvation=None, ignore_topology=False): - ''' + def configure( + self, + task="optimize", + forcefield="gfn2", + charge=None, + ewin=6, + ion=None, + optlevel="Normal", + dryrun=False, + processes=1, + solvation=None, + ignore_topology=False, + ): + """ Generate command line Parameters @@ -300,57 +321,59 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ if type(task) == list: - raise TypeError('Initiate one xtb or crest job at a time.') + raise TypeError("Initiate one xtb or crest job at a time.") if type(forcefield) == list: - raise TypeError('Initiate one forcefield at a time.') + raise TypeError("Initiate one forcefield at a time.") if type(optlevel) == list: - raise TypeError('Initiate one opt level at a time.') + raise TypeError("Initiate one opt level at a time.") - if task == 'optimize': - config = self._configure_xtb(optlevel=optlevel, - forcefield=forcefield) + if task == "optimize": + config = self._configure_xtb(optlevel=optlevel, forcefield=forcefield) else: - if task == 'conformer': + if task == "conformer": p, d, t, i = False, False, False, None - elif task == 'protonate': + elif task == "protonate": p, d, t, i = True, False, False, ion - elif task == 'deprotonate': + elif task == "deprotonate": p, d, t, i = False, True, False, ion - elif task == 'tautomerize': + elif task == "tautomerize": p, d, t, i = False, False, True, ion if p is not None: - config = self._configure_crest(ewin=ewin, - optlevel=optlevel, - forcefield=forcefield, - protonate=p, - deprotonate=d, - tautomerize=t, - ion=i, - charge=charge, - dryrun=dryrun, - processes=processes, - solvation=solvation, - ignore_topology=ignore_topology) + config = self._configure_crest( + ewin=ewin, + optlevel=optlevel, + forcefield=forcefield, + protonate=p, + deprotonate=d, + tautomerize=t, + ion=i, + charge=charge, + dryrun=dryrun, + processes=processes, + solvation=solvation, + ignore_topology=ignore_topology, + ) else: raise Error( - 'Task not assigned properly, please choose optimize, conformer, protonate, deprotonate, or tautomerize') + "Task not assigned properly, please choose optimize, conformer, protonate, deprotonate, or tautomerize" + ) self.task = task self.config = config def submit(self): - ''' + """ Run xtb or crest simulation according to configured inputs. - ''' + """ owd = os.getcwd() os.chdir(self.temp_dir) job = self.config @@ -358,25 +381,24 @@ def submit(self): os.chdir(owd) def finish(self): - ''' + """ Parse results, save xtb output files, and clean temporary directory - ''' + """ parser = XTBParser() - parser.load(os.path.join(self.temp_dir, self.basename + '.out')) - self.output = parser.load(os.path.join( - self.temp_dir, self.basename + '.out')) + parser.load(os.path.join(self.temp_dir, self.basename + ".out")) + self.output = parser.load(os.path.join(self.temp_dir, self.basename + ".out")) result = parser.parse() self.__dict__.update(result) for i in self.geom: - i.add___dict__({k: v for k, v in result.items() if k != 'geom'}) + i.add___dict__({k: v for k, v in result.items() if k != "geom"}) i.__dict__.update(basename=self.basename) - if self.task != 'optimize': + if self.task != "optimize": conformerID = 1 for i in self.geom: i.__dict__.update(conformerID=conformerID) @@ -386,7 +408,7 @@ def finish(self): self.geom = self.geom[0] def run(self, geom, **kwargs): - ''' + """ Optimize geometry via density functional theory using supplied functional and basis set. @@ -403,7 +425,7 @@ def run(self, geom, **kwargs): :obj:`~isicle.md.XTBWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = XTBWrapper() @@ -423,7 +445,7 @@ def run(self, geom, **kwargs): return self def get_structures(self): - ''' + """ Extract all structures from containing object as a conformational ensemble. Returns @@ -431,33 +453,35 @@ def get_structures(self): :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - ''' + """ if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): return self.geom raise TypeError( - 'Object does not contain multiple structures. Use `get_structure` instead.') + "Object does not contain multiple structures. Use `get_structure` instead." + ) def get_structure(self): - ''' + """ Extract structure from containing object. Returns ------- :obj:`~isicle.geometry.XYZGeometry` - Structure instance. + Structure instance. - ''' + """ if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): raise TypeError( - 'Object contains multiple structures. Use `get_structures` instead.') + "Object contains multiple structures. Use `get_structures` instead." + ) return self.geom class RDKitWrapper(Geometry, WrapperInterface): - ''' + """ Wrapper for rdkit functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -476,14 +500,13 @@ class RDKitWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ - _defaults = ['geom'] + _defaults = ["geom"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) def set_geometry(self): @@ -504,7 +527,7 @@ def finish(self): class TINKERWrapper(Geometry, WrapperInterface): - ''' + """ Wrapper for TINKER functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -523,77 +546,175 @@ class TINKERWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ - _defaults = ['geom'] + _defaults = ["geom"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) - def _convert_to_tinkerxyz(self): - ''' - Convert mol to TINKER XYZ format using code from DP5, Goodman Lab. + """ + Convert mol to TINKER XYZ format using code from DP5, Goodman Lab. Parameters ---------- geom : :obj: `~isicle.geometry.Geometry` Molecule representation. - ''' + """ # getting MMFF values for large atom types - def getMMFF_large_atom_type(mmff_props , atom, m): - - small_to_large_list = [[[]], + def getMMFF_large_atom_type(mmff_props, atom, m): + small_to_large_list = [ + [[]], [[1]], - [[3, "C"], [2,"C=C"]], - [[4,"C=O"], [5,"C=N"], [6,"NC(N)=N"], [7,"CC=O"], [8,"NC=O"], [10,"NC(=O)N"], [11,"OC=O"], [12,"NC(=O)O"], [13,"NC(=O)O"], [14,"OC(=O)O"], [15,"SC=O"], [16,"NC=S"], [17,"C=S(O)O"], [18,"C=S=O"], [19,"SC=S"], [20,"C=P"]], - [[21,"C#[C,N]"], [22,"[C,N,O]=C=[C,N,O]"]], - [[23,"C[H]"], [24,"[Si][H]"]], - [[41,"O"], [25,"OC"], [26,"OC=O"], [27,"OC=C"], [27,"Occ"], [28,"OC=N"], [29,"OC=S"], [31,"ON=O"], [30,"O[N+]([O-])=O"], [36,"OS"], [34,"OSO"], [35,"OS=O"], [33,"OS(O)=O"], [32,"OS(O)(=O)=O"], [40,"OP"], [39,"OPO"], [38,"OP(=O)O"], [37,"OP(=O)(=O)O"]], - [[42,"C=O"], [44,"CC=O"], [43,"NC=O"], [45,"OC=O"], [46,"O=N"], [47,"S=O"], [48,"[C,N]=S=O"]], + [[3, "C"], [2, "C=C"]], + [ + [4, "C=O"], + [5, "C=N"], + [6, "NC(N)=N"], + [7, "CC=O"], + [8, "NC=O"], + [10, "NC(=O)N"], + [11, "OC=O"], + [12, "NC(=O)O"], + [13, "NC(=O)O"], + [14, "OC(=O)O"], + [15, "SC=O"], + [16, "NC=S"], + [17, "C=S(O)O"], + [18, "C=S=O"], + [19, "SC=S"], + [20, "C=P"], + ], + [[21, "C#[C,N]"], [22, "[C,N,O]=C=[C,N,O]"]], + [[23, "C[H]"], [24, "[Si][H]"]], + [ + [41, "O"], + [25, "OC"], + [26, "OC=O"], + [27, "OC=C"], + [27, "Occ"], + [28, "OC=N"], + [29, "OC=S"], + [31, "ON=O"], + [30, "O[N+]([O-])=O"], + [36, "OS"], + [34, "OSO"], + [35, "OS=O"], + [33, "OS(O)=O"], + [32, "OS(O)(=O)=O"], + [40, "OP"], + [39, "OPO"], + [38, "OP(=O)O"], + [37, "OP(=O)(=O)O"], + ], + [ + [42, "C=O"], + [44, "CC=O"], + [43, "NC=O"], + [45, "OC=O"], + [46, "O=N"], + [47, "S=O"], + [48, "[C,N]=S=O"], + ], [[49]], - [[50,"C=N"], [51,"N=N"]], - [[52,"NC=O"], [53,"NC=S"], [54,"NN=C"], [55,"NN=N"]], + [[50, "C=N"], [51, "N=N"]], + [[52, "NC=O"], [53, "NC=S"], [54, "NN=C"], [55, "NN=N"]], [[56]], [[57]], [[58]], [[59]], [[60]], [[61]], - [[62,"S=O"], [63,"S=N"]], - [[64,"O=S=O"], [70,"OSN"], [65,"N-S(=O)=O"], [66,"OS(O)O"], [67,"C"], [68,"OS(O)(O)O"], [69,"CS(O)(O)C"]], + [[62, "S=O"], [63, "S=N"]], + [ + [64, "O=S=O"], + [70, "OSN"], + [65, "N-S(=O)=O"], + [66, "OS(O)O"], + [67, "C"], + [68, "OS(O)(O)O"], + [69, "CS(O)(O)C"], + ], [[71]], [[72]], - [[74,"[H]O"], [73,"[H]OC"], [75,"[H][O-]"]], + [[74, "[H]O"], [73, "[H]OC"], [75, "[H][O-]"]], [[76]], - [[82,"[H]N"], [77,"[H]N(C)C"], [78,"[H]N([H])[H]"], [79,"[H]n1cccc1"], [80,"[H]NO"], [81,"[H][N-]"]], - [[83,"[H]OC=O"], [84,"[H]OP"]], - [[89,"P"], [88,"PO"], [87,"OPO"], [86,"OP(O)O"], [89,"OP(O)(O)O"]], + [ + [82, "[H]N"], + [77, "[H]N(C)C"], + [78, "[H]N([H])[H]"], + [79, "[H]n1cccc1"], + [80, "[H]NO"], + [81, "[H][N-]"], + ], + [[83, "[H]OC=O"], [84, "[H]OP"]], + [[89, "P"], [88, "PO"], [87, "OPO"], [86, "OP(O)O"], [89, "OP(O)(O)O"]], [[90]], - [[91,"[H]N=N"], [92,"[H]N=C"]], - [[102,"[H]N"], [93,"[H]NC=O"], [94,"[H]NC=S"], [95,"[H]NC=C"], [96,"[H]NC=N"], [97,"[H]NN=C"], [98,"[H]NN=N"], [99,"[H]NS=O"], [100,"[H]NP=O"], [101,"HN#[C,N]"]], - [[103,"[H]OC=C"], [104,"[C]OC=N"]], + [[91, "[H]N=N"], [92, "[H]N=C"]], + [ + [102, "[H]N"], + [93, "[H]NC=O"], + [94, "[H]NC=S"], + [95, "[H]NC=C"], + [96, "[H]NC=N"], + [97, "[H]NN=C"], + [98, "[H]NN=N"], + [99, "[H]NS=O"], + [100, "[H]NP=O"], + [101, "[H]N#[C,N]"], + ], + [[103, "[H]OC=C"], [104, "[C]OC=N"]], [[105]], [[106]], - [[107,"[O-]C=O"], [108,"NO"], [109,"ON=O"], [110,"O[N+]([O-])=O"], [111,"[O-][N+]([O-])=O"], [112,"OS"], [113,"OS=O"], [114,"OS(=O)=O"], [115,"OS(=O)(=O)O"], [116,"O=[S-]S"], [117,"OP"], [118,"OPO"], [119,"OP(=O)O"], [120,"OP(=O)(=O))"], [121,"OCl(=O)(=O)[O-]"]], + [ + [107, "[O-]C=O"], + [108, "NO"], + [109, "ON=O"], + [110, "O[N+]([O-])=O"], + [111, "[O-][N+]([O-])=O"], + [112, "OS"], + [113, "OS=O"], + [114, "OS(=O)=O"], + [115, "OS(=O)(=O)O"], + [116, "O=[S-]S"], + [117, "OP"], + [118, "OPO"], + [119, "OP(=O)O"], + [120, "OP(=O)(=O)"], + [121, "OCl(=O)(=O)[O-]"], + ], [[122]], [[123]], - [[124,"[O-]"], [125,"[O-]C=[C,N]"]], - [[126,"[H][N+][H,C][H,C][H,C]"], [127,"C1=[NH+]C=CN1"], [128,"C1=C[NH+]=CC=C1"], [129,"CC(N)=[NH2+]"], [130,"C=[NH2+]"], [131,"NC(N)=[NH2+]"], [132,"[H]N([H])([H])([H])[H]"]], + [[124, "[O-]"], [125, "[O-]C=[C,N]"]], + [ + [126, "[H][N+][H,C][H,C][H,C]"], + [127, "C1=[NH+]C=CN1"], + [128, "C1=C[NH+]=CC=C1"], + [129, "CC(N)=[NH2+]"], + [130, "C=[NH2+]"], + [131, "NC(N)=[NH2+]"], + [132, "[H]N([H])([H])([H])[H]"], + ], [[133]], [[134]], [[135]], - [[136,"NC=C"], [137,"NC=N"], [138,"NC=P"], [139,"NC#C"]], - [[140,"[O-]C=O"], [141,"[S-]C=S"]], + [[136, "NC=C"], [137, "NC=N"], [138, "NC=P"], [139, "NC#C"]], + [[140, "[O-]C=O"], [141, "[S-]C=S"]], [[142]], - [[143,"NS(=O)O"], [144,"NS(=O)(=O)O"], [145,"NP(=O)O"], [146,"NP(=O)(=O)O"], [147,"NC#N"]], + [ + [143, "NS(=O)O"], + [144, "NS(=O)(=O)O"], + [145, "NP(=O)O"], + [146, "NP(=O)(=O)O"], + [147, "NC#N"], + ], [[148]], - [[149,"ON=O"], [150,"O[N+][O-]=O"]], + [[149, "ON=O"], [150, "O[N+][O-]=O"]], [[151]], [[152]], [[153]], @@ -602,10 +723,10 @@ def getMMFF_large_atom_type(mmff_props , atom, m): [[156]], [[157]], [[158]], - [[159,"[N+]=C"], [160,"[N+=N]"]], + [[159, "[N+]=C"], [160, "[N+]=N"]], [[161]], [[162]], - [[163,"NC(N)=[NH2+]"], [164,"[N+]=CN"]], + [[163, "NC(N)=[NH2+]"], [164, "[N+]=CN"]], [[165]], [[166]], [[167]], @@ -619,9 +740,9 @@ def getMMFF_large_atom_type(mmff_props , atom, m): [[175]], [[176]], [[177]], - [[178,"[H]S"], [179,"[H]S=N"], [180,"[H]P"]], - [[181,"SP"], [183,"[S-]"], [182,"[S-]C=S"], [184,"[S-]S(=O)"]], - [[185,"[O-]S=O"], [186,"[O-]S=S"]], + [[178, "[H]S"], [179, "[H]S=N"], [180, "[H]P"]], + [[181, "SP"], [183, "[S-]"], [182, "[S-]C=S"], [184, "[S-]S(=O)"]], + [[185, "[O-]S=O"], [186, "[O-]S=S"]], [[187]], [[188]], [[189]], @@ -629,8 +750,17 @@ def getMMFF_large_atom_type(mmff_props , atom, m): [[191]], [[192]], [[193]], - [[194,"N[N+]1=CNC=C1"], [195,"[H][N+]([H])([H])([H])[H]"], [196,"[H][N+]([H])([H])([H])[H]"], [197,"[H][N+]([H])([H])([H])[H]"]], - [[198,"[H][N+]([H])([H])([H])[H]"],[199,"[H][N+]([H])([H])([H])[H]"],[200,"[H][N+]([H])([H])([H])[H]"]], + [ + [194, "N[N+]1=CNC=C1"], + [195, "[H][N+]([H])([H])([H])[H]"], + [196, "[H][N+]([H])([H])([H])[H]"], + [197, "[H][N+]([H])([H])([H])[H]"], + ], + [ + [198, "[H][N+]([H])([H])([H])[H]"], + [199, "[H][N+]([H])([H])([H])[H]"], + [200, "[H][N+]([H])([H])([H])[H]"], + ], [[-1]], [[-1]], [[-1]], @@ -643,11 +773,12 @@ def getMMFF_large_atom_type(mmff_props , atom, m): [[206]], [[207]], [[208]], - [[209,"[Zn]"], [210,"Zn++"]], + [[209, "[Zn]"], [210, "[Zn++]"]], [[211]], [[212]], [[213]], - [[214]]] + [[214]], + ] MMFF_small_atom_type = mmff_props.GetMMFFAtomType(atom.GetIdx()) MMFF_large_atom_type = small_to_large_list[MMFF_small_atom_type][0][0] @@ -663,34 +794,36 @@ def getMMFF_large_atom_type(mmff_props , atom, m): conf = mol.GetConformer() mmff_props = AllChem.MMFFGetMoleculeProperties(mol) - xyz = '' + xyz = "" # Set header line with number of atoms and basename - xyz += "{:>6} {}\n".format(mol.GetNumAtoms(),self.basename) + xyz += "{:>6} {}\n".format(mol.GetNumAtoms(), self.basename) for atom in mol.GetAtoms(): bond_list = [] attached_atoms = "" for connection in atom.GetNeighbors(): - bond_list.append(connection.GetIdx()+1) + bond_list.append(connection.GetIdx() + 1) bond_list.sort() for connection in bond_list: - attached_atoms += "{:>5}".format(str(connection))+" " - - xyz += "{:>6} {:>2} {:13.6f} {:11.6f} {:11.6f} {:>5} {}\n".format(atom.GetIdx()+1, - atom.GetSymbol(), - list(conf.GetAtomPosition(atom.GetIdx()))[0], - list(conf.GetAtomPosition(atom.GetIdx()))[1], - list(conf.GetAtomPosition(atom.GetIdx()))[2], - getMMFF_large_atom_type(mmff_props , atom, mol), - attached_atoms) - + attached_atoms += "{:>5}".format(str(connection)) + " " + + xyz += "{:>6} {:>2} {:13.6f} {:11.6f} {:11.6f} {:>5} {}\n".format( + atom.GetIdx() + 1, + atom.GetSymbol(), + list(conf.GetAtomPosition(atom.GetIdx()))[0], + list(conf.GetAtomPosition(atom.GetIdx()))[1], + list(conf.GetAtomPosition(atom.GetIdx()))[2], + getMMFF_large_atom_type(mmff_props, atom, mol), + attached_atoms, + ) + return xyz def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -698,7 +831,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -709,8 +842,8 @@ def set_geometry(self, geom): # Save geometry self.save_geometry() - def save_geometry(self, fmt='xyz'): - ''' + def save_geometry(self, fmt="xyz"): + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -719,30 +852,28 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() - geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - self.fmt.lower())) + geomfile = os.path.join( + self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower()) + ) - with open(geomfile, 'w+') as f: + with open(geomfile, "w+") as f: f.write(self.tinkerxyz) f.close() self.geom.path = geomfile - def configure(self, task='scan', tinker_path='~/tinker'): - - config = tinker_path + '/bin/' + task + ' ' + self.geom.path + ' ' - config += tinker_path + '/params/mmff.prm 0 10 20 0.00001 ' - config += '| tee ./'+self.basename + '.tout' + def configure(self, task="scan", tinker_path="~/tinker"): + config = tinker_path + "/bin/" + task + " " + self.geom.path + " " + config += tinker_path + "/params/mmff.prm 0 10 20 0.00001 " + config += "| tee ./" + self.basename + ".tout" self.config = config def submit(self): - owd = os.getcwd() os.chdir(self.temp_dir) job = self.config @@ -750,22 +881,20 @@ def submit(self): os.chdir(owd) def finish(self): - parser = TINKERParser() - parser.load(os.path.join(self.temp_dir, self.basename + '.tout')) - self.output = parser.load(os.path.join( - self.temp_dir, self.basename + '.tout')) + parser.load(os.path.join(self.temp_dir, self.basename + ".tout")) + self.output = parser.load(os.path.join(self.temp_dir, self.basename + ".tout")) result = parser.parse() self.__dict__.update(result) for i in self.geom: - i.add___dict__({k: v for k, v in result.items() if k != 'geom'}) + i.add___dict__({k: v for k, v in result.items() if k != "geom"}) i.__dict__.update(basename=self.basename) - if self.task != 'optimize': + if self.task != "optimize": conformerID = 1 for i in self.geom: i.__dict__.update(conformerID=conformerID) @@ -775,7 +904,7 @@ def finish(self): self.geom = self.geom[0] def run(self, geom, **kwargs): - ''' + """ Take geometry and conduct specified tasks using TINKER Parameters @@ -791,7 +920,7 @@ def run(self, geom, **kwargs): :obj:`~isicle.md.TINKERWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = TINKERWrapper() @@ -811,4 +940,4 @@ def run(self, geom, **kwargs): return self def finish(self): - return \ No newline at end of file + return From f2f1537514a10d4b7eb5c83dae46792c27b00370 Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 29 Aug 2023 08:45:36 -0700 Subject: [PATCH 2/5] Update TINKER parsing --- isicle/parse.py | 1107 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 836 insertions(+), 271 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index 8a46935..6a8b890 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -8,8 +8,9 @@ from openbabel import pybel import isicle + class NWChemParser(FileParserInterface): - '''Extract text from an NWChem simulation output file.''' + """Extract text from an NWChem simulation output file.""" def __init__(self): self.contents = None @@ -17,23 +18,22 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' - with open(path, 'r') as f: + """Load in the data file""" + with open(path, "r") as f: self.contents = f.readlines() self.path = path return self.contents def _parse_geometry(self): search = os.path.dirname(self.path) - geoms = sorted(glob.glob(os.path.join(search, '*.xyz'))) - + geoms = sorted(glob.glob(os.path.join(search, "*.xyz"))) + if len(geoms) > 0: return isicle.io.load(geoms[-1]) raise Exception def _parse_energy(self): - # TO DO: Add Initial energy and final energy if different # Init @@ -41,14 +41,13 @@ def _parse_energy(self): # Cycle through file for line in self.contents: - if 'Total DFT energy' in line: + if "Total DFT energy" in line: # Overwrite last saved energy energy = float(line.split()[-1]) return energy def _parse_shielding(self): - # Init ready = False shield_idxs = [] @@ -74,10 +73,8 @@ def _parse_shielding(self): shield_idxs.append(int(idx)) if len(shields) > 1: - return {'index': shield_idxs, - 'atom': shield_atoms, - 'shielding': shields} - + return {"index": shield_idxs, "atom": shield_atoms, "shielding": shields} + raise Exception def _parse_spin(self): @@ -112,8 +109,12 @@ def _parse_spin(self): g = float(line[5]) g_factor.append(g) - return {'pair indices': coup_pairs, 'spin couplings': coup, - 'index': index, 'g-tensors': g_factor} + return { + "pair indices": coup_pairs, + "spin couplings": coup, + "index": index, + "g-tensors": g_factor, + } def _parse_frequency(self): # TO DO: Add freq intensities @@ -129,41 +130,58 @@ def _parse_frequency(self): has_frequency = None for i, line in enumerate(self.contents): - if ('geometry' in line) and (natoms is None): + if ("geometry" in line) and (natoms is None): atom_start = i + 7 - if ('Atomic Mass' in line) and (natoms is None): + if ("Atomic Mass" in line) and (natoms is None): atom_stop = i - 2 natoms = atom_stop - atom_start + 1 - if 'Normal Eigenvalue' in line: + if "Normal Eigenvalue" in line: has_frequency = True freq_start = i + 3 freq_stop = i + 2 + 3 * natoms # Get values - if 'Zero-Point correction to Energy' in line: - zpe = line.rstrip().split('=')[-1] + if "Zero-Point correction to Energy" in line: + zpe = line.rstrip().split("=")[-1] - if 'Thermal correction to Enthalpy' in line: - enthalpies = line.rstrip().split('=')[-1] + if "Thermal correction to Enthalpy" in line: + enthalpies = line.rstrip().split("=")[-1] - if 'Total Entropy' in line: - entropies = line.rstrip().split('=')[-1] + if "Total Entropy" in line: + entropies = line.rstrip().split("=")[-1] - if 'constant volume heat capacity' in line: - capacities = line.rstrip().split('= ')[-1] + if "constant volume heat capacity" in line: + capacities = line.rstrip().split("= ")[-1] if has_frequency is True: - freq = np.array([float(x.split()[1]) for x in self.contents[freq_start:freq_stop + 1]]) - intensity_au = np.array([float(x.split()[3]) for x in self.contents[freq_start:freq_stop + 1]]) - intensity_debyeangs = np.array([float(x.split()[4]) for x in self.contents[freq_start:freq_stop + 1]]) - intensity_KMmol = np.array([float(x.split()[5]) for x in self.contents[freq_start:freq_stop + 1]]) - intensity_arbitrary = np.array([float(x.split()[6]) for x in self.contents[freq_start:freq_stop + 1]]) - - return {'frequencies': freq, 'intensity atomic units': intensity_au, 'intensity (debye/angs)**2': intensity_debyeangs, - 'intensity (KM/mol)': intensity_KMmol, 'intensity arbitrary': intensity_arbitrary, - 'correction to enthalpy': enthalpies, 'total entropy': entropies, - 'constant volume heat capacity': capacities, 'zero-point correction': zpe} - + freq = np.array( + [float(x.split()[1]) for x in self.contents[freq_start : freq_stop + 1]] + ) + intensity_au = np.array( + [float(x.split()[3]) for x in self.contents[freq_start : freq_stop + 1]] + ) + intensity_debyeangs = np.array( + [float(x.split()[4]) for x in self.contents[freq_start : freq_stop + 1]] + ) + intensity_KMmol = np.array( + [float(x.split()[5]) for x in self.contents[freq_start : freq_stop + 1]] + ) + intensity_arbitrary = np.array( + [float(x.split()[6]) for x in self.contents[freq_start : freq_stop + 1]] + ) + + return { + "frequencies": freq, + "intensity atomic units": intensity_au, + "intensity (debye/angs)**2": intensity_debyeangs, + "intensity (KM/mol)": intensity_KMmol, + "intensity arbitrary": intensity_arbitrary, + "correction to enthalpy": enthalpies, + "total entropy": entropies, + "constant volume heat capacity": capacities, + "zero-point correction": zpe, + } + raise Exception def _parse_charge(self): @@ -174,13 +192,12 @@ def _parse_charge(self): ready = False for line in self.contents: - # Load charges from table - if 'Atom Charge Shell Charges' in line: + if "Atom Charge Shell Charges" in line: # Table header found. Overwrite anything saved previously ready = True charges = [] - elif ready is True and line.strip() in ['', 'Line search:']: + elif ready is True and line.strip() in ["", "Line search:"]: # Table end found ready = False elif ready is True: @@ -205,17 +222,18 @@ def _parse_charge(self): charges = charges[1:] # Process charge information - df = pd.DataFrame([x.split()[0:4] for x in charges], - columns=['idx', 'Atom', 'Number', 'Charge']) - df.Number = df.Number.astype('int') - df.Charge = df.Number - df.Charge.astype('float') + df = pd.DataFrame( + [x.split()[0:4] for x in charges], + columns=["idx", "Atom", "Number", "Charge"], + ) + df.Number = df.Number.astype("int") + df.Charge = df.Number - df.Charge.astype("float") return df.Charge.values raise Exception def _parse_timing(self): - # Init indices = [] preoptTime = 0 @@ -228,44 +246,46 @@ def _parse_timing(self): freq = False for i, line in enumerate(self.contents): - # ? - if 'No.' in line and len(indices) == 0: + if "No." in line and len(indices) == 0: indices.append(i + 2) # 0 - elif 'Atomic Mass' in line and len(indices) == 1: + elif "Atomic Mass" in line and len(indices) == 1: indices.append(i - 1) # 1 indices.append(i + 3) # 2 - elif 'Effective nuclear repulsion energy' in line and len(indices) == 3: + elif "Effective nuclear repulsion energy" in line and len(indices) == 3: indices.append(i - 2) # 3 # Check for optimization and frequency calcs - if 'NWChem geometry Optimization' in line: + if "NWChem geometry Optimization" in line: opt = True - elif 'NWChem Nuclear Hessian and Frequency Analysis' in line: + elif "NWChem Nuclear Hessian and Frequency Analysis" in line: freq = True # Get timing - if 'Total iterative time' in line and opt is False: - preoptTime += float(line.rstrip().split('=')[1].split('s')[0]) - elif 'Total iterative time' in line and opt is True and freq is False: - geomoptTime += float(line.rstrip().split('=')[1].split('s')[0]) - elif 'Total iterative time' in line and freq is True: - freqTime += float(line.rstrip().split('=')[1].split('s')[0]) - - if 'Total times' in line: - cpuTime = float(line.rstrip().split(':')[1].split('s')[0]) + if "Total iterative time" in line and opt is False: + preoptTime += float(line.rstrip().split("=")[1].split("s")[0]) + elif "Total iterative time" in line and opt is True and freq is False: + geomoptTime += float(line.rstrip().split("=")[1].split("s")[0]) + elif "Total iterative time" in line and freq is True: + freqTime += float(line.rstrip().split("=")[1].split("s")[0]) + + if "Total times" in line: + cpuTime = float(line.rstrip().split(":")[1].split("s")[0]) # wallTime = float(line.rstrip().split(':')[2].split('s')[0]) - freqTime = (cpuTime - geomoptTime - preoptTime) + freqTime = cpuTime - geomoptTime - preoptTime # natoms = int(self.contents[indices[1] - 1].split()[0]) - return {'single point': preoptTime, 'geometry optimization': geomoptTime, - 'frequency': freqTime, 'total': cpuTime} + return { + "single point": preoptTime, + "geometry optimization": geomoptTime, + "frequency": freqTime, + "total": cpuTime, + } def _parse_molden(self): - search = splitext(self.path)[0] - m = glob.glob(search + '*.molden') + m = glob.glob(search + "*.molden") if len(m) != 1: raise Exception @@ -273,8 +293,7 @@ def _parse_molden(self): return m[0] def _parse_protocol(self): - - '''Parse out dft protocol''' + """Parse out dft protocol""" functional = [] basis_set = [] solvation = [] @@ -284,50 +303,53 @@ def _parse_protocol(self): solvent = None for line in self.contents: - - if '* library' in line: + if "* library" in line: basis = line.split()[-1] - if ' xc ' in line: - func = line.split(' xc ')[-1].strip() - if 'solvent ' in line: + if " xc " in line: + func = line.split(" xc ")[-1].strip() + if "solvent " in line: solvent = line.split()[-1] - if 'task dft optimize' in line: - tasks.append('optimize') + if "task dft optimize" in line: + tasks.append("optimize") basis_set.append(basis) functional.append(func) solvation.append(solvent) - if 'SHIELDING' in line: - tasks.append('shielding') + if "SHIELDING" in line: + tasks.append("shielding") basis_set.append(basis) functional.append(func) solvation.append(solvent) - if 'SPINSPIN' in line: - tasks.append('spin') + if "SPINSPIN" in line: + tasks.append("spin") basis_set.append(basis) functional.append(func) solvation.append(solvent) - if 'freq ' in line: - tasks.append('frequency') + if "freq " in line: + tasks.append("frequency") basis_set.append(basis) functional.append(func) solvation.append(solvent) - return {'functional': functional, 'basis set': basis_set, - 'solvation': solvation, 'tasks': tasks} + return { + "functional": functional, + "basis set": basis_set, + "solvation": solvation, + "tasks": tasks, + } def _parse_connectivity(self): - coor_substr = 'internuclear distances' + coor_substr = "internuclear distances" # Extracting Atoms & Coordinates ii = [i for i in range(len(self.contents)) if coor_substr in self.contents[i]] ii.sort() - g = ii[0]+4 + g = ii[0] + 4 connectivity = [] - while g <= len(self.contents)-1: - if '-' not in self.contents[g]: + while g <= len(self.contents) - 1: + if "-" not in self.contents[g]: line = self.contents[g].split() - pair = [line[1], line[4],int(line[0]), int(line[3])] + pair = [line[1], line[4], int(line[0]), int(line[3])] connectivity.append(pair) else: @@ -337,115 +359,111 @@ def _parse_connectivity(self): return connectivity def parse(self): - ''' + """ Extract relevant information from NWChem output Parameters ---------- to_parse : list of str - geometry, energy, shielding, spin, frequency, molden, charge, timing - ''' + geometry, energy, shielding, spin, frequency, molden, charge, timing + """ # Check that the file is valid first if len(self.contents) == 0: - raise RuntimeError('No contents to parse: {}'.format(self.path)) - if 'Total times cpu' not in self.contents[-1]: - raise RuntimeError('Incomplete NWChem run: {}'.format(self.path)) + raise RuntimeError("No contents to parse: {}".format(self.path)) + if "Total times cpu" not in self.contents[-1]: + raise RuntimeError("Incomplete NWChem run: {}".format(self.path)) # Initialize result object to store info result = {} try: - result['protocol'] = self._parse_protocol() + result["protocol"] = self._parse_protocol() except: pass - try: - result['geom'] = self._parse_geometry() + result["geom"] = self._parse_geometry() except: pass try: - result['energy'] = self._parse_energy() + result["energy"] = self._parse_energy() except: pass try: - result['shielding'] = self._parse_shielding() + result["shielding"] = self._parse_shielding() except: # Must be no shielding info pass - try: - result['spin'] = self._parse_spin() + result["spin"] = self._parse_spin() except: pass - try: - result['frequency'] = self._parse_frequency() + result["frequency"] = self._parse_frequency() except: pass - try: - result['molden'] = self._parse_molden() + result["molden"] = self._parse_molden() except: pass - try: - result['charge'] = self._parse_charge() + result["charge"] = self._parse_charge() except: pass try: - result['timing'] = self._parse_timing() + result["timing"] = self._parse_timing() except: pass try: - result['connectivity'] = self._parse_connectivity() + result["connectivity"] = self._parse_connectivity() except: pass - + return result def save(self, path): - with open(path, 'wb') as f: + with open(path, "wb") as f: pickle.dump(self, f) return + class ImpactParser(FileParserInterface): - '''Extract text from an Impact mobility calculation output file.''' + """Extract text from an Impact mobility calculation output file.""" def __init__(self): self.contents = None self.result = None def load(self, path: str): - '''Load in the data file''' - with open(path, 'rb') as f: + """Load in the data file""" + with open(path, "rb") as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check CCS results == 1 count = 0 for line in self.contents: - l = line.split(' ') - if 'CCS' in l[0]: + l = line.split(" ") + if "CCS" in l[0]: count += 1 if count != 1: return self.result # Assume values in second line - l = self.contents[1].split(' ') + l = self.contents[1].split(" ") l = [x for x in l if len(x) > 0] # Pull values of interest - may be error prone @@ -456,12 +474,12 @@ def parse(self): values.append(float(l[-2])) values.append(int(l[-1])) except (ValueError, IndexError) as e: - print('Could not parse file: ', e) + print("Could not parse file: ", e) return None # Add to dictionary to return result = {} - keys = ['CCS_PA', 'SEM_rel', 'CCS_TJM', 'n_iter'] + keys = ["CCS_PA", "SEM_rel", "CCS_TJM", "n_iter"] for key, val in zip(keys, values): result[key] = [val] @@ -469,62 +487,65 @@ def parse(self): self.result = result return result # TODO: return CCS? - def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + def save(self, path: str, sep="\t"): + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return + class MobcalParser(FileParserInterface): - '''Extract text from a MOBCAL mobility calculation output file.''' + """Extract text from a MOBCAL mobility calculation output file.""" def __init__(self): self.contents = None self.result = {} def load(self, path: str): - '''Load in the data file''' - with open(path, 'r') as f: + """Load in the data file""" + with open(path, "r") as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" done = False for line in self.contents: # if "average (second order) TM mobility" in line: # m_mn = float(line.split('=')[-1]) if "average TM cross section" in line: - ccs_mn = float(line.split('=')[-1]) + ccs_mn = float(line.split("=")[-1]) elif "standard deviation TM cross section" in line: - ccs_std = float(line.split('=')[-1]) - elif 'standard deviation (percent)' in line: + ccs_std = float(line.split("=")[-1]) + elif "standard deviation (percent)" in line: done = True if done is True: - self.result['ccs'] = {'mean': ccs_mn, 'std': ccs_std} + self.result["ccs"] = {"mean": ccs_mn, "std": ccs_std} return self.result - def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + def save(self, path: str, sep="\t"): + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return + class SanderParser(FileParserInterface): - '''Extract text from an Sander simulated annealing simulation output file.''' + """Extract text from an Sander simulated annealing simulation output file.""" def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError + class XTBParser(FileParserInterface): def __init__(self): self.contents = None @@ -532,23 +553,22 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' - with open(path, 'r') as f: + """Load in the data file""" + with open(path, "r") as f: self.contents = f.readlines() self.path = path - #return self.contents + # return self.contents def _crest_energy(self): - relative_energy = [] total_energy = [] population = [] ready = False for h in range(len(self.contents), 0, -1): - if 'Erel/kcal' in self.contents[h]: + if "Erel/kcal" in self.contents[h]: g = h + 1 - for j in range(g,len(self.contents)): + for j in range(g, len(self.contents)): line = self.contents[j].split() if len(line) == 8: relative_energy.append(float(line[1])) @@ -556,22 +576,23 @@ def _crest_energy(self): population.append(float(line[4])) ready = True - if '/K' in line[1]: + if "/K" in line[1]: break if ready == True: break - return {'relative energies': relative_energy, - 'total energies': total_energy, - 'population': population} + return { + "relative energies": relative_energy, + "total energies": total_energy, + "population": population, + } def _crest_timing(self): - def grab_time(line): - line = line.replace(' ', '') - line = line.split(':') + line = line.replace(" ", "") + line = line.split(":") - return ':'.join(line[1:]).strip('\n') + return ":".join(line[1:]).strip("\n") ready = False for line in self.contents: @@ -595,22 +616,24 @@ def grab_time(line): if "Overall wall time" in line: overall = grab_time(line) - return {'test MD wall time': test_MD, - 'metadynamics wall time': MTD_time, - 'multilevel opt wall time': multilevel_OPT, - 'molecular dynamics wall time': MD, - 'genetic z-matrix crossing wall time': GC, - 'overall wall time': overall} + return { + "test MD wall time": test_MD, + "metadynamics wall time": MTD_time, + "multilevel opt wall time": multilevel_OPT, + "molecular dynamics wall time": MD, + "genetic z-matrix crossing wall time": GC, + "overall wall time": overall, + } def _isomer_energy(self): complete = False relative_energies = [] total_energies = [] for i in range(len(self.contents), 0, -1): - if 'structure ΔE(kcal/mol) Etot(Eh)' in self.contents[i]: + if "structure ΔE(kcal/mol) Etot(Eh)" in self.contents[i]: h = i + 1 - for j in range(h,len(self.contents)): - if self.contents[j] != ' \n': + for j in range(h, len(self.contents)): + if self.contents[j] != " \n": line = self.contents[j].split() relative_energies.append(float(line[1])) total_energies.append(float(line[2])) @@ -621,16 +644,14 @@ def _isomer_energy(self): if complete == True: break - return {'relative energy': relative_energies, - 'total energy': total_energies} + return {"relative energy": relative_energies, "total energy": total_energies} def _isomer_timing(self): - def grab_time(line): - line = line.replace(' ', '') - line = line.split(':') + line = line.replace(" ", "") + line = line.split(":") - return ':'.join(line[1:]).strip('\n') + return ":".join(line[1:]).strip("\n") for line in self.contents: if "LMO calc. wall time" in line: @@ -642,24 +663,25 @@ def grab_time(line): if "Overall wall time" in line: OVERALL_time = grab_time(line) - return {'local molecular orbital wall time': LMO_time, - 'multilevel opt wall time': OPT_time, - 'overall wall time': OVERALL_time} + return { + "local molecular orbital wall time": LMO_time, + "multilevel opt wall time": OPT_time, + "overall wall time": OVERALL_time, + } def _opt_energy(self): for line in self.contents: - if 'TOTAL ENERGY' in line: - energy = line.split()[3] + ' Hartrees' + if "TOTAL ENERGY" in line: + energy = line.split()[3] + " Hartrees" - return {'Total energy': energy} + return {"Total energy": energy} def _opt_timing(self): - def grab_time(line): - line = line.replace(' ', '') - line = line.split(':') + line = line.replace(" ", "") + line = line.split(":") - return ':'.join(line[1:]).strip('\n') + return ":".join(line[1:]).strip("\n") tot = False scf = False @@ -678,12 +700,13 @@ def grab_time(line): anc_time = grab_time(line) anc = True - return {'Total wall time': total_time, - 'SCF wall time': scf_time, - 'ANC optimizer wall time': anc_time} + return { + "Total wall time": total_time, + "SCF wall time": scf_time, + "ANC optimizer wall time": anc_time, + } def _parse_energy(self): - if self.parse_crest == True: return self._crest_energy() if self.parse_opt == True: @@ -700,27 +723,26 @@ def _parse_timing(self): return self._isomer_timing() def _parse_protocol(self): - protocol = None for line in self.contents: if " > " in line: - protocol = line.strip('\n') + protocol = line.strip("\n") if "program call" in line: - protocol = (line.split(':')[1]).strip('\n') + protocol = (line.split(":")[1]).strip("\n") return protocol def _parse_xyz(self): - ''' + """ Split .xyz into separate XYZGeometry instances - ''' + """ FILE = self.xyz_path - if len(list(pybel.readfile('xyz', FILE))) > 1: + if len(list(pybel.readfile("xyz", FILE))) > 1: geom_list = [] count = 1 XYZ = FILE.split(".")[0] - for geom in pybel.readfile('xyz', FILE): + for geom in pybel.readfile("xyz", FILE): geom.write("xyz", "%s_%d.xyz" % (XYZ, count)) geom_list.append("%s_%d.xyz" % (XYZ, count)) count += 1 @@ -733,14 +755,14 @@ def _parse_xyz(self): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check that the file is valid first if len(self.contents) == 0: - raise RuntimeError('No contents to parse: {}'.format(self.path)) + raise RuntimeError("No contents to parse: {}".format(self.path)) if "terminated normally" not in self.contents[-1]: if "ratio" not in self.contents[-2]: - raise RuntimeError('XTB job failed: {}'.format(self.path)) + raise RuntimeError("XTB job failed: {}".format(self.path)) self.parse_crest = False self.parse_opt = False @@ -750,64 +772,62 @@ def parse(self): result = {} try: - result['protocol'] = self._parse_protocol() + result["protocol"] = self._parse_protocol() except: pass try: - result['timing'] = self._parse_timing() + result["timing"] = self._parse_timing() except: pass try: - result['energy'] = self._parse_energy() + result["energy"] = self._parse_energy() except: pass # Parse geometry from assoc. XYZ file try: - if self.path.endswith('xyz'): - - if 'geometry' in to_parse: + if self.path.endswith("xyz"): + if "geometry" in to_parse: try: self.xyz_path = self.path - result['geom'] = self._parse_xyz() - + result["geom"] = self._parse_xyz() + except: pass - if self.path.endswith('out') or self.path.endswith('log'): - + if self.path.endswith("out") or self.path.endswith("log"): # try geometry parsing try: XYZ = None - if result['protocol'].split()[0] == 'xtb': + if result["protocol"].split()[0] == "xtb": self.parse_opt = True - XYZ = 'xtbopt.xyz' - if result['protocol'].split()[1] == 'crest': - - if '-deprotonate' in result['protocol']: + XYZ = "xtbopt.xyz" + if result["protocol"].split()[1] == "crest": + if "-deprotonate" in result["protocol"]: self.parse_isomer = True - XYZ = 'deprotonated.xyz' - elif '-protonate' in result['protocol']: + XYZ = "deprotonated.xyz" + elif "-protonate" in result["protocol"]: self.parse_isomer = True - XYZ = 'protonated.xyz' - elif '-tautomer' in result['protocol']: + XYZ = "protonated.xyz" + elif "-tautomer" in result["protocol"]: self.parse_isomer = True - XYZ = 'tautomers.xyz' + XYZ = "tautomers.xyz" else: self.parse_crest = True - XYZ = 'crest_conformers.xyz' - + XYZ = "crest_conformers.xyz" if XYZ is None: - raise RuntimeError('XYZ file associated with XTB job not available,\ - please parse separately.') + raise RuntimeError( + "XYZ file associated with XTB job not available,\ + please parse separately." + ) else: temp_dir = os.path.dirname(self.path) self.xyz_path = os.path.join(temp_dir, XYZ) - result['geom'] = self._parse_xyz() + result["geom"] = self._parse_xyz() except: pass except: @@ -815,80 +835,604 @@ def parse(self): return result def save(self, path): - with open(path, 'wb') as f: + with open(path, "wb") as f: pickle.dump(self, f) return -class TINKERParser(FileParserInterface): +class TINKERParser(FileParserInterface): def __init__(self): self.contents = None self.result = None self.path = None def load(self, path: str): - '''Load in the data file''' - with open(path, 'r') as f: + """Load in the data file""" + with open(path, "r") as f: self.contents = f.readlines() self.path = path - def _tinker_energy(self): - return - - def _tinker_conformers(self): - - def check_float(coord: str): - if coord[0] == '-': - return ' '+coord + def _parse_energy(self): + inp = self.contents + if len(inp) < 13: + quit() + + # Get the conformer energies from the file + energies = [] + for line in inp[13:]: + data = line[:-1].split(" ") + data = [_f for _f in data if _f] + if len(data) >= 3: + if "Map" in data[0] and "Minimum" in data[1]: + energies.append(float(data[-1])) + + return energies + + def _parse_conformers(self): + def parse_atom_symbol(AtomNum): + Lookup = [ + "H", + "He", + "Li", + "Be", + "B", + "C", + "N", + "O", + "F", + "Ne", + "Na", + "Mg", + "Al", + "Si", + "P", + "S", + "Cl", + "Ar", + "K", + "Ca", + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Ga", + "Ge", + "As", + "Se", + "Br", + "Kr", + "Rb", + "Sr", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "In", + "Sn", + "Sb", + "Te", + "I", + "Xe", + "Cs", + "Ba", + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + "Hg", + "Tl", + "Pb", + "Bi", + "Po", + "At", + "Rn", + ] + + if AtomNum > 0 and AtomNum < len(Lookup): + return Lookup[AtomNum - 1] else: - return ' '+coord - - # Define file path and load file - self.xyzpath = self.path.split('.')[0]+'.arc' - with open('butyric_acid.arc', 'r') as f: - geoms = f.readlines() - - - #atom count in molecule - atomnum = int(geoms[0].split()[0]) - - FILE = self.path.split('.')[0]+'.xyz' + print("No such element with atomic number " + str(AtomNum)) + return 0 + + conffile = open(self.path.split(".")[0] + ".arc", "r") + confdata = conffile.readlines() + conffile.close() + conformers = [] + atoms = [] + atomtypes = [ + "CR", + "C=C", + "CSP", + "C=O", + "C=N", + "CGD", + "C=O", + "C=O", + "CON", + "COO", + "COO", + "COO", + "C=O", + "C=S", + "C=S", + "CSO", + "CS=", + "CSS", + "C=P", + "CSP", + "=C=", + "HC", + "HSI", + "OR", + "OC=", + "OC=", + "OC=", + "OC=", + "ONO", + "ON=", + "OSO", + "OSO", + "OSO", + "OS=", + "-OS", + "OPO", + "OPO", + "OPO", + "-OP", + "-O-", + "O=C", + "O=C", + "O=C", + "O=C", + "O=N", + "O=S", + "O=S", + "NR", + "N=C", + "N=N", + "NC=", + "NC=", + "NN=", + "NN=", + "F", + "CL", + "BR", + "I", + "S", + "S=C", + "S=O", + ">S=", + "SO2", + "SO2", + "SO3", + "SO4", + "=SO", + "SNO", + "SI", + "CR4", + "HOR", + "HO", + "HOM", + "CR3", + "HNR", + "H3N", + "HPY", + "HNO", + "HNM", + "HN", + "HOC", + "HOP", + "PO4", + "PO3", + "PO2", + "PO", + "PTE", + "P", + "HN=", + "HN=", + "HNC", + "HNC", + "HNC", + "HNC", + "HNN", + "HNN", + "HNS", + "HNP", + "HNC", + "HSP", + "HOC", + "HOC", + "CE4", + "HOH", + "O2C", + "OXN", + "O2N", + "O2N", + "O3N", + "O-S", + "O2S", + "O3S", + "O4S", + "OSM", + "OP", + "O2P", + "O3P", + "O4P", + "O4C", + "HOS", + "NR+", + "OM", + "OM2", + "HNR", + "HIM", + "HPD", + "HNN", + "HNC", + "HGD", + "HN5", + "CB", + "NPY", + "NPY", + "NC=", + "NC=", + "NC=", + "NC%", + "CO2", + "CS2", + "NSP", + "NSO", + "NSO", + "NPO", + "NPO", + "NC%", + "STH", + "NO2", + "NO3", + "N=O", + "NAZ", + "NSO", + "O+", + "HO+", + "O=+", + "HO=", + "=N=", + "N+=", + "N+=", + "NCN", + "NGD", + "CGD", + "CNN", + "NPD", + "OFU", + "C%", + "NR%", + "NM", + "C5A", + "C5B", + "N5A", + "N5B", + "N2O", + "N3O", + "NPO", + "OH2", + "HS", + "HS=", + "HP", + "S-P", + "S2C", + "SM", + "SSM", + "SO2", + "SSO", + "=S=", + "-P=", + "N5M", + "CLO", + "C5", + "N5", + "CIM", + "NIM", + "N5A", + "N5B", + "N5+", + "N5A", + "N5B", + "N5O", + "FE+", + "FE+", + "F-", + "CL-", + "BR-", + "LI+", + "NA+", + "K+", + "ZIN", + "ZN+", + "CA+", + "CU+", + "CU+", + "MG+", + ] + anums = [ + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 1, + 1, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 9, + 17, + 35, + 53, + 16, + 16, + 16, + 16, + 16, + 16, + 16, + 16, + 16, + 16, + 14, + 6, + 1, + 1, + 1, + 6, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 15, + 15, + 15, + 15, + 15, + 15, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 6, + 1, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 1, + 7, + 8, + 8, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 6, + 7, + 7, + 7, + 7, + 7, + 7, + 6, + 6, + 7, + 7, + 7, + 7, + 7, + 7, + 16, + 7, + 7, + 7, + 7, + 7, + 8, + 1, + 8, + 1, + 7, + 7, + 7, + 7, + 7, + 6, + 6, + 7, + 8, + 6, + 7, + 7, + 6, + 6, + 7, + 7, + 7, + 7, + 7, + 8, + 1, + 1, + 1, + 16, + 16, + 16, + 16, + 16, + 16, + 16, + 15, + 7, + 17, + 6, + 7, + 6, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 26, + 26, + 9, + 17, + 35, + 3, + 11, + 19, + 30, + 30, + 20, + 29, + 29, + 12, + ] + atypes = [x[:3] for x in atomtypes] + + # Parse data from arc file, extract coordinates and atom type + for line in confdata: + data = [_f for _f in line.split(" ") if _f] + if len(data) < 3: + conformers.append([]) + else: + if len(conformers) == 1: + anum = anums[atypes.index(data[1][:3])] + atoms.append(parse_atom_symbol(anum)) + conformers[-1].append([x for x in data[2:5]]) - f = open(FILE, 'w+') # Convert from TINKER xyz format to standard xyz format - for idx, line in enumerate(geoms): - - if line == geoms[0]: - xyz = geoms[idx:idx+atomnum+1] - - new_xyz = [str(xyz[0].strip()+'\n\n')] - for coord in xyz[1:]: - old_line = coord.split() - - s = ' ' - - if old_line[1][1].islower(): - s += old_line[1][0:2] - else: - s += old_line[1][0] - - # XYZ coordinates - s += check_float(old_line[2]) - s += check_float(old_line[3]) - s += check_float(old_line[4])+'\n' - - new_xyz.append(s) - - for i in new_xyz: - f.write(i) + xyz_file = [] + for conf in conformers: + xyz_file.append(" {}\n".format(len(conf))) + for idx, line in enumerate(conf): + s = " {}\t{}\t{}\t{}".format(atoms[idx], line[0], line[1], line[2]) + xyz_file.append(s) + + # Write xyzs to file + FILE = "conformers.xyz" + f = open(FILE, "w+") + for i in xyz_file: + f.write(i + "\n") f.close() - if len(list(pybel.readfile('xyz', FILE))) > 1: + # Read in file by + if len(list(pybel.readfile("xyz", FILE))) > 1: geom_list = [] count = 1 XYZ = FILE.split(".")[0] - for geom in pybel.readfile('xyz', FILE): - geom.write("xyz", "%s_%d.xyz" % (XYZ, count)) + for geom in pybel.readfile("xyz", FILE): + geom.write("xyz", "%s_%d.xyz" % (XYZ, count), overwrite=True) geom_list.append("%s_%d.xyz" % (XYZ, count)) count += 1 @@ -898,12 +1442,33 @@ def check_float(coord: str): x = [isicle.io.load(self.xyz_path)] return isicle.conformers.ConformationalEnsemble(x) - + def parse(self): - return - + """Extract relevant information from data""" + + # Check that the file is valid first + if len(self.contents) == 0: + raise RuntimeError("No contents to parse: {}".format(self.path)) + + # Initialize result object to store info + result = {} + + try: + result["geom"] = self._parse_conformers() + except: + pass + + try: + result["energy"] = self._parse_energy() + except: + pass + + try: + result["charge"] = self._parse_charge() + except: + pass + + return result + def save(self): return - - - \ No newline at end of file From fabbad32009a3cee622eaf01c9a2bc033cb2dccd Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 29 Aug 2023 08:50:35 -0700 Subject: [PATCH 3/5] Update TINKER md --- isicle/md.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 9fc6e50..9754494 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -939,5 +939,36 @@ def run(self, geom, **kwargs): return self - def finish(self): - return + def get_structures(self): + """ + Extract all structures from containing object as a conformational ensemble. + + Returns + ------- + :obj:`~isicle.conformers.ConformationalEnsemble` + Conformational ensemble. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + return self.geom + + raise TypeError( + "Object does not contain multiple structures. Use `get_structure` instead." + ) + + def get_structure(self): + """ + Extract structure from containing object. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Structure instance. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + raise TypeError( + "Object contains multiple structures. Use `get_structures` instead." + ) + + return self.geom From 8e8491399044dade00d52a40797658577400d0a8 Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 29 Aug 2023 10:03:18 -0700 Subject: [PATCH 4/5] Add TINKERParse to md --- isicle/md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/md.py b/isicle/md.py index 9754494..ba12ab6 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -8,7 +8,7 @@ import isicle from isicle.geometry import Geometry, XYZGeometry from isicle.interfaces import WrapperInterface -from isicle.parse import XTBParser +from isicle.parse import XTBParser, TINKERParser """ Files resulting from an xtb job always run in the same directory that the command is From ecef23c06e0c2c3578ef7b2f2ef5a69bc4c2fed6 Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 29 Aug 2023 10:28:42 -0700 Subject: [PATCH 5/5] Fix TINKER parsing scan attribute error --- isicle/md.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index ba12ab6..ab865d7 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -890,18 +890,15 @@ def finish(self): self.__dict__.update(result) + conformerID = 1 for i in self.geom: i.add___dict__({k: v for k, v in result.items() if k != "geom"}) i.__dict__.update(basename=self.basename) - - if self.task != "optimize": - conformerID = 1 - for i in self.geom: - i.__dict__.update(conformerID=conformerID) - conformerID += 1 + i.__dict__.update(conformerID=conformerID) + conformerID += 1 return self - else: - self.geom = self.geom[0] + + return self def run(self, geom, **kwargs): """