From 87e1566b83f403590db50c727826c9f8a3d8b0fc Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Tue, 15 Aug 2023 13:16:49 -0700 Subject: [PATCH 01/14] Update formatting, docstrings --- isicle/conformers.py | 195 ++++++++++++++----------------------------- 1 file changed, 61 insertions(+), 134 deletions(-) diff --git a/isicle/conformers.py b/isicle/conformers.py index 7113e0d..504435a 100644 --- a/isicle/conformers.py +++ b/isicle/conformers.py @@ -1,5 +1,3 @@ -import pickle - import numpy as np import pandas as pd from statsmodels.stats.weightstats import DescrStatsW @@ -23,18 +21,19 @@ def _function_selector(func): ------- func Conformer reduction function. - ''' - + # Mapping between names and functions func_map = {'boltzmann': boltzmann, 'simple': simple_average, 'lowest': lowest_energy, 'threshold': energy_threshold} + # Check for function by name if func.lower() in func_map: return func_map[func.lower()] - else: - raise ValueError('{} not a supported reduction function.'.format(func)) + + # Not a named/implemented function + raise ValueError('{} not a supported reduction function.'.format(func)) def _energy_based(func): @@ -43,19 +42,19 @@ def _energy_based(func): Parameters ---------- - func : func + func : function Conformer reduction function. Returns ------- bool True if energy based, otherwise False. - ''' - + # Check if among energy-based functions if func in [boltzmann, lowest_energy, energy_threshold]: return True + # Not energy based return False @@ -70,16 +69,15 @@ def reduce(value, func='boltzmann', **kwargs): func : str Alias for function selection (one of "boltzmann", "simple", "lowest", or "threshold"). - **kwargs + kwargs Additional keyword arguments passed to `func`. Returns ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Select function f = _function_selector(func) # Energy-based method @@ -110,33 +108,45 @@ def boltzmann(value, energy, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({'value': value, 'energy': energy, 'index': index, 'atom': atom}) + # Result container res = [] + + # Iterate over unique indices for name, group in df.groupby(['index', 'atom']): + # Compute relative delta G g = group['energy'] * 627.503 mn = g.min() relG = g - mn + + # Compute Boltzmann weighting factors b = np.exp(-relG / 0.5924847535) w = (b / b.sum()) * len(b) + # Compute weighted statistics ws = DescrStatsW(group['value'], weights=w, ddof=0) + # Append to container res.append([name[0], name[1], ws.mean, ws.std, len(group.index)]) + # Initialize data frame res = pd.DataFrame(res, columns=['index', 'atom', 'mean', 'std', 'n']) + # Drop index if not supplied if np.all(index == -1): return res.drop(columns=['index', 'atom']).iloc[0] @@ -160,22 +170,28 @@ def simple_average(value, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({'value': value, 'index': index, 'atom': atom}) + # Average per unique index res = df.groupby(['index', 'atom'], as_index=False).agg({'value': ['mean', 'std', 'count']}) + + # Rename columns res.columns = ['index', 'atom', 'mean', 'std', 'n'] + # Drop indices if not supplied if np.all(index == -1): return res.drop(columns=['index', 'atom']).iloc[0] @@ -201,22 +217,26 @@ def lowest_energy(value, energy, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({'value': value, 'energy': energy, 'index': index, 'atom': atom}) + # Take minimum energy per unique index res = df.loc[df.groupby(['index', 'atom'])[ 'energy'].idxmin()] + # Drop indices if not supplied if np.all(index == -1): return res.drop(columns=['index', 'atom']).iloc[0] @@ -243,25 +263,32 @@ def energy_threshold(value, energy, threshold=5, index=None, atom=None): ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({'value': value, 'energy': energy, 'index': index, 'atom': atom}) + # Filter by energy df = df.loc[df['energy'] <= threshold, :] + # Aggregate res = df.groupby(['index', 'atom'], as_index=False).agg({'value': ['mean', 'std', 'count']}) + + # Rename columns res.columns = ['index', 'atom', 'mean', 'std', 'n'] + # Drop indices if not supplied if index is None: return res.drop(columns=['index', 'atom']).iloc[0] @@ -289,18 +316,21 @@ def transform(value, m={'H': 1.0, 'C': 1.0}, b={'H': 0.0, 'C': 0.0}, index=None, ------- :obj: `~pandas.DataFrame` Result of transformation operation. - ''' - + # Placeholder for index if index is None: index = np.full_like(value, -1) + + # Placeholder for atom if atom is None: atom = np.full_like(value, -1) + # Initialize data frame df = pd.DataFrame.from_dict({'value': value, 'index': index, 'atom': atom}) + # Process with per-atom values if isinstance(m, dict): res = pd.DataFrame() for idx in m: @@ -309,6 +339,8 @@ def transform(value, m={'H': 1.0, 'C': 1.0}, b={'H': 0.0, 'C': 0.0}, index=None, lambda x: m[idx] * x + b[idx]) res = pd.concat([res, part]) + + # Process with global values else: res = df.copy() res['new_value'] = res['value'].apply(lambda x: m * x + b) @@ -329,76 +361,14 @@ def build_conformational_ensemble(geometries): ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - ''' - return ConformationalEnsemble(geometries) -def load_pickle(path): - ''' - Load pickled file. - - Parameters - ---------- - path : str - Path to pickle. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Previously pickled :obj:`~isicle.conformers.ConformationalEnsemble` - instance. - - Raises - ------ - IOError - If file cannot be read. - TypeError - If file is not a supported class instance. - - ''' - - # Load file - with open(path, 'rb') as f: - try: - ensemble = pickle.load(f) - except pickle.UnpicklingError: - raise IOError('Could not read file as pickle: {}'.format(path)) - - # Check for valid class type - if isinstance(ensemble, ConformationalEnsemble): - return ensemble - - # Failure. This is not a ConformationalEnsemble instance - raise TypeError('Unsupported format: {}.'.format(ensemble.__class__)) - - -def load(path): - ''' - Load file. - - Parameters - ---------- - path : str - Path to file. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Previously pickled :obj:`~isicle.conformers.ConformationalEnsemble` - instance. - - ''' - - return load_pickle(path) - - class ConformationalEnsemble(TypedList): ''' Collection of :obj:`~isicle.geometry.Geometry`, or related subclass, instances. - ''' def __init__(self, *args): @@ -409,9 +379,7 @@ def __init__(self, *args): ---------- *args Objects to comprise the conformational ensemble. - ''' - super().__init__((Geometry, XYZGeometry), *args) def _check_attributes(self, attr): @@ -427,9 +395,7 @@ def _check_attributes(self, attr): ------ AttributeError If all members do not have `attr`. - ''' - value = [x.get___dict__() for x in self] for key in safelist(attr): if not all(key in x for x in value): @@ -437,9 +403,6 @@ def _check_attributes(self, attr): 'ensemble members.'.format(attr)) value = [x.get(key) for x in value] - # if type(value[0]) is dict: - # raise AttributeError('"{}" has additional keys: {}'.format(attr, value[0].keys())) - def reduce(self, attr, func='boltzmann', **kwargs): ''' Combine attribute values according to indicated function. @@ -451,16 +414,15 @@ def reduce(self, attr, func='boltzmann', **kwargs): func : str Alias for function selection (one of "boltzmann", "simple", "lowest", or "threshold"). - **kwargs + kwargs Additional keyword arguments passed to `func`. Returns ------- :obj:`~pandas.DataFrame` Result of reduction operation. - ''' - + # Select reduction function f = _function_selector(func) # Check for primary attribute @@ -516,7 +478,6 @@ def reduce(self, attr, func='boltzmann', **kwargs): # Execute other method return f(value, index=index, atom=atom, **kwargs) - # TODO: automatically unpack multiple returned objects def _apply_method(self, method, **kwargs): ''' Process conformational ensemble members according to supplied method. @@ -525,16 +486,14 @@ def _apply_method(self, method, **kwargs): ---------- method : str Method by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `method`. Returns ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `method` return type. - ''' - # Check for attribute if not all(hasattr(x, method) for x in self): raise AttributeError('"{}" not found for all conformational ' @@ -551,7 +510,6 @@ def _apply_method(self, method, **kwargs): except: return result - # TODO: automatically unpack multiple returned objects def _apply_function(self, func, **kwargs): ''' Process conformational ensemble members according to supplied function. @@ -560,16 +518,14 @@ def _apply_function(self, func, **kwargs): ---------- func : function Function by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `func`. Returns ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `func` return type. - ''' - # Apply method to collection result = [func(x, **kwargs) for x in self] @@ -581,7 +537,6 @@ def _apply_function(self, func, **kwargs): except: return result - # TODO: automatically unpack multiple returned objects def apply(self, func=None, method=None, **kwargs): ''' Process conformational ensemble members according to supplied function @@ -593,7 +548,7 @@ def apply(self, func=None, method=None, **kwargs): Function by which ensemble members will be processed. method : str Method by which ensemble members will be processed. - **kwargs + kwargs Keyword arguments passed to `method`. Returns @@ -605,43 +560,17 @@ def apply(self, func=None, method=None, **kwargs): ------ ValueError If neither `func` nor `method` is supplied. - ''' + # Apply function if func is not None: return self._apply_function(func, **kwargs) + # Apply method if method is not None: return self._apply_method(method, **kwargs) raise ValueError('Must supply `func` or `method`.') - def save_pickle(self, path): - ''' - Pickle this class instance. - - Parameters - ---------- - path : str - Path to save pickle object to. - - ''' - - with open(path, 'wb') as f: - pickle.dump(self, f) - - def save(self, path): - ''' - Save this class instance. - - Parameters - ---------- - path : str - Path to save object to. - - ''' - - io.save(path, self) - def get_structures(self): ''' Extract all structures from containing object as a conformational ensemble. @@ -650,9 +579,7 @@ def get_structures(self): ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - ''' - # Check for geom attribute self._check_attributes('geom') From ea3b414d0bca99d45d54681fe6cd5bfe601a6887 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Thu, 17 Aug 2023 13:49:41 -0700 Subject: [PATCH 02/14] Conform code to consistent implementation philosophy Removal of global logic, inplace, update_structure etc Ensure __copy__ methods work as intended Unify `to_xyz` and `to_xyzblock` Remove `downgrade_to_xyz` --- isicle/adducts.py | 8 +- isicle/conformers.py | 14 + isicle/geometry.py | 723 +++++++++++++++++++++++-------------------- isicle/interfaces.py | 96 ++---- isicle/io.py | 8 +- isicle/md.py | 108 ++++--- isicle/mobility.py | 8 +- isicle/parse.py | 47 +-- isicle/qm.py | 284 +++++++++++------ isicle/utils.py | 31 +- 10 files changed, 717 insertions(+), 610 deletions(-) diff --git a/isicle/adducts.py b/isicle/adducts.py index dac76c5..8e692bf 100644 --- a/isicle/adducts.py +++ b/isicle/adducts.py @@ -301,7 +301,7 @@ def set_charge(self): ''' if self.geom.__dict__.get('charge') is None: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): ''' @@ -472,7 +472,7 @@ def _update_geometry_charge(self, geom): ''' ''' - geom.__dict__.update(charge=geom.get_formal_charge()) + geom.__dict__.update(charge=geom.get_charge()) def _add_ion(self, init_mol, base_atom_idx, ion_atomic_num, sanitize, forcefield, ff_iter): ''' @@ -983,9 +983,9 @@ def set_charge(self): if self.geom.load.get('filetype') == '.xyz': # self.geom.__dict__.update(charge=charge) raise ValueError( - 'Must first run geom.set_formal_charge for an xyz structure') + 'Must first run geom.set_charge for an xyz structure') else: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): cations, anions, complex = _parse_ions( diff --git a/isicle/conformers.py b/isicle/conformers.py index 504435a..ff0b59b 100644 --- a/isicle/conformers.py +++ b/isicle/conformers.py @@ -379,7 +379,9 @@ def __init__(self, *args): ---------- *args Objects to comprise the conformational ensemble. + ''' + super().__init__((Geometry, XYZGeometry), *args) def _check_attributes(self, attr): @@ -395,7 +397,9 @@ def _check_attributes(self, attr): ------ AttributeError If all members do not have `attr`. + ''' + value = [x.get___dict__() for x in self] for key in safelist(attr): if not all(key in x for x in value): @@ -421,7 +425,9 @@ def reduce(self, attr, func='boltzmann', **kwargs): ------- :obj:`~pandas.DataFrame` Result of reduction operation. + ''' + # Select reduction function f = _function_selector(func) @@ -493,7 +499,9 @@ def _apply_method(self, method, **kwargs): ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `method` return type. + ''' + # Check for attribute if not all(hasattr(x, method) for x in self): raise AttributeError('"{}" not found for all conformational ' @@ -525,7 +533,9 @@ def _apply_function(self, func, **kwargs): ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `func` return type. + ''' + # Apply method to collection result = [func(x, **kwargs) for x in self] @@ -560,7 +570,9 @@ def apply(self, func=None, method=None, **kwargs): ------ ValueError If neither `func` nor `method` is supplied. + ''' + # Apply function if func is not None: return self._apply_function(func, **kwargs) @@ -579,7 +591,9 @@ def get_structures(self): ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. + ''' + # Check for geom attribute self._check_attributes('geom') diff --git a/isicle/geometry.py b/isicle/geometry.py index 57df6f8..6f0f2b3 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -1,123 +1,66 @@ import os -import isicle import numpy as np -import pandas as pd -from isicle.interfaces import GeometryInterface, XYZGeometryInterface +from openbabel import pybel from rdkit import Chem from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem.SaltRemover import SaltRemover -from openbabel import pybel + +import isicle +from isicle.interfaces import GeometryInterface, XYZGeometryInterface + class XYZGeometry(XYZGeometryInterface): """ Molecule information, specialized for XYZ files (bonding info not provided). - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- - xyz: list of str - Lines of XYZ block used to represent this compound's structure - global_properties : dict - Dictionary of properties calculated for this structure. Always at least - contains the "from" key, which reports the last operation performed on - this compound (e.g. load, dft_optimize). To generate compound - properties, use get_* and *_optimize functions. + It is not recommended to manipulate or retrieve attributes of this class + without using class functions. """ _defaults = ["xyz"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) - self.__dict__.update(kwargs) - - def _upgrade_to_Geometry(self, mol): """ - Upgrade this class to the Geometry class using a provided mol object. - Hard copies everything but the xyz structure. + Initialize :obj:`~isicle.geometry.XYZGeometry` instance. Parameters ---------- - mol : RDKit Mol object - Structure to use in new Geometry object - - Returns - ------- - Geometry - Instance containing the new mol representation. + kwargs + Keyword arguments to initialize instance attributes. """ - if mol is None: - mol = self.mol - # Create dict to load in - d = self.__dict__.copy() - d["mol"] = mol - d.pop("xyz") - - return Geometry(**d) + self.__dict__.update(dict.fromkeys( + self._defaults, self._default_value)) + self.__dict__.update(kwargs) - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None, event=None - ): + def get_basename(self): """ - Return object with given structure. If inplace and a xyz structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. - - Parameters - ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. + Returns a copy of this object's basename (original filename). Returns ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. + str + Basename of original filepath. """ - if mol is not None: - # Upgrade to the Geometry class - geom = self._upgrade_to_Geometry(mol) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - - # Ensure exactly one of xyz or a filename is provided - # Add as structure of the new XYZGeometry - if xyz_filename is not None: - geom.xyz = load_xyz(xyz_filename) - elif xyz is not None: - geom.xyz = xyz - - return geom - - def get_basename(self): - """Returns a copy of this object's basename (original filename).""" return self.basename def add___dict__(self, d, override=False): - """Accepts a dictionary of values and adds any non-conflicting - information to the attribute dictionary""" + """ + Accepts a dictionary of values and adds any non-conflicting + information to the attribute dictionary. + + Parameters + ---------- + d : dict + Attribute dictionary. + override : bool + Singal whether to override existing attributes. + + """ if not override: # Remove keys that are already present in attribute dictionary @@ -125,32 +68,50 @@ def add___dict__(self, d, override=False): # Add to attributes self.__dict__.update(d) - return - def dft(self, program="NWChem", template=None, **kwargs): + def dft(self, program='NWChem', **kwargs): """ - Optimize geometry from XYZ, using stated functional and basis set. - Additional inputs can be grid size, optimization criteria level, + Perform density functional theory calculations according to supplied task list + and configuration parameters for specified program (currently only NWChem). + + Parameters + ---------- + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. + + Returns + ------- + :obj:`~isicle.qm.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.qm.dft( - self.__copy__(), program=program, template=template, **kwargs - ) - return geom + return isicle.qm.dft(self, program=program, **kwargs) def md(self, program="xtb", **kwargs): """ - Optimize geometry or generate conformers or adducts from XYZ using stated forcefield. - Additional inputs can be energy window, optimization criteria level, charge, or ion. + Optimize geometry or generate conformers or adducts using stated forcefield. + + Parameters + ---------- + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`. + + Returns + ------- + :obj:`~isicle.md.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.md.md(self.__copy__(), program=program, **kwargs) - return geom + return isicle.md.md(self.__copy__(), program=program, **kwargs) - def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): + def ionize(self, ion_path=None, ion_list=None, **kwargs): """ - Calls xtb CREST to ionize xyz geometry, using specified list of ions and method of ionization. - WARNING: must run isicle.geometry.XYZGeometry.set_formal_charge prior to this. + Calls xtb CREST to ionize XYZ geometry, using specified list of ions and method of ionization. + WARNING: must run isicle.geometry.XYZGeometry.set_charge prior to this. Parameters ---------- @@ -161,42 +122,57 @@ def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): List of strings of adducts to be considered. Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` Either ion_path or ion_list must be specified - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files - **kwargs : - see :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` + kwargs + See :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- Dictionary of adducts, `{:[]}` + """ + if self.__dict__.get("charge") is None: raise ValueError( - "Must first run isicle.geometry.XYZGeometry.set_formal_charge for an xyz structure" + "Must first run isicle.geometry.XYZGeometry.set_charge for an XYZ structure" ) iw = isicle.adducts.ionize("crest").run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw - def set_formal_charge(self, charge): - """Specify and set the known formal charge of the xyz molecule to __dict__""" + def set_charge(self, charge): + """ + Specify and set the known formal charge of the XYZ molecule. + + Parameters + ---------- + charge : int + Nominal charge of the molecule. + + """ + + # Attempt cast to int try: charge = int(charge) except: - raise TypeError("Charge specifed must be str or int") + raise TypeError("Charge specifed must be str or int.") + + # Update attribute self.__dict__.update(charge=charge) - return self.charge def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + self.__dict__["natoms"] = int(self.xyz[0].strip()) return self.__dict__["natoms"] @@ -213,35 +189,95 @@ def get_atom_indices(self, atoms=["C", "H"]): ------- list of int Atom indices. + """ + + # Index container idx = [] + # Enumerate rows for i in range(2, len(self.xyz)): + # Extract atom atom = self.xyz[i].split(" ")[0] + + # Check if of interest if atom in atoms: + # Append index idx.append(i - 2) + return idx def get___dict__(self): - """Return a copy of this object's attribute dictionary""" + """ + Return a copy of this object's attributes as a dictionary. + + Returns + ------- + dict + Instance attributes. + + """ + return self.__dict__.copy() - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + def __copy__(self, **kwargs): + """ + Copy this class instance. + + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - return type(self)(**d) + + # Safely copy mol if present + if 'mol' in d: + d['mol'] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_xyzblock(self): - """Get XYZ text for this structure.""" + """ + Get XYZ text for this structure. + + Returns + ------- + str + XYZ text. + + """ + return "\n".join(self.xyz) def to_mol(self): + """ + Get structure representation in :obj:`~rdkit.Chem.rdchem.Mol` format. + + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. + + """ + self.temp_dir = isicle.utils.mkdtemp() geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - "xyz")) + '{}.{}'.format(self.basename, + "xyz")) isicle.io.save(geomfile, self) mols = list(pybel.readfile("xyz", geomfile)) @@ -252,122 +288,56 @@ def to_mol(self): class Geometry(XYZGeometry, GeometryInterface): """ - Molecule information, specialized for 3D representations. - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- = - mol : RDKit Mol object - Current structure. - __dict__ : dict - Dictionary of properties calculated for this structure. + Molecule information, specialized for 3D representations. It is not + recommended to manipulate or retrieve attributes of this class without + using class functions. """ _defaults = ["mol"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys( + self._defaults, self._default_value)) self.__dict__.update(kwargs) - - def _downgrade_to_XYZGeometry(self, xyz): + def _is_embedded(self): """ - Downgrade this class to the XYZGeometry class using a provided xyz. - Hard copies everything but the mol structure. - - Parameters - ---------- - xyz : list of str - Lines of xyz block to use for new object. + Check if molecule has embedded 3D coordinates. Returns ------- - XYZGeometry - Instance containing the new xyz representation. + bool + Indication of 3D coordinate presence. """ - # Create dict to load in - d = self.__dict__.copy() - d["xyz"] = xyz - - return XYZGeometry(**d) + try: + self.mol.GetConformers() + return True + except: + return False - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None): + def initial_optimize(self, embed=False, forcefield="UFF", ff_iter=200): """ - Return object with given structure. If inplace and a mol structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. + Initial molecule optimization by basic force field to establish rough + 3D coordinates. Further optimization (molecular dynamics, density + functional theory) recommended. Parameters ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. - - Returns - ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. - - """ - - if mol is None: - - if xyz_filename is not None: - # Load in file first - xyz = load_xyz(xyz_filename) - - if xyz is not None: - # Downgrade to XYZGeometry class - geom = self._downgrade_to_XYZGeometry(xyz) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - geom.mol = mol - - # Add event that led to this change in structure - # If event is None, nothing will happen - - return geom - - def initial_optimize( - self, embed=False, forcefield="UFF", ff_iter=200, inplace=False - ): - """ - Params - ------ embed: bool - Try embedding molecule with eigenvalues of distance matrix - Except failure seeds with random coordinates + Attempt molecule embedding with eigenvalues of distance matrix. + Failure results in seeding with random coordinates. forcefield: str - Specify "UFF" for Universal Force Field optimization by RDKit - Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 - Specify "MMFF94s" for the MMFF94 s variant - inplace: bool - If geom.mol is modified inplace + Specify "UFF" for universal force field, "MMFF" or "MMFF94" for + Merck molecular force field 94, or "MMFF94s" for the MMFF94 s variant. + Returns ------- - Geometry - Updated structure instance + :obj:`~isicle.geometry.Geometry` + Molecule representation. + """ def _forcefield_selector(forcefield, mw): @@ -379,7 +349,8 @@ def _forcefield_selector(forcefield, mw): if Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mw) is True: return Chem.AllChem.UFFOptimizeMolecule else: - raise ValueError("UFF is not available for all atoms in molecule.") + raise ValueError( + "UFF is not available for all atoms in molecule.") elif forcefield in ["mmff", "mmff94", "mmff94s"]: if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mw) is True: return Chem.rdForceFieldHelpers.MMFFOptimizeMolecule @@ -392,25 +363,41 @@ def _forcefield_selector(forcefield, mw): "RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields." ) + # Get rdkit mol mol = self.to_mol() - if embed == True: + + # Embed molecule 3D coordinates + if embed is True: + # Attempt embedding try: Chem.AllChem.EmbedMolecule(mol) + + # Use random coordinates except: Chem.AllChem.EmbedMolecule(mol, useRandomCoords=True) + + # Optimize according to supplied forcefield if forcefield is not None: - if "mmff94s" in forcefield.lower(): - _forcefield_selector(forcefield, mol)( - mol, mmffVariant=forcefield, maxIters=ff_iter - ) - else: - _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) - geom = self._update_structure(inplace, mol=mol) + # Check if embedded + if self._is_embedded(): + + # Forcefield selection + if "mmff94s" in forcefield.lower(): + _forcefield_selector(forcefield, mol)( + mol, mmffVariant=forcefield, maxIters=ff_iter + ) + else: + _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) + + # Not embedded + else: + raise ValueError('Molecule must have embedded 3D coordinates.') - return geom + # Return copy with updated mol + return self.__copy__(mol=mol) - def desalt(self, salts=None, inplace=False): + def desalt(self, salts=None): """ Desalts RDKit mol object using Chem.SaltRemover module. @@ -418,22 +405,18 @@ def desalt(self, salts=None, inplace=False): ---------- salts : str (optional) Salt type to remove. Ex: 'Cl', 'Br', '[Na+]'. Default: None. - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry - Molecule with given salt(S) removed. + :obj:`~isicle.geometry.Geometry` + Moleculerepresentation. """ # TODO: should this raise an error instead? # If no salts given, skip desalting if salts is None: - return self._update_structure(inplace, mol=self.to_mol()) + return self.__copy__() remover = SaltRemover(defnFormat="smiles", defnData=salts) # defnData="[Cl,Br,Na]" *sample definition of salts to be removed* @@ -446,26 +429,16 @@ def desalt(self, salts=None, inplace=False): # atomno = res.GetNumAtoms # if relevant to future use, returns atom count post desalting - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? + return self.__copy__(mol=mol) - return geom - - def neutralize(self, inplace=False): + def neutralize(self): """ Neutralizes RDKit mol object using neutralization reactions. - Parameters - ---------- - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. - Returns ------- - Geometry - Neutralized form of the molecule. + :obj:`~isicle.geometry.Geometry` + Molecule representation. """ @@ -495,6 +468,8 @@ def _initialize_neutralisation_reactions(): reactions = _initialize_neutralisation_reactions() mol = self.to_mol() + + # TODO: `replaced` is unused replaced = False for i, (reactant, product) in enumerate(reactions): while mol.HasSubstructMatch(reactant): @@ -502,12 +477,9 @@ def _initialize_neutralisation_reactions(): rms = Chem.AllChem.ReplaceSubstructs(mol, reactant, product) mol = rms[0] - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? - - return geom + return self.__copy__(mol=mol) - def tautomerize(self, return_all=False, inplace=False): + def tautomerize(self, return_all=False): """ Generate tautomers according to RDKit TautomerEnumerator() method. @@ -516,14 +488,10 @@ def tautomerize(self, return_all=False, inplace=False): return_all : boolean (optional) If true, return all tautomers generated. Otherwise, only return the most common. Default=False - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry or list(Geometry) + :obj:`~isicle.geometry.Geometry` or list of :obj:`~isicle.geometry.Geometry` Tautomer(s) of starting structure. """ @@ -536,81 +504,77 @@ def tautomerize(self, return_all=False, inplace=False): res = [mol] tauts = enumerator.Enumerate(mol) smis = [Chem.MolToSmiles(x) for x in tauts] - s_smis = sorted((x, y) for x, y in zip(smis, tauts) if x != self.to_smiles()) + s_smis = sorted((x, y) + for x, y in zip(smis, tauts) if x != self.to_smiles()) res += [y for x, y in s_smis] # Ensure res is a list of mol objects if return_all: new_geoms = [] for r in res: - geom = self._update_structure(False, mol=r, event="tautomerize") + geom = self.__copy__(mol=r) new_geoms.append(geom) return new_geoms - geom = self._update_structure(inplace, mol=res[0]) - # TODO: add any properties from this operation to global_props? + return self.__copy__(mol=res[0]) - return geom - - def kekulize(self, inplace=False): + def kekulize(self): """ - `Kekulizes the molecule if possible. Otherwise the molecule is not modified` + Kekulizes the molecule if possible. Otherwise the molecule is not modified. This is recommended for aromatic molecules undergoing explicit ionization. Aromatic bonds are explicitly defined and aromatic flags are cleared. + """ + mol = Chem.KekulizeIfPossible(self.to_mol(), clearAromaticFlags=True) - geom = self._update_structure(inplace, mol=mol, event="kekulize") - return geom - - def ionize( - self, - ion_path=None, - ion_list=None, - method="explicit", - save=False, - path=None, - **kwargs, - ): + return self.__copy__(mol=mol) + + def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs): """ Ionize geometry, using specified list of ions and method of ionization. Parameters ---------- ion_path : str - Filepath to text file containing ions with charge (eg. `H+`) to be considered - Either ion_path or ion_list must be specified + Filepath to text file containing ions with charge (eg. `H+`) to be + considered. Either ion_path or ion_list must be specified. ion_list : list - List of strings of adducts to be considered. - Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` - Either ion_path or ion_list must be specified + List of strings of adducts to be considered. Must be specifed in + syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+`. Either ion_path + or ion_list must be specified method : str Method of ionization to be used, 'explicit' or 'crest' is accepted - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files ensembl : bool (optional) Returns instead a list of adduct geometries - **kwargs: + kwargs: see :meth: `~isicle.adducts.ExplicitIonizationWrapper.submit` or `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- - Dictionary of adducts, `{:[]}` + :obj:`~isicle.adducts.IonizationWrapper` + Ionization result. + """ + iw = isicle.adducts.ionize(method).run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + natoms = Chem.Mol.GetNumAtoms(self.to_mol()) self.__dict__["natoms"] = natoms return self.__dict__["natoms"] @@ -632,6 +596,7 @@ def get_atom_indices( ------- list of int Atom indices. + """ atoms = [lookup[x] for x in atoms] @@ -643,7 +608,16 @@ def get_atom_indices( return idx def get_total_partial_charge(self): - """Sum the partial charge across all atoms.""" + """ + Sum the partial charge across all atoms. + + Returns + ------- + float + Total partial charge. + + """ + mol = self.to_mol() Chem.AllChem.ComputeGasteigerCharges(mol) contribs = [ @@ -652,80 +626,151 @@ def get_total_partial_charge(self): ] return np.nansum(contribs) - def get_formal_charge(self): - """Calculate formal charge of the molecule.""" - mol = self.to_mol() - return Chem.rdmolops.GetFormalCharge(mol) + def get_charge(self): + """ + Get formal charge of the molecule. + + Returns + ------- + int + Formal charge. + + """ + + return Chem.rdmolops.GetFormalCharge(self.to_mol()) + + def set_charge(self, charge=None): + """ + Set formal charge of the molecule. + + Parameters + ---------- + charge : int + Formal charge of molecule. + + """ - def set_formal_charge(self): - """Set formal charge of the molecule to __dict__""" - self.__dict__.update(charge=self.get_formal_charge()) - return self.charge + if charge is None: + charge = self.get_charge() + + self.__dict__.update(charge=int(charge)) + + def __copy__(self, **kwargs): + """ + Return hard copy of this class instance. - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - d["mol"] = self.to_mol() - return type(self)(**d) + + # Safely copy mol if present + if 'mol' in d: + d['mol'] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_mol(self): """ - Returns RDKit Mol object for this Geometry. + Returns :obj:`~rdkit.Chem.rdchem.Mol` instance for this Geometry. Returns ------- - RDKit Mol object - Copy of structure + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. """ return self.mol.__copy__() def to_smiles(self): - """Get SMILES for this structure.""" + """ + Get SMILES for this structure. + + Returns + ------- + str + SMILES string. + + """ + return Chem.MolToSmiles(self.to_mol()) def to_inchi(self): - """Get InChI for this structure.""" + """ + Get InChI for this structure. + + Returns + ------- + str + InChI string. + + """ + return Chem.MolToInchi(self.to_mol()) def to_smarts(self): - """Get SMARTS for this structure.""" + """ + Get SMARTS for this structure. + + Returns + ------- + str + SMARTS string. + """ return Chem.MolToSmarts(self.to_mol()) def to_xyzblock(self): - """Get XYZ text for this structure.""" - return Chem.MolToXYZBlock(self.to_mol()) + """ + Get XYZ text for this structure. - def to_pdbblock(self): - """Get PDB text for this structure""" - return Chem.MolToPDBBlock(self.to_mol()) + Returns + ------- + str + XYZ representation as string. - def to_molblock(self): - """Get PDB text for this structure""" - return Chem.MolToMolBlock(self.to_mol()) + """ + return Chem.MolToXYZBlock(self.to_mol()) -class MDOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with MD optimized 3D coordinates. Any methods that would - result in a more defined representation (e.g. DFT optimized) should yield - the appropriate subclass. - """ + def to_pdbblock(self): + """ + Get PDB text for this structure. + + Returns + ------- + str + PDB representation as string. - def __init__(self): - # Initialize the base class - super().__init__() + """ + return Chem.MolToPDBBlock(self.to_mol()) -class DFTOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with DFT optimized 3D coordinates. - """ + def to_molblock(self): + """ + Get :obj:`~rdkit.Chem.rdchem.Mol` text for this structure. + + Returns + ------- + str + :obj:`~rdkit.Chem.rdchem.Mol` representation as string. - def __init__(self): - # Initialize the base class - super().__init__() + """ + + return Chem.MolToMolBlock(self.to_mol()) diff --git a/isicle/interfaces.py b/isicle/interfaces.py index 377a747..709420a 100644 --- a/isicle/interfaces.py +++ b/isicle/interfaces.py @@ -2,10 +2,10 @@ class FileParserInterface(metaclass=abc.ABCMeta): - ''' + """ Abstract base class for file parser interface. All file parsers conform to this definition. - ''' + """ @classmethod def __subclasshook__(cls, subclass): @@ -19,17 +19,17 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError @abc.abstractmethod def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError @abc.abstractmethod def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError @@ -51,47 +51,41 @@ def __subclasshook__(cls, subclass): and callable(subclass.__copy__) and hasattr(subclass, 'to_xyzblock') and callable(subclass.to_xyzblock) - and hasattr(subclass, 'save_xyz') - and callable(subclass.save_xyz) - and hasattr(subclass, 'save_pickle') - and callable(subclass.save_pickle) - and hasattr(subclass, 'save') - and callable(subclass.save) or NotImplemented) @abc.abstractmethod def dft(self): - '''Optimize geometry using density function theory (DFT) methods.''' + """Optimize geometry using density function theory (DFT) methods.""" raise NotImplementedError @abc.abstractmethod def md(self): - '''Optimize geometry using molecule dynamics methods (MD).''' + """Optimize geometry using molecule dynamics methods (MD).""" raise NotImplementedError @abc.abstractmethod def get_natoms(self): - '''Count number of atoms''' + """Count number of atoms""" raise NotImplementedError @abc.abstractmethod def get_atom_indices(self): - '''Extract indices of each atom from the internal geometry.''' + """Extract indices of each atom from the internal geometry.""" raise NotImplementedError @abc.abstractmethod def get___dict__(self): - '''Return a copy of this object's attributes dictionary''' + """Return a copy of this object's attributes dictionary""" raise NotImplementedError @abc.abstractmethod def __copy__(self): - '''Return hard copy of this class instance.''' + """Return hard copy of this class instance.""" raise NotImplementedError @abc.abstractmethod def to_xyzblock(self): - '''Get XYZ text for this structure.''' + """Get XYZ text for this structure.""" raise NotImplementedError @@ -113,35 +107,34 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def to_mol(self, path: str): - '''Returns RDKit Mol object for this Geometry.''' + """Returns RDKit Mol object for this Geometry.""" raise NotImplementedError @abc.abstractmethod def get_total_partial_charge(self): - '''Determine total partial charge of molecule''' + """Determine total partial charge of molecule""" raise NotImplementedError @abc.abstractmethod def to_smiles(self, path: str): - '''Return SMILES representation''' + """Return SMILES representation""" raise NotImplementedError @abc.abstractmethod def to_inchi(self, path: str): - '''Return InChI representation''' + """Return InChI representation""" raise NotImplementedError @abc.abstractmethod def to_smarts(self, path: str): - '''Return SMARTS representation''' + """Return SMARTS representation""" raise NotImplementedError class WrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for wrapper interface. All QM - wrappers conform to this definition. - ''' + """ + Abstract base class for wrapper interfaces. + """ @classmethod def __subclasshook__(cls, subclass): @@ -159,64 +152,25 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def set_geometry(self): - '''Load the input geometry file''' + """Assign the input geometry.""" raise NotImplementedError @abc.abstractmethod def configure(self): - '''Configure the run.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def submit(self): - '''Configure the run.''' - raise NotImplementedError - - @abc.abstractmethod - def run(self): - '''Execute/submit the run.''' - raise NotImplementedError - - @abc.abstractmethod - def finish(self, path: str): - '''Finalize, parse, return result object.''' - raise NotImplementedError - - -class MDWrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for molecular dynamics wrapper interface. All QM - wrappers conform to this definition. - ''' - - @classmethod - def __subclasshook__(cls, subclass): - return (hasattr(subclass, 'set_geometry') - and callable(subclass.set_geometry) - and hasattr(subclass, 'job_type') - and callable(subclass.job_type) - and hasattr(subclass, 'run') - and callable(subclass.run) - and hasattr(subclass, 'finish') - and callable(subclass.finish) - or NotImplemented) - - @abc.abstractmethod - def set_geometry(self): - '''Load the input geometry file''' - raise NotImplementedError - - @abc.abstractmethod - def job_type(self): - '''Make list of jobs.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def run(self): - '''Execute/submit the run.''' + """Execute/submit the run.""" raise NotImplementedError @abc.abstractmethod def finish(self, path: str): - '''Finalize, parse, return result object.''' + """Finalize, parse, return result object.""" raise NotImplementedError diff --git a/isicle/io.py b/isicle/io.py index a60a5c6..2fca531 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -153,7 +153,6 @@ def load_nwchem(path, basename=None): """ - dft = isicle.qm.NWChemWrapper() parser = isicle.parse.NWChemParser() @@ -305,6 +304,7 @@ def load_smiles(path, force=False, basename=None): Molecule representation. """ + extension = os.path.splitext(path)[-1].lower() if "smi" in extension: return _load_line_notation(path, func=Chem.MolFromSmiles, force=force, basename=basename) @@ -331,6 +331,7 @@ def load_inchi(path, force=False, basename=None): Molecule representation. """ + if "inchi=" in path.lower(): return _load_line_notation( path, func=Chem.MolFromInchi, force=force, string=True, basename=basename @@ -386,7 +387,7 @@ def _check_mol_obj(mol_obj): try: Chem.MolToMolBlock(mol_obj) except ValueError: - print("Invalid RDKit mol object") + print("Invalid RDKit Mol object") raise @@ -443,6 +444,7 @@ def load(path, force=False, basename=None): Molecule representation. """ + if (type(path)) == str: path = path.strip() extension = os.path.splitext(path)[-1].lower() @@ -480,8 +482,6 @@ def load(path, force=False, basename=None): raise IOError("Not a valid RDKit mol object passed.") - - def save_xyz(path, geom): """ Save molecule geometry as XYZ file. diff --git a/isicle/md.py b/isicle/md.py index 1fa0a18..aaa94e2 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -10,14 +10,14 @@ from isicle.interfaces import WrapperInterface from isicle.parse import XTBParser -''' +""" Files resulting from an xtb job always run in the same directory that the command is issued in, no matter where the input is. Can direct the .log file, but no other files. -''' +""" def _program_selector(program): - ''' + """ Selects a supported molecular dynamics program for associated simulation. Currently only NWChem has been implemented. @@ -32,7 +32,7 @@ def _program_selector(program): Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.MDWrapperInterface`. - ''' + """ program_map = {'xtb': XTBWrapper, 'rdkit': RDKitWrapper, 'tinker': TINKERWrapper} @@ -44,7 +44,7 @@ def _program_selector(program): def md(geom, program='xtb', **kwargs): - ''' + """ Optimize geometry via molecular dyanmics using supplied forcefield and basis set. @@ -58,14 +58,14 @@ def md(geom, program='xtb', **kwargs): result Object containing relevant outputs from the simulation. - ''' + """ # Select program return _program_selector(program).run(geom, **kwargs) class XTBWrapper(XYZGeometry, WrapperInterface): - ''' + """ Wrapper for xtb functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -84,7 +84,7 @@ class XTBWrapper(XYZGeometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -95,7 +95,7 @@ def __init__(self, **kwargs): self.__dict__.update(**kwargs) def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -103,7 +103,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -113,7 +113,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -122,7 +122,7 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() @@ -135,7 +135,7 @@ def save_geometry(self, fmt='xyz'): self.geom.path = geomfile def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solvation=None): - ''' + """ Set command line for xtb simulations. Parameters @@ -152,7 +152,7 @@ def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solv Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Add base command s = 'xtb ' @@ -184,7 +184,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', protonate=False, deprotonate=False, tautomerize=False, ion=None, charge=None, dryrun=False, processes=1, solvation=None, ignore_topology=False): - ''' + """ Set command line for crest simulations. Parameters @@ -215,7 +215,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Start base command s = 'crest ' @@ -275,7 +275,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', def configure(self, task='optimize', forcefield='gfn2', charge=None, ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, solvation=None, ignore_topology=False): - ''' + """ Generate command line Parameters @@ -300,7 +300,7 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ if type(task) == list: raise TypeError('Initiate one xtb or crest job at a time.') @@ -348,9 +348,9 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, self.config = config def submit(self): - ''' + """ Run xtb or crest simulation according to configured inputs. - ''' + """ owd = os.getcwd() os.chdir(self.temp_dir) job = self.config @@ -358,9 +358,9 @@ def submit(self): os.chdir(owd) def finish(self): - ''' + """ Parse results, save xtb output files, and clean temporary directory - ''' + """ parser = XTBParser() @@ -385,8 +385,10 @@ def finish(self): else: self.geom = self.geom[0] - def run(self, geom, **kwargs): - ''' + def run(self, geom, task='optimize', forcefield='gfn2', charge=None, + ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, + solvation=None, ignore_topology=False): + """ Optimize geometry via density functional theory using supplied functional and basis set. @@ -394,16 +396,28 @@ def run(self, geom, **kwargs): ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.md.XTBWrapper.configure`. + tasks : str + Set task to "optimize", "conformer", "protonate", "deprotonate", or "tautomerize". + forcefield : str + GFN forcefield for the optimization, including "gfn2", "gfn1", "gff". + ewin : int + Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search. + ion : str + Ion for protomer calculation. + optlevel : str or list of str + Set optimization level. Supply globally or per task. + ion : str + Keyword to couple with protonate to ionize molecule with an ion other than a proton. + See :obj:`~isicle.adduct.parse_ion` for list of ion options. + charge : int + Charge of molecular system. Defaults to 0 (neutral). Returns ------- :obj:`~isicle.md.XTBWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = XTBWrapper() @@ -423,7 +437,7 @@ def run(self, geom, **kwargs): return self def get_structures(self): - ''' + """ Extract all structures from containing object as a conformational ensemble. Returns @@ -431,7 +445,8 @@ def get_structures(self): :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - ''' + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): return self.geom @@ -439,7 +454,7 @@ def get_structures(self): 'Object does not contain multiple structures. Use `get_structure` instead.') def get_structure(self): - ''' + """ Extract structure from containing object. Returns @@ -447,7 +462,8 @@ def get_structure(self): :obj:`~isicle.geometry.XYZGeometry` Structure instance. - ''' + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): raise TypeError( 'Object contains multiple structures. Use `get_structures` instead.') @@ -456,8 +472,7 @@ def get_structure(self): class RDKitWrapper(Geometry, WrapperInterface): - - ''' + """ Wrapper for rdkit functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -476,7 +491,7 @@ class RDKitWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -504,7 +519,7 @@ def finish(self): class TINKERWrapper(Geometry, WrapperInterface): - ''' + """ Wrapper for TINKER functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -523,7 +538,7 @@ class TINKERWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -535,7 +550,7 @@ def __init__(self, **kwargs): def _convert_to_tinkerxyz(self): - ''' + """ Convert mol to TINKER XYZ format using code from DP5, Goodman Lab. Parameters @@ -543,7 +558,7 @@ def _convert_to_tinkerxyz(self): geom : :obj: `~isicle.geometry.Geometry` Molecule representation. - ''' + """ # getting MMFF values for large atom types def getMMFF_large_atom_type(mmff_props , atom, m): @@ -690,7 +705,7 @@ def getMMFF_large_atom_type(mmff_props , atom, m): return xyz def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -698,7 +713,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -710,7 +725,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -719,7 +734,7 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() @@ -775,7 +790,7 @@ def finish(self): self.geom = self.geom[0] def run(self, geom, **kwargs): - ''' + """ Take geometry and conduct specified tasks using TINKER Parameters @@ -791,7 +806,7 @@ def run(self, geom, **kwargs): :obj:`~isicle.md.TINKERWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = TINKERWrapper() @@ -809,6 +824,3 @@ def run(self, geom, **kwargs): self.finish() return self - - def finish(self): - return \ No newline at end of file diff --git a/isicle/mobility.py b/isicle/mobility.py index ec8d9bc..55a2e45 100644 --- a/isicle/mobility.py +++ b/isicle/mobility.py @@ -27,7 +27,7 @@ def __init__(self): pass def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -35,7 +35,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -44,7 +44,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Raises @@ -52,7 +52,7 @@ def save_geometry(self): TypeError If geometry loaded from .xyz is saved to another format. - ''' + """ # Temp directory self.temp_dir = isicle.utils.mkdtemp() diff --git a/isicle/parse.py b/isicle/parse.py index 8a46935..8841032 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -9,7 +9,7 @@ import isicle class NWChemParser(FileParserInterface): - '''Extract text from an NWChem simulation output file.''' + """Extract text from an NWChem simulation output file.""" def __init__(self): self.contents = None @@ -17,7 +17,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path @@ -274,7 +274,7 @@ def _parse_molden(self): def _parse_protocol(self): - '''Parse out dft protocol''' + """Parse out dft protocol""" functional = [] basis_set = [] solvation = [] @@ -337,14 +337,15 @@ def _parse_connectivity(self): return connectivity def parse(self): - ''' - Extract relevant information from NWChem output + """ + Extract relevant information from NWChem output. Parameters ---------- to_parse : list of str geometry, energy, shielding, spin, frequency, molden, charge, timing - ''' + + """ # Check that the file is valid first if len(self.contents) == 0: @@ -419,21 +420,21 @@ def save(self, path): return class ImpactParser(FileParserInterface): - '''Extract text from an Impact mobility calculation output file.''' + """Extract text from an Impact mobility calculation output file.""" def __init__(self): self.contents = None self.result = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'rb') as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check CCS results == 1 count = 0 @@ -470,26 +471,26 @@ def parse(self): return result # TODO: return CCS? def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class MobcalParser(FileParserInterface): - '''Extract text from a MOBCAL mobility calculation output file.''' + """Extract text from a MOBCAL mobility calculation output file.""" def __init__(self): self.contents = None self.result = {} def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" done = False for line in self.contents: # if "average (second order) TM mobility" in line: @@ -506,23 +507,23 @@ def parse(self): return self.result def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class SanderParser(FileParserInterface): - '''Extract text from an Sander simulated annealing simulation output file.''' + """Extract text from an Sander simulated annealing simulation output file.""" def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError class XTBParser(FileParserInterface): @@ -532,7 +533,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path @@ -711,9 +712,9 @@ def _parse_protocol(self): return protocol def _parse_xyz(self): - ''' + """ Split .xyz into separate XYZGeometry instances - ''' + """ FILE = self.xyz_path if len(list(pybel.readfile('xyz', FILE))) > 1: @@ -733,7 +734,7 @@ def _parse_xyz(self): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check that the file is valid first if len(self.contents) == 0: @@ -827,7 +828,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path diff --git a/isicle/qm.py b/isicle/qm.py index 71059bd..0c6febe 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -11,7 +11,7 @@ def _program_selector(program): - ''' + """ Selects a supported quantum mechanical program for associated simulation. Currently only NWChem has been implemented. @@ -26,7 +26,7 @@ def _program_selector(program): Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.WrapperInterface`. - ''' + """ program_map = {'nwchem': NWChemWrapper} @@ -37,10 +37,14 @@ def _program_selector(program): .format(program)) -def dft(geom, program='NWChem', template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. +def dft(geom, program='NWChem', template=None, tasks='energy', functional='b3lyp', + basis_set='6-31g*', ao_basis='cartesian', charge=0, + atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', + gas=False, max_iter=150, mem_global=1600, mem_heap=100, + mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- @@ -50,35 +54,75 @@ def dft(geom, program='NWChem', template=None, **kwargs): Alias for program selection (NWChem). template : str Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. - + tasks : str or list of str + List of calculations to perform. One or more of "optimize", "energy", + "frequency", "shielding", "spin". + functional : str or list of str + Functional selection. Supply globally or per task. + basis_set : str or list of str + Basis set selection. Supply globally or per task. + ao_basis : str or list of str + Angular function selection ("spherical", "cartesian"). Supply + globally or per task. + charge : int + Nominal charge of the molecule to be optimized. + atoms : list of str + Atom types of interest. Only used for `spin` and `shielding` tasks. + temp : float + Temperature for frequency calculation. Only used if `frequency` is + a selected task. + cosmo : bool + Indicate whether to include COSMO block. Supply globally or per + task. + solvent : str + Solvent selection. Only used if `cosmo` is True. Supply globally or + per task. + gas : bool + Indicate whether to use gas phase calculations. Only used if + `cosmo` is True. Supply globally or per task. + max_iter : int + Maximum number of optimization iterations. + scratch_dir : str + Path to simulation scratch directory. + mem_global : int + Global memory allocation in MB. + mem_heap : int + Heap memory allocation in MB. + mem_stack : int + Stack memory allocation in MB. + Returns ------- :obj:`~isicle.qm.QMWrapper` - Instance of selected QMWrapper. + Wrapper object containing relevant outputs from the simulation. - ''' + """ # Select program - return _program_selector(program).run(geom, template=template, **kwargs) + return _program_selector(program).run(geom, template=template, tasks=tasks, + functional=functional, basis_set=basis_set, + ao_basis=ao_basis, charge=charge, + atoms=atoms, bonds=bonds, temp=temp, + cosmo=cosmo, solvent=solvent, gas=gas, + max_iter=max_iter, mem_global=mem_global, + mem_heap=mem_heap, mem_stack=mem_stack, + scratch_dir=scratch_dir, processes=processes, + command=command) class NWChemWrapper(XYZGeometry, WrapperInterface): - ''' + """ Wrapper for NWChem functionality. - Implements :class:`~isicle.interfaces.WrapperInterface` to ensure - required methods are exposed. - Attributes ---------- temp_dir : str Path to temporary directory used for simulation. task_map : dict Alias mapper for supported quantum mechanical presets. Thses include - "optimze", "shielding", and "spin". + "optimze", "energy", "frequency", "shielding", and "spin". + task_order : dict + Indicates order of tasks in `task_map`. geom : :obj:`~isicle.geometry.Geometry` Internal molecule representation. fmt : str @@ -86,17 +130,22 @@ class NWChemWrapper(XYZGeometry, WrapperInterface): config : str Configuration information for simulation. - ''' + """ + _defaults = ['geom'] _default_value = None def __init__(self, **kwargs): - ''' + """ Initialize :obj:`~isicle.qm.NWChemWrapper` instance. - Establishes aliases for preconfigured tasks. + Parameters + ---------- + kwargs + Keyword arguments to initialize instance attributes. + + """ - ''' self.__dict__.update(dict.fromkeys( self._defaults, self._default_value)) self.__dict__.update(**kwargs) @@ -117,7 +166,7 @@ def __init__(self, **kwargs): self.temp_dir = isicle.utils.mkdtemp() def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -125,39 +174,39 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry - self.geom = geom + self.geom = geom.__copy__() self.basename = self.geom.basename # Save self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters ---------- fmt : str - Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", - ".pdb", ".pkl". + File format. + + """ - ''' # Path operations self.fmt = fmt.lower() geomfile = os.path.join(self.temp_dir, '{}.{}'.format(self.geom.basename, self.fmt.lower())) - # All other formats + # Save isicle.io.save(geomfile, self.geom) self.geom.path = geomfile def _configure_header(self, scratch_dir=None, mem_global=1600, mem_heap=100, mem_stack=600): - ''' + """ Generate header block of NWChem configuration. Parameters @@ -176,7 +225,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, str Header block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -195,7 +244,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, 'print low\n').format(**d) def _configure_load(self, charge=0): - ''' + """ Generate geometry load block of NWChem configuration. Parameters @@ -206,9 +255,9 @@ def _configure_load(self, charge=0): Returns ------- str - Geometry load block of NWChem configuration. + Load block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -220,7 +269,7 @@ def _configure_load(self, charge=0): 'end\n').format(**d) def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): - ''' + """ Generate basis set block of NWChem configuration. Parameters @@ -235,7 +284,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): str Basis set block of NWChem configuration. - ''' + """ d = {'ao_basis': ao_basis, 'basis_set': basis_set} @@ -245,7 +294,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): 'end\n').format(**d) def _configure_dft(self, functional='b3lyp', odft=False): - ''' + """ Generate DFT block of NWChem configuration. Parameters @@ -261,7 +310,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): str DFT block of NWChem configuration. - ''' + """ d = {'functional': functional, 'dft': 'odft' if odft is True else 'dft'} @@ -279,7 +328,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): return s def _configure_driver(self, max_iter=150): - ''' + """ Generate driver block of NWChem configuration. Parameters @@ -292,7 +341,7 @@ def _configure_driver(self, max_iter=150): str Driver block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'max_iter': max_iter} @@ -303,7 +352,7 @@ def _configure_driver(self, max_iter=150): 'end\n').format(**d) def _configure_cosmo(self, solvent='H2O', gas=False): - ''' + """ Generate COSMO block of NWChem configuration. Parameters @@ -318,7 +367,7 @@ def _configure_cosmo(self, solvent='H2O', gas=False): str COSMO block of NWChem configuration. - ''' + """ d = {'solvent': solvent, 'gas': gas} @@ -331,11 +380,9 @@ def _configure_cosmo(self, solvent='H2O', gas=False): def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure frequency block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- temp : float @@ -353,15 +400,13 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Frequency block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -384,11 +429,9 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure energy block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- basis_set : str @@ -404,14 +447,13 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Energy block of NWChem configuration. - ''' + + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -429,9 +471,8 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', max_iter=150, - cosmo=False, solvent='H2O', gas=False, - **kwargs): - ''' + cosmo=False, solvent='H2O', gas=False, **kwargs): + """ Generate meta optimization block of NWChem configuration. Includes basis, DFT, and driver blocks; can include COSMO block. @@ -453,15 +494,13 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to u se gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Optimization meta block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -484,12 +523,9 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta shielding block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- basis_set : str @@ -507,15 +543,14 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Shielding meta block of NWChem configuration. - ''' + """ + # Extract atom index information #idx = self.geom.mol.get_atom_indices(atoms=atoms) #new_idx = [] @@ -546,12 +581,9 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_spin(self, bonds=1, basis_set='6-31G*', ao_basis='spherical', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta spin-spin coupling block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- max_pairs : int @@ -570,15 +602,13 @@ def _configure_spin(self, bonds=1, basis_set='6-31G*', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Spin-spin coupling meta block of NWChem configuration. - ''' + """ def generate_pairs(mol, bonds=bonds): @@ -662,7 +692,7 @@ def configure(self, tasks='energy', functional='b3lyp', atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): - ''' + """ Configure NWChem simulation. Parameters @@ -709,7 +739,7 @@ def configure(self, tasks='energy', functional='b3lyp', str NWChem configuration. - ''' + """ # Cast to list safely tasks = safelist(tasks) @@ -789,7 +819,7 @@ def configure(self, tasks='energy', functional='b3lyp', def configure_from_template(self, path, basename_override=None, dirname_override=None, **kwargs): - ''' + """ Configure NWChem simulation from template file. Use for NWChem functionality not exposed by the wrapper. @@ -814,7 +844,7 @@ def configure_from_template(self, path, basename_override=None, str NWChem configuration. - ''' + """ # Add/override class-managed kwargs if basename_override is not None: @@ -840,10 +870,10 @@ def configure_from_template(self, path, basename_override=None, return self.config def save_config(self): - ''' + """ Write generated NWChem configuration to file. - ''' + """ # Write to file with open(os.path.join(self.temp_dir, @@ -851,10 +881,10 @@ def save_config(self): f.write(self.config) def submit(self): - ''' + """ Submit the NWChem simulation according to configured inputs. - ''' + """ infile = os.path.join(self.temp_dir, self.geom.basename + '.nw') outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') @@ -873,7 +903,7 @@ def submit(self): subprocess.call(s, shell=True) def finish(self): - ''' + """ Parse NWChem simulation results. Returns @@ -881,24 +911,35 @@ def finish(self): :obj:`~isicle.parse.NWChemResult` Parsed result data. - ''' + """ + # Initialize parser parser = NWChemParser() - parser.load(os.path.join(self.temp_dir, + + # Open output file + self.output = parser.load(os.path.join(self.temp_dir, self.basename + '.out')) + + # Parse results result = parser.parse() + # Update this instance attributes self.__dict__.update(result) + + # Update geom attributes self.geom.add___dict__( {k: v for k, v in result.items() if k != 'geom'}) - self.output = parser.load(os.path.join(self.temp_dir, - self.basename + '.out')) + return self - def run(self, geom, template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. + def run(self, geom, template=None, tasks='energy', functional='b3lyp', + basis_set='6-31g*', ao_basis='cartesian', charge=0, + atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', + gas=False, max_iter=150, mem_global=1600, mem_heap=100, + mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- @@ -906,16 +947,49 @@ def run(self, geom, template=None, **kwargs): Molecule representation. template : str Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. + tasks : str or list of str + List of calculations to perform. One or more of "optimize", "energy", + "frequency", "shielding", "spin". + functional : str or list of str + Functional selection. Supply globally or per task. + basis_set : str or list of str + Basis set selection. Supply globally or per task. + ao_basis : str or list of str + Angular function selection ("spherical", "cartesian"). Supply + globally or per task. + charge : int + Nominal charge of the molecule to be optimized. + atoms : list of str + Atom types of interest. Only used for `spin` and `shielding` tasks. + temp : float + Temperature for frequency calculation. Only used if `frequency` is + a selected task. + cosmo : bool + Indicate whether to include COSMO block. Supply globally or per + task. + solvent : str + Solvent selection. Only used if `cosmo` is True. Supply globally or + per task. + gas : bool + Indicate whether to use gas phase calculations. Only used if + `cosmo` is True. Supply globally or per task. + max_iter : int + Maximum number of optimization iterations. + scratch_dir : str + Path to simulation scratch directory. + mem_global : int + Global memory allocation in MB. + mem_heap : int + Heap memory allocation in MB. + mem_stack : int + Stack memory allocation in MB. Returns ------- :obj:`~isicle.qm.NWChemWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # Set geometry self.set_geometry(geom) @@ -924,7 +998,15 @@ def run(self, geom, template=None, **kwargs): if template is not None: self.configure_from_template(template) else: - self.configure(**kwargs) + self.configure(tasks=tasks, + functional=functional, basis_set=basis_set, + ao_basis=ao_basis, charge=charge, + atoms=atoms, bonds=bonds, temp=temp, + cosmo=cosmo, solvent=solvent, gas=gas, + max_iter=max_iter, mem_global=mem_global, + mem_heap=mem_heap, mem_stack=mem_stack, + scratch_dir=scratch_dir, processes=processes, + command=command) # Run QM simulation self.submit() @@ -935,14 +1017,14 @@ def run(self, geom, template=None, **kwargs): return self def get_structure(self): - ''' + """ Extract structure from containing object. Returns ------- :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - - ''' + Structure instance. + + """ return self.geom diff --git a/isicle/utils.py b/isicle/utils.py index 486b980..da92f7e 100644 --- a/isicle/utils.py +++ b/isicle/utils.py @@ -8,7 +8,7 @@ def safelist(x): - ''' + """ Ensures passed object is of correct format. Parameters @@ -20,7 +20,7 @@ def safelist(x): list, :obj:`~pd.core.series.Series`, or :obj:`~np.ndarray` Input safely cast to list-like. - ''' + """ if not isinstance(x, (list, pd.core.series.Series, np.ndarray)): return [x].copy() @@ -28,7 +28,7 @@ def safelist(x): class TypedList(collections.abc.MutableSequence): - ''' + """ Mutable sequence that requires all members be of select type(s). Attributes @@ -38,10 +38,10 @@ class TypedList(collections.abc.MutableSequence): list : list Internal list representation. - ''' + """ def __init__(self, oktypes, *args): - ''' + """ Initialize :obj:`~isicle.utils.TypedList` instance. Parameters @@ -51,7 +51,7 @@ def __init__(self, oktypes, *args): *args Objects to comprise the type-restricted list. - ''' + """ self.oktypes = oktypes self.list = list() @@ -61,7 +61,7 @@ def __init__(self, oktypes, *args): self.extend(list(args)) def check(self, v): - ''' + """ Check if supplied value is of allowed type(s). Raises @@ -69,7 +69,7 @@ def check(self, v): TypeError If value is not of allowed type(s). - ''' + """ if not isinstance(v, self.oktypes): raise TypeError(v) @@ -104,7 +104,7 @@ def atomic_masses(): def gettempdir(): - ''' + """ Return the name of the directory used for temporary files. Returns @@ -112,7 +112,7 @@ def gettempdir(): str Path to temporary directory. - ''' + """ root = os.path.join(tempfile.gettempdir(), 'isicle') @@ -123,7 +123,7 @@ def gettempdir(): def mkdtemp(prefix=None, suffix=None): - ''' + """ An ISiCLE-specific wrapper of :func:`~tempfile.mkdtemp` to create a temporary directory for temporary ISiCLE files. The temporary directory is not automatically removed. @@ -139,17 +139,16 @@ def mkdtemp(prefix=None, suffix=None): ------- str Path to temporary directory. - - ''' - + """ + return tempfile.mkdtemp(dir=gettempdir(), prefix=prefix, suffix=suffix) def rmdtemp(): - ''' + """ Removes all temporary directories and files created by ISiCLE. - ''' + """ shutil.rmtree(gettempdir(), ignore_errors=True) From 6dec1ddf615aaa631de155aae1fd87ff91a7fa08 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Thu, 17 Aug 2023 13:50:25 -0700 Subject: [PATCH 03/14] Add newline --- isicle/geometry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/isicle/geometry.py b/isicle/geometry.py index 6f0f2b3..20f8c62 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -733,7 +733,9 @@ def to_smarts(self): ------- str SMARTS string. + """ + return Chem.MolToSmarts(self.to_mol()) def to_xyzblock(self): From d718fb63f939d0708d62f50bde2748c789c7e0a9 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Thu, 17 Aug 2023 13:59:32 -0700 Subject: [PATCH 04/14] Enforce copy-on-set for md --- isicle/geometry.py | 2 +- isicle/md.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/isicle/geometry.py b/isicle/geometry.py index 20f8c62..3b575d0 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -106,7 +106,7 @@ def md(self, program="xtb", **kwargs): """ - return isicle.md.md(self.__copy__(), program=program, **kwargs) + return isicle.md.md(self, program=program, **kwargs) def ionize(self, ion_path=None, ion_list=None, **kwargs): """ diff --git a/isicle/md.py b/isicle/md.py index aaa94e2..21f0562 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -106,7 +106,7 @@ def set_geometry(self, geom): """ # Assign geometry - self.geom = geom + self.geom = geom.__copy__() self.basename = self.geom.basename # Save geometry @@ -426,7 +426,9 @@ def run(self, geom, task='optimize', forcefield='gfn2', charge=None, self.set_geometry(geom) # Configure - self.configure(**kwargs) + self.configure(task=task, forcefield=forcefield, charge=charge, + ewin=ewin, ion=ion, optlevel=optlevel, dryrun=dryrun, processes=processes, + solvation=solvation, ignore_topology=ignore_topology) # Run QM simulation self.submit() @@ -716,7 +718,7 @@ def set_geometry(self, geom): """ # Assign geometry - self.geom = geom + self.geom = geom.__copy__() self.basename = self.geom.basename self.tinkerxyz = self._convert_to_tinkerxyz() From 8d2f450c2f097c3a0b2137a4670307d8c1329b9e Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 13 Sep 2023 16:15:25 -0700 Subject: [PATCH 05/14] Fix for folder creation race condition --- isicle/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/isicle/utils.py b/isicle/utils.py index da92f7e..8619483 100644 --- a/isicle/utils.py +++ b/isicle/utils.py @@ -116,8 +116,7 @@ def gettempdir(): root = os.path.join(tempfile.gettempdir(), 'isicle') - if not os.path.exists(root): - os.makedirs(root) + os.makedirs(root, exist_ok=True) return root From 22025a8d1f785cbddf68c2cd22710d19d1d7105e Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 20 Sep 2023 13:50:41 -0700 Subject: [PATCH 06/14] Add method to add implicit hydrogens --- isicle/geometry.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/isicle/geometry.py b/isicle/geometry.py index 3b575d0..ab96012 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -317,6 +317,27 @@ def _is_embedded(self): return True except: return False + + + def addHs(self): + """ + Add implicit hydrogens to molecule. + + Returns + ------- + :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Get copy of mol object + mol = self.to_mol() + + # Add Hs with coordinates + mol = Chem.AddHs(mol, addCoords=True) + + # Return new geometry instance + return self.__copy__(mol=mol) def initial_optimize(self, embed=False, forcefield="UFF", ff_iter=200): """ From a11c908447b14739d27320e234eb0efad7704721 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Thu, 21 Sep 2023 15:38:09 -0700 Subject: [PATCH 07/14] Fix XTB single point optimization functionality --- isicle/md.py | 152 ++++++++++++++++++++++++++++++------------------ isicle/parse.py | 28 +++++---- 2 files changed, 110 insertions(+), 70 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 21f0562..5507a31 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -134,23 +134,29 @@ def save_geometry(self, fmt='xyz'): isicle.io.save(geomfile, self.geom) self.geom.path = geomfile - def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solvation=None): + def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=0, solvation=None, + ignore_topology=False, cycles=None, dryrun=False): """ Set command line for xtb simulations. Parameters ---------- forcefield : str - GFN forcefield for the optimization - Default: gff - Supported forcefields: gfn2, gfn1, gff + GFN forcefield for the optimization. + Supported : gfn2, gfn1, gff optlevel : str - Optimization convergence level - Default : normal + Optimization convergence level. Supported : crude, sloppy, loose, lax, normal, tight, vtight extreme charge : int Charge of molecular system. - Default : 0 (Neutral charge) + solvation : str + Specify solvent for simulation. + ignore_topolgy : bool + Turn off only the initial topology check prior to the conformational search. + cycles : int + Maximum number of optimization cycles. + dryrun : bool + Signal whether to perform a dry run. """ @@ -166,55 +172,69 @@ def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solv # Add forcefield s += '--' + forcefield + ' ' - # Add optional charge + # Add charge if charge is not None: - s += '--chrg ' + charge + ' ' + s += '--chrg ' + str(charge) + ' ' + + # Add dryrun option + if dryrun: + s += '--dryrun ' # Add optional implicit solvation if solvation is not None: s += '--alpb ' + solvation + ' ' + if ignore_topology: + s += '--noreftopo ' + + if cycles: + s += '--cycles ' + str(cycles) + ' ' + # Add output s += '&>' + ' ' s += '{}.{}'.format(self.basename, "out") return s - def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', + def _configure_crest(self, forcefield='gfn2', optlevel='Normal', ewin=6, protonate=False, deprotonate=False, tautomerize=False, - ion=None, charge=None, dryrun=False, processes=1, - solvation=None, ignore_topology=False): + ion=None, charge=None, solvation=None, ignore_topology=False, + cycles=None, dryrun=False, processes=1): """ Set command line for crest simulations. Parameters ---------- - ewin : int - Energy window (kcal/mol) for conformer, (de)protomer, or tautomer search. - Default : 6 + forcefield : str + GFN forcefield for the optimization + Supported : gfn2, gfn1, gff optlevel : str Optimization convergence level - Default : normal Supported : crude, sloppy, loose, lax, normal, tight, vtight extreme - forcefield : str - GFN forcefield for the optimization - Default: gff - Supported forcefields: gfn2, gfn1, gff + ewin : int + Energy window (kcal/mol) for conformer, (de)protomer, or tautomer search. protonate : bool Signal to initiate protomer search. Suggested ewin = 30. - Default : False deprotonate : bool Signal to initiate deprotonated conformers. Suggesting ewin = 30. - Default : False - tautomer : bool + tautomerize : bool Signal to initiate tautomer search. - Default : False ion : str Keyword to couple with protonate to ionize molecule with an ion other than a proton. See :obj:`~isicle.adduct.parse_ion` for list of ion options. charge : int Charge of molecular system. - Default : 0 (Neutral charge) + solvation : str + Specify solvent for simulation. + ignore_topolgy : bool + Turn off only the initial topology check prior to the conformational search. + cycles : int + Maximum number of optimization cycles. + dryrun : bool + Signal whether to perform a dry run. + processes : int + Number of parallel processes. + """ # Start base command @@ -237,8 +257,9 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', if ion is not None: s += '-swel ' + ion + ' ' + # Add charge if charge is not None: - s += '-chrg ' + str(charge) + ' ' + s += '--chrg ' + str(charge) + ' ' # Add dryrun option if dryrun: @@ -263,6 +284,9 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', if ignore_topology: s += '--noreftopo ' + if cycles: + s += '--cycles ' + str(cycles) + ' ' + # Add output s += '&>' + ' ' @@ -272,34 +296,39 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', return s - def configure(self, task='optimize', forcefield='gfn2', charge=None, - ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, - solvation=None, ignore_topology=False): + def configure(self, task='optimize', forcefield='gfn2', optlevel='Normal', + ewin=6, charge=None, ion=None, solvation=None, + ignore_topology=False, cycles=None, dryrun=False, processes=1): """ Generate command line Parameters ---------- - tasks : str + task : str Set task to "optimize", "conformer", "protonate", "deprotonate", or "tautomerize". - Default : "optimize" forcefield : str GFN forcefield for the optimization - Default: gff - Supported forcefields: gfn2, gfn1, gff - ewin : int - Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search. - Default : 6 - ion : str - Ion for protomer calculation. + Supported : gfn2, gfn1, gff optlevel : str or list of str Set optimization level. Supply globally or per task. + ewin : int + Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search + charge : int + Charge of molecular system. ion : str Keyword to couple with protonate to ionize molecule with an ion other than a proton. See :obj:`~isicle.adduct.parse_ion` for list of ion options. - charge : int - Charge of molecular system. - Default : 0 (Neutral charge) + solvation : str + Specify solvent for simulation. + ignore_topolgy : bool + Turn off only the initial topology check prior to the conformational search. + cycles : int + Maximum number of optimization cycles. + dryrun : bool + Signal whether to perform a dry run. + processes : int + Number of parallel processes. + """ if type(task) == list: @@ -311,7 +340,12 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, if task == 'optimize': config = self._configure_xtb(optlevel=optlevel, - forcefield=forcefield) + forcefield=forcefield, + charge=charge, + dryrun=dryrun, + solvation=solvation, + ignore_topology=ignore_topology, + cycles=cycles) else: if task == 'conformer': @@ -338,7 +372,8 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, dryrun=dryrun, processes=processes, solvation=solvation, - ignore_topology=ignore_topology) + ignore_topology=ignore_topology, + cycles=cycles) else: raise Error( 'Task not assigned properly, please choose optimize, conformer, protonate, deprotonate, or tautomerize') @@ -369,7 +404,6 @@ def finish(self): self.temp_dir, self.basename + '.out')) result = parser.parse() - self.__dict__.update(result) for i in self.geom: @@ -385,9 +419,9 @@ def finish(self): else: self.geom = self.geom[0] - def run(self, geom, task='optimize', forcefield='gfn2', charge=None, - ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, - solvation=None, ignore_topology=False): + def run(self, geom, task='optimize', forcefield='gfn2', optlevel='Normal', + ewin=6, charge=None, ion=None, solvation=None, ignore_topology=False, + cycles=None, dryrun=False, processes=1): """ Optimize geometry via density functional theory using supplied functional and basis set. @@ -396,21 +430,29 @@ def run(self, geom, task='optimize', forcefield='gfn2', charge=None, ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - tasks : str + task : str Set task to "optimize", "conformer", "protonate", "deprotonate", or "tautomerize". forcefield : str GFN forcefield for the optimization, including "gfn2", "gfn1", "gff". - ewin : int - Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search. - ion : str - Ion for protomer calculation. optlevel : str or list of str Set optimization level. Supply globally or per task. + ewin : int + Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search. + charge : int + Charge of molecular system. ion : str Keyword to couple with protonate to ionize molecule with an ion other than a proton. See :obj:`~isicle.adduct.parse_ion` for list of ion options. - charge : int - Charge of molecular system. Defaults to 0 (neutral). + solvation : str + Specify solvent for simulation. + ignore_topolgy : bool + Turn off only the initial topology check prior to the conformational search. + cycles : int + Maximum number of optimization cycles. + dryrun : bool + Signal whether to perform a dry run. + processes : int + Number of parallel processes. Returns ------- @@ -428,7 +470,7 @@ def run(self, geom, task='optimize', forcefield='gfn2', charge=None, # Configure self.configure(task=task, forcefield=forcefield, charge=charge, ewin=ewin, ion=ion, optlevel=optlevel, dryrun=dryrun, processes=processes, - solvation=solvation, ignore_topology=ignore_topology) + solvation=solvation, ignore_topology=ignore_topology, cycles=cycles) # Run QM simulation self.submit() diff --git a/isicle/parse.py b/isicle/parse.py index 8841032..9a69055 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -739,9 +739,12 @@ def parse(self): # Check that the file is valid first if len(self.contents) == 0: raise RuntimeError('No contents to parse: {}'.format(self.path)) - if "terminated normally" not in self.contents[-1]: - if "ratio" not in self.contents[-2]: - raise RuntimeError('XTB job failed: {}'.format(self.path)) + + last_lines = ''.join(self.contents[-10:]) + if ("terminat" not in last_lines) \ + & ("normal" not in last_lines) \ + & ("ratio" not in last_lines): + raise RuntimeError('XTB job failed: {}'.format(self.path)) self.parse_crest = False self.parse_opt = False @@ -749,11 +752,8 @@ def parse(self): # Initialize result object to store info result = {} + result['protocol'] = self._parse_protocol() - try: - result['protocol'] = self._parse_protocol() - except: - pass try: result['timing'] = self._parse_timing() except: @@ -767,14 +767,12 @@ def parse(self): # Parse geometry from assoc. XYZ file try: if self.path.endswith('xyz'): - - if 'geometry' in to_parse: - try: - self.xyz_path = self.path - result['geom'] = self._parse_xyz() - - except: - pass + try: + self.xyz_path = self.path + result['geom'] = self._parse_xyz() + + except: + pass if self.path.endswith('out') or self.path.endswith('log'): From 77f7231b8e24822cd0046098848799cb96cb3f2a Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Thu, 28 Dec 2023 17:09:46 -0800 Subject: [PATCH 08/14] Add initial ORCA implementation --- isicle/qm.py | 420 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 298 insertions(+), 122 deletions(-) diff --git a/isicle/qm.py b/isicle/qm.py index 0c6febe..bf06e9a 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -1,3 +1,4 @@ +import glob import os import subprocess from itertools import combinations, cycle @@ -10,25 +11,26 @@ from isicle.utils import safelist -def _program_selector(program): +def _backend_selector(program): """ - Selects a supported quantum mechanical program for associated simulation. - Currently only NWChem has been implemented. + Selects a supported quantum mechanical backend for associated simulation. + Currently only NWChem and ORCA have been implemented. Parameters ---------- program : str - Alias for program selection (e.g. NWChem). + Alias for backend selection (e.g. NWChem, ORCA). Returns ------- program - Wrapped functionality of the selected program. Must implement + Wrapped functionality of the selected backend. Must implement :class:`~isicle.interfaces.WrapperInterface`. """ - program_map = {'nwchem': NWChemWrapper} + program_map = {'nwchem': NWChemWrapper, + 'orca': ORCAWrapper} if program.lower() in program_map.keys(): return program_map[program.lower()]() @@ -37,11 +39,7 @@ def _program_selector(program): .format(program)) -def dft(geom, program='NWChem', template=None, tasks='energy', functional='b3lyp', - basis_set='6-31g*', ao_basis='cartesian', charge=0, - atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', - gas=False, max_iter=150, mem_global=1600, mem_heap=100, - mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): +def dft(geom, backend='NWChem', **kwargs): """ Perform density functional theory calculations according to supplied task list and configuration parameters. @@ -50,47 +48,11 @@ def dft(geom, program='NWChem', template=None, tasks='energy', functional='b3lyp ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - program : str - Alias for program selection (NWChem). - template : str - Path to optional template to bypass default configuration process. - tasks : str or list of str - List of calculations to perform. One or more of "optimize", "energy", - "frequency", "shielding", "spin". - functional : str or list of str - Functional selection. Supply globally or per task. - basis_set : str or list of str - Basis set selection. Supply globally or per task. - ao_basis : str or list of str - Angular function selection ("spherical", "cartesian"). Supply - globally or per task. - charge : int - Nominal charge of the molecule to be optimized. - atoms : list of str - Atom types of interest. Only used for `spin` and `shielding` tasks. - temp : float - Temperature for frequency calculation. Only used if `frequency` is - a selected task. - cosmo : bool - Indicate whether to include COSMO block. Supply globally or per - task. - solvent : str - Solvent selection. Only used if `cosmo` is True. Supply globally or - per task. - gas : bool - Indicate whether to use gas phase calculations. Only used if - `cosmo` is True. Supply globally or per task. - max_iter : int - Maximum number of optimization iterations. - scratch_dir : str - Path to simulation scratch directory. - mem_global : int - Global memory allocation in MB. - mem_heap : int - Heap memory allocation in MB. - mem_stack : int - Stack memory allocation in MB. - + backend : str + Alias for backend selection (NWChem, ORCA). + kwargs + Keyword arguments passed to selected backend. + Returns ------- :obj:`~isicle.qm.QMWrapper` @@ -99,18 +61,10 @@ def dft(geom, program='NWChem', template=None, tasks='energy', functional='b3lyp """ # Select program - return _program_selector(program).run(geom, template=template, tasks=tasks, - functional=functional, basis_set=basis_set, - ao_basis=ao_basis, charge=charge, - atoms=atoms, bonds=bonds, temp=temp, - cosmo=cosmo, solvent=solvent, gas=gas, - max_iter=max_iter, mem_global=mem_global, - mem_heap=mem_heap, mem_stack=mem_stack, - scratch_dir=scratch_dir, processes=processes, - command=command) - - -class NWChemWrapper(XYZGeometry, WrapperInterface): + return _backend_selector(backend).run(geom, **kwargs) + + +class NWChemWrapper(WrapperInterface): """ Wrapper for NWChem functionality. @@ -125,31 +79,17 @@ class NWChemWrapper(XYZGeometry, WrapperInterface): Indicates order of tasks in `task_map`. geom : :obj:`~isicle.geometry.Geometry` Internal molecule representation. - fmt : str - File extension indicator. config : str Configuration information for simulation. """ - _defaults = ['geom'] - _default_value = None - - def __init__(self, **kwargs): + def __init__(self): """ Initialize :obj:`~isicle.qm.NWChemWrapper` instance. - Parameters - ---------- - kwargs - Keyword arguments to initialize instance attributes. - """ - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) - self.__dict__.update(**kwargs) - self.task_map = {'optimize': self._configure_optimize, 'energy': self._configure_energy, 'frequency': self._configure_frequency, @@ -181,9 +121,9 @@ def set_geometry(self, geom): self.basename = self.geom.basename # Save - self.save_geometry() + self._save_geometry() - def save_geometry(self, fmt='xyz'): + def _save_geometry(self): """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. @@ -195,10 +135,8 @@ def save_geometry(self, fmt='xyz'): """ # Path operations - self.fmt = fmt.lower() geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.geom.basename, - self.fmt.lower())) + '{}.xyz'.format(self.geom.basename)) # Save isicle.io.save(geomfile, self.geom) @@ -552,12 +490,12 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', """ # Extract atom index information - #idx = self.geom.mol.get_atom_indices(atoms=atoms) - #new_idx = [] + # idx = self.geom.mol.get_atom_indices(atoms=atoms) + # new_idx = [] # for i in idx: # new_idx.append(str(int(i)+1)) - #self.idx = new_idx + # self.idx = new_idx # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -691,7 +629,7 @@ def configure(self, tasks='energy', functional='b3lyp', basis_set='6-31g*', ao_basis='cartesian', charge=0, atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, - mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + mem_stack=600, scratch_dir=None, processes=12): """ Configure NWChem simulation. @@ -803,9 +741,6 @@ def configure(self, tasks='energy', functional='b3lyp', # Store tasks as attribute self.tasks = tasks - # Store cluster as attribute - self.command = command - # Store number of processes as attribute self.processes = processes @@ -890,15 +825,10 @@ def submit(self): outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') logfile = os.path.join(self.temp_dir, self.geom.basename + '.log') - if self.command == 'nwchem': - s = 'mpirun -n {} nwchem {} > {}'.format(self.processes, - infile, - outfile) - else: - s = '{} {} {} {}'.format(self.command, - self.processes, - infile, - outfile) + s = 'mpirun -n {} nwchem {} > {} 2> {}'.format(self.processes, + infile, + outfile, + logfile) subprocess.call(s, shell=True) @@ -908,8 +838,8 @@ def finish(self): Returns ------- - :obj:`~isicle.parse.NWChemResult` - Parsed result data. + dict + Dictionary containing simulation results. """ @@ -918,19 +848,12 @@ def finish(self): # Open output file self.output = parser.load(os.path.join(self.temp_dir, - self.basename + '.out')) - + self.basename + '.out')) + # Parse results result = parser.parse() - # Update this instance attributes - self.__dict__.update(result) - - # Update geom attributes - self.geom.add___dict__( - {k: v for k, v in result.items() if k != 'geom'}) - - return self + return result def run(self, geom, template=None, tasks='energy', functional='b3lyp', basis_set='6-31g*', ao_basis='cartesian', charge=0, @@ -986,8 +909,8 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', Returns ------- - :obj:`~isicle.qm.NWChemWrapper` - Wrapper object containing relevant outputs from the simulation. + dict + Dictionary containing simulation results. """ @@ -1012,19 +935,272 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', self.submit() # Finish/clean up - self.finish() + result = self.finish() + + return result + + +class ORCAWrapper(WrapperInterface): + """ + Wrapper for ORCA functionality. + + Implements :class:`~isicle.interfaces.WrapperInterface` to ensure + required methods are exposed. + + Attributes + ---------- + temp_dir : str + Path to temporary directory used for simulation. + geom : :obj:`~isicle.geometry.Geometry` + Internal molecule representation. + fmt : str + File extension indicator. + config : str + Configuration information for simulation. + result : dict + Dictionary containing simulation results. + + """ + + def __init__(self): + """ + Initialize :obj:`~isicle.qm.ORCAWrapper` instance. + + """ + + # Set up temporary directory + self.temp_dir = isicle.utils.mkdtemp() + + def set_geometry(self, geom): + """ + Set :obj:`~isicle.geometry.Geometry` instance for simulation. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Assign geometry + self.geom = geom.__copy__() + self.basename = self.geom.basename + + # Save + self._save_geometry() + + def _save_geometry(self): + """ + Save internal :obj:`~isicle.geometry.Geometry` representation to file in + XYZ format. + + """ + + # Path to output + geomfile = os.path.join(self.temp_dir, + '{}.xyz'.format(self.geom.basename)) + + # Save + isicle.save(geomfile, self.geom) + + # Store path + self.geom.path = geomfile + + def configure(self, simple_input=[], block_input={}, processes=1, **kwargs): + """ + Configure ORCA simulation. + + Parameters + ---------- + simple_input : list + List of simple input keywords. See `this `__ + section of the ORCA docs. + block_input : dict + Dictionary defining configuration "blocks". Use names of blocks as keys, + lists of each block's content as values. To configure a line of block content + directly, include as a complete string. Include key:value pairs as tuples. + See `this `__ + section of the ORCA docs. + processes : int + Number of parallel processes. + kwargs + Additional keyword arguments fed as key:value pairs. + + Returns + ------- + str + ORCA configuration text. - return self + """ + + # Safely cast to list + simple_input = isicle.utils.safelist(simple_input) + + # Expand simple inputs + config = '! ' + ' '.join(simple_input) + '\n' + + # Add processes + if processes > 1: + config += '%PAL NPROCS {} END\n'.format(processes) + + # Add geometry context + config += '* xyzfile 0 1 {}\n'.format(self.geom.path) + + # Expand keyword args + for k, v in kwargs.items(): + config += '%{} {}\n'.format(k, v) + + # Expand block inputs + for block, params in block_input.items(): + # Safely cast to list + params = isicle.utils.safelist(params) + + # Block header + block_text = '%{}\n'.format(block) + + # Block configuration + for param in params: + if type(param) is str: + block_text += param + '\n' + elif type(param) is tuple: + block_text += ' '.join(map(str, param)) + '\n' + else: + raise TypeError + + # End block + block_text += 'end\n' + + # Append block to config + config += block_text + + # Assign to attribute + self.config = config + + # Save configuration + self._save_config() + + return self.config + + def _save_config(self): + """ + Write generated ORCA configuration to file. + + """ + + # Write to file + with open(os.path.join(self.temp_dir, + self.geom.basename + '.inp'), 'w') as f: + f.write(self.config) + + def submit(self): + """ + Submit the ORCA simulation according to configured inputs. + + """ + + infile = os.path.join(self.temp_dir, self.geom.basename + '.inp') + outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') + logfile = os.path.join(self.temp_dir, self.geom.basename + '.log') + + s = '`which orca` {} > {} 2> {}'.format(infile, + outfile, + logfile) + + subprocess.call(s, shell=True) + + def finish(self): + """ + Collect ORCA simulation results. + + Returns + ------- + dict + Dictionary containing relevant outputs from the simulation. + + """ + + # Get list of outputs + outfiles = glob.glob(os.path.join(self.temp_dir, '*')) + + # Filter out temp files + outfiles = [x for x in outfiles if not x.endswith('.tmp')] + + # Result container + result = {} + + # Enumerate output files + for outfile in outfiles: + # Split name and extension + basename, ext = os.path.basename(outfile).rsplit('.', 1) + + # Grab suitable variable names + if any(basename.endswith(x) for x in ['_property', '_trj']): + var_name = basename.split('_')[-1] + else: + var_name = ext + + # Read output content + with open(outfile, 'rb') as f: + contents = f.read() + + # Attempt utf-8 decode + try: + result[var_name] = contents.decode('utf-8') + except UnicodeDecodeError: + result[var_name] = contents - def get_structure(self): + # Assign to attribute + self.result = result + return self.result + + def run(self, geom, **kwargs): """ - Extract structure from containing object. + Optimize geometry via density functional theory using supplied functional + and basis set. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + template : str + Path to optional template to bypass default configuration process. + **kwargs + Keyword arguments to configure the simulation. + See :meth:`~isicle.qm.ORCAWrapper.configure`. Returns ------- - :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - + dict + Dictionary containing relevant outputs from the simulation. + """ - return self.geom + # Set geometry + self.set_geometry(geom) + + # Configure + self.configure(**kwargs) + + # Run QM simulation + self.submit() + + # Parse outputs + self.finish() + + return self.result + + def save(self, path): + """ + Save result as pickle file. + + Parameters + ---------- + path : str + Path to output file. + + """ + + if hasattr(self, 'result'): + isicle.io.save_pickle(path, self.result) + else: + raise AttributeError("Object must have `result` attribute") From 7b5abc62881dee32f1eac7b08eb471ecd948d0a2 Mon Sep 17 00:00:00 2001 From: Jessica Bade Date: Fri, 26 Jan 2024 10:38:10 -0800 Subject: [PATCH 09/14] Version change (#19) * Modify python and rdkit osx conda versions * Modify python requirements for isicle * Modify python and rdkit linux conda versions * Update rdkit version for bug fix * Rollback python versioning to master, 3.9 * Comment out rdkit in reqs * Swap rdkit-dev for rdkit * Uptick actions steps checkout to v4 * Uptick actions miniconda to v3 * Remove miniforge version (test) * Specific miniforge version (test) * Rollback rdkit-dev spec to rdkit * Update rdkit version in conda recipe --- .github/workflows/actions.yml | 20 ++++++++++---------- conda-recipe/meta.yaml | 2 +- envs/linux.yml | 2 +- envs/osx.yml | 2 +- requirements.txt | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index b59fae8..e4ff9bd 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -6,7 +6,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Apply formatting uses: github/super-linter@v4 @@ -29,13 +29,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true environment-file: envs/linux.yml use-only-tar-bz2: true @@ -59,13 +59,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true environment-file: envs/osx.yml use-only-tar-bz2: true @@ -91,13 +91,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge - miniforge-version: latest + miniforge-version: 4.9.2-4 use-mamba: true python-version: 3.9 channels: conda-forge,bioconda diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 49e3e27..ee75ce6 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -33,7 +33,7 @@ requirements: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/envs/linux.yml b/envs/linux.yml index b1e508b..d981b2f 100644 --- a/envs/linux.yml +++ b/envs/linux.yml @@ -14,7 +14,7 @@ dependencies: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/envs/osx.yml b/envs/osx.yml index a388eb1..38098da 100644 --- a/envs/osx.yml +++ b/envs/osx.yml @@ -12,7 +12,7 @@ dependencies: - openmpi - pandas >=1.1.4 - python =3.9 - - rdkit <=2022.09.1 + - rdkit >=2023.09.1 - snakemake >=6.3.0 - statsmodels >=0.11.1 - xtb >=6.5.1 diff --git a/requirements.txt b/requirements.txt index aecc600..7a559a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ joblib numpy >= 1.19.4 # openbabel pandas >= 1.1.4 -# rdkit <= 2022.09.1 +# rdkit >= 2023.09.1 snakemake >= 6.3.0 statsmodels >= 0.11.1 From 97edbf318f57545286c6457c625f47f2430d87bd Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Tue, 20 Feb 2024 09:45:17 -0800 Subject: [PATCH 10/14] Add orca parser --- isicle/parse.py | 351 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 335 insertions(+), 16 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index dd2124d..834dc4e 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1,12 +1,329 @@ -from isicle.interfaces import FileParserInterface -import pandas as pd -from os.path import splitext import glob import os import pickle +from os.path import splitext + import numpy as np +import pandas as pd from openbabel import pybel + import isicle +from isicle.interfaces import FileParserInterface + + +class ORCAParser(FileParserInterface): + """Extract information from an ORCA simulation output files.""" + + def __init__(self, data=None): + self.data = data + + self.result = {} + + def load(self, path): + self.data = isicle.io.load_pickle(path) + + def _find_output_by_header(self, header): + # Fat regex + pattern = ( + r"(-{2,})\n\s{0,}(" + + re.escape(header) + + ")\s{0,}\n-{2,}\n([\s\S]*?)(?=-{2,}\n[^\n]*\n-{2,}\n|$)" + ) + + # Search ORCA output file + matches = re.findall(pattern, self.data["out"]) + + # Return "body" of each match + return [x[2].strip() for x in matches] + + def _parse_protocol(self): + return self.data["inp"] + + def _parse_geom(self): + return self.data["xyz"] + + def _parse_energy(self): + # Split text + lines = self.data["property"].split("\n") + + # Search for energy values + elines = [x for x in lines if "Total DFT Energy" in x] + + # Energy values not found + if len(elines) == 0: + return None + + # Map float over values + evals = [float(x.split()[-1].strip()) for x in elines] + + # Return last energy value + return evals[-1] + + def _parse_frequency(self): + # Define columns + columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] + + # Split sections by delimiter + blocks = self.data["hess"].split("$") + + # Search for frequency values + freq_block = [x for x in blocks if x.startswith("ir_spectrum")] + + # Frequency values not found + if len(freq_block) == 0: + return None + + # Grab last frequency block + # Doubtful if more than one, but previous results in list + freq_block = freq_block[-1] + + # Split block into lines + lines = freq_block.split("\n") + + # Map float over values + vals = np.array( + [ + list(map(float, x.split())) + for x in lines + if len(x.split()) == len(columns) + ] + ) + + # Zip columns and values + return dict(zip(columns, vals.T)) + + def _parse_timing(self): + # Grab only last few lines + lines = self.data["out"].split("\n")[-100:] + + # Find start of timing section + parts = [] + start_idx = None + for i, line in enumerate(lines): + if line.startswith("Timings for individual modules"): + start_idx = i + 2 + + # Strip out extraneous info + parts.append( + [x.strip() for x in line.split(" ") if x and x.strip() != "..."] + ) + + # Timing not found + if start_idx is None: + return None + + # Split out timing section + tlines = lines[start_idx:] + tparts = parts[start_idx:] + + # Individual timings + timings = [x for x in tparts if any([" sec " in y for y in x])] + timings = {x[0].strip("..."): float(x[1].split()[0]) for x in timings} + + # Boolean indication of success + success = len([x for x in tlines if "ORCA TERMINATED NORMALLY" in x]) > 0 + timings["success"] = success + + # Total time + total_time = [x for x in tlines if "TOTAL RUN TIME" in x] + + if len(total_time) > 0: + total_time = total_time[-1].split(":")[-1].strip() + times = list(map(int, total_time.split()[::2])) + units = total_time.split()[1::2] + else: + total_time = None + + timings["Total run time"] = dict(zip(units, times)) + + return timings + + def _parse_shielding(self): + # Filter comments + property = [ + x.strip() + for x in self.data["property"].split("\n") + if not x.startswith("#") + ] + property = "\n".join(property) + + # Split sections by delimiter + blocks = property.split("$ ") + + # Search for shielding values + shielding_block = [x for x in blocks if x.startswith("EPRNMR_OrbitalShielding")] + + # Shielding values not found + if len(shielding_block) == 0: + return None + + # Grab last shielding block + # Doubtful if more than one, but previous results in list + shielding_block = shielding_block[-1] + + # Define a pattern for extracting relevant information + pattern = re.compile( + r"Nucleus: (\d+) (\w+)\n(Shielding tensor.*?P\(iso\) \s*[-+]?\d*\.\d+)", + re.DOTALL, + ) + + # Match against pattern + matches = pattern.findall(shielding_block) + + # Result container + shielding = {} + + # Enumerate matches + for match in matches: + # Per-nucleus info + nucleus_index = match[0] + nucleus_name = match[1] + nucleus_data = match[2] + + # Extracting values using regex + tensors = re.findall(r"(-?\d+\.\d+|-?\d+.\d+e[+-]\d+)", nucleus_data) + tensors = [float(val) for val in tensors] + + # Creating arrays from extracted values + shielding_tensor = np.array(tensors[:9]).reshape(3, 3) + p_tensor_eigenvectors = np.array(tensors[9:18]).reshape(3, 3) + p_eigenvalues = np.array(tensors[18:21]) + p_iso = float(tensors[21]) + + # Constructing the dictionary with nuclei index and name + shielding[f"{nucleus_index}{nucleus_name}"] = { + "shielding tensor": shielding_tensor, + "P tensor eigenvectors": p_tensor_eigenvectors, + "P eigenvalues": p_eigenvalues, + "P(iso)": p_iso, + } + + # Add shielding summary + shielding["shielding_summary"] = self._parse_shielding_summary() + + return shielding + + def _parse_orbital_energies(self): + header = "ORBITAL ENERGIES" + text = self._find_output_by_header(header) + + # Orbital energies not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != "" and "*" not in x] + columns = text[0].split() + body = [x.split() for x in text[1:]] + + # Construct data frame + df = pd.DataFrame(body, columns=columns, dtype=float) + + # Map correct types + df["NO"] = df["NO"].astype(int) + + # Drop unoccupied orbitals? + return df + + def _parse_spin(self): + header = "SUMMARY OF ISOTROPIC COUPLING CONSTANTS (Hz)" + text = self._find_output_by_header(header) + + # Spin couplings not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != "" and "*" not in x] + columns = [x.replace(" ", "") for x in re.split("\s{2,}", text[0])] + body = [re.split("\s{2,}", x)[1:] for x in text[1:-1]] + + # Construct data frame + return pd.DataFrame(body, dtype=float, columns=columns, index=columns) + + def _parse_shielding_summary(self): + header = "CHEMICAL SHIELDING SUMMARY (ppm)" + text = self._find_output_by_header(header) + + # Shielding values not found + if len(text) == 0: + return None + + # Get last relevant output + text = text[-1].split("\n") + + # Parse table + text = [x.strip() for x in text if x.strip() != ""] + + # Find stop index + stop_idx = -1 + for i, row in enumerate(text): + if all([x == "-" for x in row]): + stop_idx = i + break + + # Split columns and body + columns = text[0].split() + body = [x.split() for x in text[2:stop_idx]] + + # Construct data frame + df = pd.DataFrame(body, columns=columns) + + # Map correct types + for col, dtype in zip(df.columns, (int, str, float, float)): + df[col] = df[col].astype(dtype) + return df + + def _parse_thermo(self): + # In hessian file + header = "THERMOCHEMISTRY_Energies" + + def _parse_molden(self): + return None + + def _parse_charge(self): + return None + + def _parse_connectivity(self): + return None + + def parse(self): + result = { + "protocol": self._parse_protocol(), + "geom": self._parse_geom(), + "total_dft_energy": self._parse_energy(), + "orbital_energies": self._parse_orbital_energies(), + "shielding": self._parse_shielding(), + "spin": self._parse_spin(), + "frequency": self._parse_frequency(), + "molden": self._parse_molden(), + "charge": self._parse_charge(), + "timing": self._parse_timing(), + "connectivity": self._parse_connectivity(), + } + + # Pop success from timing + if result["timing"] is not None: + result["success"] = result["timing"].pop("success") + else: + result["success"] = False + + # Filter empty fields + result = {k: v for k, v in result.items() if v is not None} + + # Store attribute + self.result = result + + return result + + def save(self, path): + isicle.io.save_pickle(path, self.result) class NWChemParser(FileParserInterface): @@ -365,7 +682,7 @@ def parse(self): Parameters ---------- to_parse : list of str - geometry, energy, shielding, spin, frequency, molden, charge, timing + geometry, energy, shielding, spin, frequency, molden, charge, timing """ @@ -760,13 +1077,15 @@ def parse(self): # Check that the file is valid first if len(self.contents) == 0: - raise RuntimeError('No contents to parse: {}'.format(self.path)) - - last_lines = ''.join(self.contents[-10:]) - if ("terminat" not in last_lines) \ - & ("normal" not in last_lines) \ - & ("ratio" not in last_lines): - raise RuntimeError('XTB job failed: {}'.format(self.path)) + raise RuntimeError("No contents to parse: {}".format(self.path)) + + last_lines = "".join(self.contents[-10:]) + if ( + ("terminat" not in last_lines) + & ("normal" not in last_lines) + & ("ratio" not in last_lines) + ): + raise RuntimeError("XTB job failed: {}".format(self.path)) self.parse_crest = False self.parse_opt = False @@ -774,10 +1093,10 @@ def parse(self): # Initialize result object to store info result = {} - result['protocol'] = self._parse_protocol() + result["protocol"] = self._parse_protocol() try: - result['timing'] = self._parse_timing() + result["timing"] = self._parse_timing() except: pass @@ -788,11 +1107,11 @@ def parse(self): # Parse geometry from assoc. XYZ file try: - if self.path.endswith('xyz'): + if self.path.endswith("xyz"): try: self.xyz_path = self.path - result['geom'] = self._parse_xyz() - + result["geom"] = self._parse_xyz() + except: pass From 2de83dad687e6195977c54fcf584e48206a58547 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Tue, 20 Feb 2024 13:21:45 -0800 Subject: [PATCH 11/14] Add regex library import --- isicle/parse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/isicle/parse.py b/isicle/parse.py index 834dc4e..449e6da 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1,6 +1,7 @@ import glob import os import pickle +import re from os.path import splitext import numpy as np From 92419df43b6871424d52ad6eeb04bf07e330082a Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 21 Feb 2024 09:41:25 -0800 Subject: [PATCH 12/14] Clean up imports, backend versus program usage --- isicle/qm.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/isicle/qm.py b/isicle/qm.py index bf06e9a..aef2526 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -1,42 +1,41 @@ import glob import os import subprocess -from itertools import combinations, cycle +from itertools import cycle from string import Template import isicle -from isicle.geometry import XYZGeometry from isicle.interfaces import WrapperInterface from isicle.parse import NWChemParser from isicle.utils import safelist -def _backend_selector(program): +def _backend_selector(backend): """ Selects a supported quantum mechanical backend for associated simulation. Currently only NWChem and ORCA have been implemented. Parameters ---------- - program : str + backend : str Alias for backend selection (e.g. NWChem, ORCA). Returns ------- - program + backend Wrapped functionality of the selected backend. Must implement :class:`~isicle.interfaces.WrapperInterface`. """ - program_map = {'nwchem': NWChemWrapper, + backend_map = {'nwchem': NWChemWrapper, 'orca': ORCAWrapper} - if program.lower() in program_map.keys(): - return program_map[program.lower()]() + if backend.lower() in backend_map.keys(): + return backend_map[backend.lower()]() else: - raise ValueError(('{} not a supported quantum mechanical program.') - .format(program)) + raise ValueError(('{} not a supported quantum mechanical backend.') + .format(backend)) def dft(geom, backend='NWChem', **kwargs): @@ -60,7 +59,7 @@ def dft(geom, backend='NWChem', **kwargs): """ - # Select program + # Select backend return _backend_selector(backend).run(geom, **kwargs) @@ -859,7 +858,7 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', basis_set='6-31g*', ao_basis='cartesian', charge=0, atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, - mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + mem_stack=600, scratch_dir=None, processes=12): """ Perform density functional theory calculations according to supplied task list and configuration parameters. @@ -928,8 +927,7 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', cosmo=cosmo, solvent=solvent, gas=gas, max_iter=max_iter, mem_global=mem_global, mem_heap=mem_heap, mem_stack=mem_stack, - scratch_dir=scratch_dir, processes=processes, - command=command) + scratch_dir=scratch_dir, processes=processes) # Run QM simulation self.submit() From 0db9a622dbe664bc2f6df701a491895aa9cc77c5 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 21 Feb 2024 10:50:17 -0800 Subject: [PATCH 13/14] Update geometry.dft with backend flags --- isicle/geometry.py | 61 ++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/isicle/geometry.py b/isicle/geometry.py index ab96012..b2168fa 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -31,8 +31,7 @@ def __init__(self, **kwargs): """ - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(kwargs) def get_basename(self): @@ -69,13 +68,15 @@ def add___dict__(self, d, override=False): # Add to attributes self.__dict__.update(d) - def dft(self, program='NWChem', **kwargs): + def dft(self, backend="NWChem", **kwargs): """ Perform density functional theory calculations according to supplied task list - and configuration parameters for specified program (currently only NWChem). + and configuration parameters for specified backend. Parameters ---------- + backend : str + Alias for backend selection (NWChem, ORCA). kwargs Keyword arguments supplied to selected program. See `run` method of relevant wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. @@ -87,7 +88,7 @@ def dft(self, program='NWChem', **kwargs): """ - return isicle.qm.dft(self, program=program, **kwargs) + return isicle.qm.dft(self, backend=backend, **kwargs) def md(self, program="xtb", **kwargs): """ @@ -240,8 +241,8 @@ def __copy__(self, **kwargs): d = self.__dict__.copy() # Safely copy mol if present - if 'mol' in d: - d['mol'] = self.to_mol() + if "mol" in d: + d["mol"] = self.to_mol() # Build self instance from scratch instance = type(self)(**d) @@ -254,7 +255,7 @@ def __copy__(self, **kwargs): def to_xyzblock(self): """ Get XYZ text for this structure. - + Returns ------- str @@ -275,9 +276,7 @@ def to_mol(self): self.temp_dir = isicle.utils.mkdtemp() - geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - "xyz")) + geomfile = os.path.join(self.temp_dir, "{}.{}".format(self.basename, "xyz")) isicle.io.save(geomfile, self) mols = list(pybel.readfile("xyz", geomfile)) @@ -297,8 +296,7 @@ class Geometry(XYZGeometry, GeometryInterface): _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys( - self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(kwargs) def _is_embedded(self): @@ -317,8 +315,7 @@ def _is_embedded(self): return True except: return False - - + def addHs(self): """ Add implicit hydrogens to molecule. @@ -370,8 +367,7 @@ def _forcefield_selector(forcefield, mw): if Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mw) is True: return Chem.AllChem.UFFOptimizeMolecule else: - raise ValueError( - "UFF is not available for all atoms in molecule.") + raise ValueError("UFF is not available for all atoms in molecule.") elif forcefield in ["mmff", "mmff94", "mmff94s"]: if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mw) is True: return Chem.rdForceFieldHelpers.MMFFOptimizeMolecule @@ -399,10 +395,8 @@ def _forcefield_selector(forcefield, mw): # Optimize according to supplied forcefield if forcefield is not None: - # Check if embedded if self._is_embedded(): - # Forcefield selection if "mmff94s" in forcefield.lower(): _forcefield_selector(forcefield, mol)( @@ -410,10 +404,10 @@ def _forcefield_selector(forcefield, mw): ) else: _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) - + # Not embedded else: - raise ValueError('Molecule must have embedded 3D coordinates.') + raise ValueError("Molecule must have embedded 3D coordinates.") # Return copy with updated mol return self.__copy__(mol=mol) @@ -525,8 +519,7 @@ def tautomerize(self, return_all=False): res = [mol] tauts = enumerator.Enumerate(mol) smis = [Chem.MolToSmiles(x) for x in tauts] - s_smis = sorted((x, y) - for x, y in zip(smis, tauts) if x != self.to_smiles()) + s_smis = sorted((x, y) for x, y in zip(smis, tauts) if x != self.to_smiles()) res += [y for x, y in s_smis] # Ensure res is a list of mol objects @@ -557,7 +550,7 @@ def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs): Parameters ---------- ion_path : str - Filepath to text file containing ions with charge (eg. `H+`) to be + Filepath to text file containing ions with charge (eg. `H+`) to be considered. Either ion_path or ion_list must be specified. ion_list : list List of strings of adducts to be considered. Must be specifed in @@ -588,7 +581,7 @@ def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs): def get_natoms(self): """ Calculate total number of atoms. - + Returns ------- int @@ -650,7 +643,7 @@ def get_total_partial_charge(self): def get_charge(self): """ Get formal charge of the molecule. - + Returns ------- int @@ -668,12 +661,12 @@ def set_charge(self, charge=None): ---------- charge : int Formal charge of molecule. - + """ if charge is None: charge = self.get_charge() - + self.__dict__.update(charge=int(charge)) def __copy__(self, **kwargs): @@ -684,7 +677,7 @@ def __copy__(self, **kwargs): ---------- kwargs Keyword arguments to update copy. - + Returns ------- :obj:`~isicle.geometry.Geometry` @@ -696,12 +689,12 @@ def __copy__(self, **kwargs): d = self.__dict__.copy() # Safely copy mol if present - if 'mol' in d: - d['mol'] = self.to_mol() + if "mol" in d: + d["mol"] = self.to_mol() # Build self instance from scratch instance = type(self)(**d) - + # Update from kwargs instance.__dict__.update(kwargs) @@ -775,7 +768,7 @@ def to_xyzblock(self): def to_pdbblock(self): """ Get PDB text for this structure. - + Returns ------- str @@ -788,7 +781,7 @@ def to_pdbblock(self): def to_molblock(self): """ Get :obj:`~rdkit.Chem.rdchem.Mol` text for this structure. - + Returns ------- str From 01dc2541fd8460e7dd9af7ac4df96037f101b96c Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 21 Feb 2024 10:51:55 -0800 Subject: [PATCH 14/14] Update md wrapper docstring --- isicle/md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/md.py b/isicle/md.py index ab865d7..0378d52 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -30,7 +30,7 @@ def _program_selector(program): ------- program Wrapped functionality of the selected program. Must implement - :class:`~isicle.interfaces.MDWrapperInterface`. + :class:`~isicle.interfaces.WrapperInterface`. """