diff --git a/src/MolecularDockingKit/combine.py b/src/MolecularDockingKit/combine.py index d5e589d..372d8f2 100644 --- a/src/MolecularDockingKit/combine.py +++ b/src/MolecularDockingKit/combine.py @@ -1,50 +1,62 @@ -import os, glob, sys -from rdkit import Chem -from multiprocessing import Pool +import os +import sys from rdkit import Chem from rdkit.Chem import AllChem import numpy as np -########################################################################### -# # -# This program takes in one argument for the file index for the file # -# inside the directory called moleculeLists.txt named something like # -# file000, file001 etc. that contains the names of five drugs. The energy # -# calculations for these drugs is submitted together. This way, I # -# parallelize the calculations with brute force separate job submitions. # -# # -########################################################################### - -molList = open('moleculeLists/fiveSplits/file' + sys.argv[1] + '.txt', 'r').readlines() -molList = [mol.strip() for mol in molList] -enzymeFile = '../../3sxr_dasatinib_removed.pdb' - -os.chdir('rDock_inputs/') -for ligandName in molList: - os.chdir(ligandName) - suppl = Chem.SDMolSupplier(ligandName + '_docking_out_sorted.sd') - mols = [x for x in suppl] - m1 = mols[-1] - m2 = Chem.rdmolfiles.MolFromPDBFile(enzymeFile) +def calculate_energy(file_index: str) -> None: + """ + Perform energy calculations for a list of drugs specified in the file indexed by `file_index`. + + Args: + file_index (str): Index of the file containing the list of drugs. + + Returns: + None + """ + mol_list = open(f'moleculeLists/fiveSplits/file{file_index}.txt', 'r').readlines() + mol_list = [mol.strip() for mol in mol_list] + enzyme_file = '../../3sxr_dasatinib_removed.pdb' - #Generating the ligand + protein geometry - combo = Chem.CombineMols(m1, m2) - combo = Chem.AddHs(combo, addCoords=True) - Chem.rdmolfiles.MolToPDBFile(combo, 'combo.pdb') + os.chdir('rDock_inputs/') + for ligand_name in mol_list: + os.chdir(ligand_name) + m1, m2 = load_molecules(ligand_name, enzyme_file) - #Calculating the energy for the (ligand + protein) geometry - #As I say in the other document, xtb did not work and I ended up using rdkit - #for this step - #os.system('xtb --gfnff combo.xyz > combo.log') - res = AllChem.MMFFOptimizeMoleculeConfs(combo, maxIters=0, numThreads=0) + # Generate the ligand + protein geometry + combo = Chem.CombineMols(m1, m2) + combo = Chem.AddHs(combo, addCoords=True) + Chem.rdmolfiles.MolToPDBFile(combo, 'combo.pdb') - #Calculating the energies for the ligand only geometry - m1 = Chem.AddHs(m1, addCoords=True) - res1 = AllChem.MMFFOptimizeMoleculeConfs(m1, maxIters=0, numThreads=0) + # Calculate the energy for the (ligand + protein) geometry + res = AllChem.MMFFOptimizeMoleculeConfs(combo, maxIters=0, numThreads=0) - #Saving the energies for the (ligand + protein) and the ligand only geometry in a file - np.savetxt('energies.txt', [res[0][1], res1[0][1]]) + # Calculate the energies for the ligand only geometry + res1 = AllChem.MMFFOptimizeMoleculeConfs(m1, maxIters=0, numThreads=0) + + # Save the energies for the (ligand + protein) and the ligand only geometry in a file + np.savetxt('energies.txt', [res[0][1], res1[0][1]]) + os.chdir('../../../') os.chdir('../../../') -os.chdir('../../../') +def load_molecules(ligand_name: str, enzyme_file: str) -> tuple[Chem.Mol, Chem.Mol]: + """ + Load ligand and enzyme molecules from SD and PDB files, respectively. + + Args: + ligand_name (str): Name of the ligand molecule. + enzyme_file (str): Path to the enzyme PDB file. + + Returns: + Tuple containing the ligand and enzyme molecules. + """ + suppl = Chem.SDMolSupplier(f'{ligand_name}_docking_out_sorted.sd') + mols = [x for x in suppl] + m1 = mols[-1] + m2 = Chem.rdmolfiles.MolFromPDBFile(enzyme_file) + return m1, m2 +# Example usage +if __name__ == "__main__": + file_index = sys.argv[1] + calculate_energy(file_index)