From 7bf1618f269a93dd36c5d5b74a0d7c11d6cafe51 Mon Sep 17 00:00:00 2001 From: Olga Tsiouri Date: Mon, 22 Nov 2021 22:34:47 +0200 Subject: [PATCH] fix index count & add subset_pdb_to_fasta,_gui.py and executable --- README.md | 4 +- fasta_manipulation/trim_multifasta_gui.py | 39 ++++++++----- fasta_manipulation/trim_singlefastas_gui.py | 23 ++++++-- pdb_corner/subset_pdb_to_fasta_gui.py | 62 +++++++++++++++++++++ 4 files changed, 108 insertions(+), 20 deletions(-) create mode 100644 pdb_corner/subset_pdb_to_fasta_gui.py diff --git a/README.md b/README.md index 4266864..efd4887 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703709.svg)](https://doi.org/10.5281/zenodo.5703709) +# bioinfo_gui_scripts python scripts that can be easily transformed to gui programs for wet lab scientists to use(see the wiki page for documentation and depedences) ## GUI stadalone programs(.exe) 1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997) @@ -19,3 +19,5 @@ python scripts that can be easily transformed to gui programs for wet lab scient 16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249) 17. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703665.svg)](https://doi.org/10.5281/zenodo.5703665) 18. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468) +19. 
subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5718967.svg)](https://doi.org/10.5281/zenodo.5718967) + diff --git a/fasta_manipulation/trim_multifasta_gui.py b/fasta_manipulation/trim_multifasta_gui.py index d9cf286..8330425 100644 --- a/fasta_manipulation/trim_multifasta_gui.py +++ b/fasta_manipulation/trim_multifasta_gui.py @@ -2,21 +2,34 @@ from gooey import * from Bio import SeqIO # input parameters -@Gooey(required_cols=5, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +@Gooey(required_cols=3, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): - ap = GooeyParser() - ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file") - ap.add_argument("-start", "--start", required=True, type=int, help="region to start writing the fasta file(starts from 0)") - ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can also be a negative number to remove nucleotides from the end of the sequence)") - ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file") - args = vars(ap.parse_args()) -# main - sequences = [] # setup an empty list - for record in SeqIO.parse(args['input'], "fasta"): - # add this record to the list - sequences.append(record[args['start']:args['stop']]) + ap = GooeyParser() + ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file") + ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file") + ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta 
file") + args = vars(ap.parse_args()) + # main + sequences = [] # setup an empty list + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # fix the index for end parameter + if args['stop'] > 0: + seq_end = args['stop'] -1 + else: + seq_end = args['stop'] + # iterate for each record + for record in SeqIO.parse(args['input'], "fasta"): + # add this record to the list + sequences.append(record[seq_start:seq_end]) - SeqIO.write(sequences, args['output'], "fasta") + # export to fasta + SeqIO.write(sequences, args['output'], "fasta") if __name__ == '__main__': main() diff --git a/fasta_manipulation/trim_singlefastas_gui.py b/fasta_manipulation/trim_singlefastas_gui.py index df66cd1..c3f3b40 100644 --- a/fasta_manipulation/trim_singlefastas_gui.py +++ b/fasta_manipulation/trim_singlefastas_gui.py @@ -3,20 +3,31 @@ from gooey import * from Bio import SeqIO # input parameters -@Gooey(required_cols=3, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +@Gooey(required_cols=2, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): ap = GooeyParser() - ap.add_argument("-start", "--start_fasta", required=True, type=int, help="region to start writing the fasta file(min number 0)") - ap.add_argument("-stop", "--stop_fasta", required=True, type=int, help="region to stop writing the fasta file(negative number to remove nucleotides from the end of the sequence") + ap.add_argument("-start", "--start_fasta", required=False, default=1, type=int, help="region to start writing the fasta file") + ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") ap.add_argument("-dir", "--directory", 
required=True, type=str, widget='DirChooser', help="directory to search for fasta files") args = vars(ap.parse_args()) - # main - # import each fasta file from a working directory of choice +# main +# convert the 1-based --start_fasta value (argparse key 'start_fasta') to a 0-based slice index + if args['start_fasta'] > 0: + seq_start = args['start_fasta'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) +# fix the index for end parameter + if args['stop'] > 0: + seq_end = args['stop'] -1 + else: + seq_end = args['stop'] +# import each fasta file from a working directory of choice for filename in sorted(os.listdir(os.chdir(args['directory']))): if filename.endswith(".fa") or filename.endswith(".fasta"): # read each file, trim and create SeqRecord to export record = SeqIO.read(filename, "fasta") - sequence = record[args['start_fasta']:args['stop_fasta']] + sequence = record[seq_start:seq_end] # export to fasta SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta") diff --git a/pdb_corner/subset_pdb_to_fasta_gui.py b/pdb_corner/subset_pdb_to_fasta_gui.py new file mode 100644 index 0000000..dac49a8 --- /dev/null +++ b/pdb_corner/subset_pdb_to_fasta_gui.py @@ -0,0 +1,62 @@ +# python3 +import os +from gooey import * +from Bio.PDB import * +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +# input parameters +@Gooey(required_cols=2, program_name='subset pdb to fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +def main(): + ap = GooeyParser() + ap.add_argument("-pdb", "--pdb", required=True, widget='FileChooser', help="input pdb file") + ap.add_argument("-model", "--model",required=False, default=0, help="model from pdb file to select(integer). 
Default is 0(1 model only)") + ap.add_argument("-chain", "--chain", required=True, help="chain from pdb file to select") + ap.add_argument("-start", "--start", required=False, default=1, type=int, help="amino acid in chain to start writing the fasta file") + ap.add_argument("-end", "--end", required=False, type=int, help="amino acid in chain to end writing the fasta file") + ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location with be that of the latest amino acid in the chain. Default is 1") + args = vars(ap.parse_args()) +# main +# select chain + parser = PDBParser() + s = parser.get_structure("name", args['pdb']) + fill = s[int(args['model'])][args['chain']] +# retrieve the pdb id of the input file + filename = os.path.split(args['pdb'])[1] + pdb_id = filename.split(".")[0] +# retrieve chain amino acids + ppb = PPBuilder() + for pp in ppb.build_peptides(fill): + aa_chain = str(pp.get_sequence()) +# choose program + if args['program'] == 1: + # fix the index for start parameter + if args['start'] > 0: + aa_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # fix the index for end parameter + if args['end'] > 0: + aa_end = args['end'] -1 + else: + aa_end = args['end'] + else: + + # fix the index for start parameter + if args['start'] > 0: + aa_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # end at the last amino acid: the slice end is exclusive, so use the full chain length + args['end'] = len(aa_chain) + aa_end = args['end'] + # subset based on aa in chain + sub_seq = aa_chain[aa_start:aa_end] + # export to fasta + record = SeqRecord(Seq(sub_seq),id="".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end'])]),description="") + SeqIO.write(record, "".join([str(pdb_id),"_",str(args['chain']),"_",str(args['start']),"_",str(args['end']),".fasta"]), "fasta") + +if __name__ == '__main__': + 
main() \ No newline at end of file