From 9dedc9c5cc0e7576a9c341382911494cf96dfb92 Mon Sep 17 00:00:00 2001 From: Olga Tsiouri Date: Tue, 21 Dec 2021 13:54:20 +0200 Subject: [PATCH] merge single-fasta and multi-fasta scripts and executables to 1 --- README.md | 25 ++++---- ...fastas_gui.py => tab_trim_to_fasta_gui.py} | 24 ++++--- .../tab_trim_to_multifasta_gui.py | 56 ----------------- fasta_manipulation/trim_fasta_gui.py | 63 +++++++++++++++++++ ...to_tab_gui.py => trim_fasta_to_tab_gui.py} | 29 ++++++--- fasta_manipulation/trim_multifasta_gui.py | 49 --------------- .../trim_multifasta_to_tab_gui.py | 59 ----------------- fasta_manipulation/trim_singlefastas_gui.py | 49 --------------- 8 files changed, 110 insertions(+), 244 deletions(-) rename fasta_manipulation/{tab_trim_to_singlefastas_gui.py => tab_trim_to_fasta_gui.py} (65%) delete mode 100644 fasta_manipulation/tab_trim_to_multifasta_gui.py create mode 100644 fasta_manipulation/trim_fasta_gui.py rename fasta_manipulation/{trim_singlefastas_to_tab_gui.py => trim_fasta_to_tab_gui.py} (62%) delete mode 100644 fasta_manipulation/trim_multifasta_gui.py delete mode 100644 fasta_manipulation/trim_multifasta_to_tab_gui.py delete mode 100644 fasta_manipulation/trim_singlefastas_gui.py diff --git a/README.md b/README.md index 2bd7c56..c8600b4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5787392.svg)](https://doi.org/10.5281/zenodo.5787392) +# bioinfo_gui_scripts python scripts that can be easily transformed to gui programs for wet lab scientists to use(see the wiki page for documentation and depedences) ## GUI stadalone programs(.exe) 1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997) @@ -11,18 +11,15 @@ python scripts that can be easily transformed to gui programs for wet lab scient 8. 
amino acids content multifasta calculator GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5275827.svg)](https://doi.org/10.5281/zenodo.5275827) 9. pdbs secondary structure statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5348006.svg)](https://doi.org/10.5281/zenodo.5348006) 10. add adapters on single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5559117.svg)](https://doi.org/10.5281/zenodo.5559117) -11. Trim multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725555.svg)](https://doi.org/10.5281/zenodo.5725555) -12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725465.svg)](https://doi.org/10.5281/zenodo.5725465) -13. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003) -14. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366) -15. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075) -16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249) -17. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5786883.svg)](https://doi.org/10.5281/zenodo.5786883) -18. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468) -19. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658) -20. tab trim to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5760004.svg)](https://doi.org/10.5281/zenodo.5760004) -21. trim single-fastas to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759929.svg)](https://doi.org/10.5281/zenodo.5759929) -22. 
trim multi-fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759811.svg)](https://doi.org/10.5281/zenodo.5759811) -23. tab trim to multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759141.svg)](https://doi.org/10.5281/zenodo.5759141) +11. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003) +12. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366) +13. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075) +14. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249) +15. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5786883.svg)](https://doi.org/10.5281/zenodo.5786883) +16. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468) +17. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658) +18. trim fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794123.svg)](https://doi.org/10.5281/zenodo.5794123) +19. trim fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794534.svg)](https://doi.org/10.5281/zenodo.5794534) +20. 
tab trim to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794334.svg)](https://doi.org/10.5281/zenodo.5794334) diff --git a/fasta_manipulation/tab_trim_to_singlefastas_gui.py b/fasta_manipulation/tab_trim_to_fasta_gui.py similarity index 65% rename from fasta_manipulation/tab_trim_to_singlefastas_gui.py rename to fasta_manipulation/tab_trim_to_fasta_gui.py index dd4569e..8c63806 100644 --- a/fasta_manipulation/tab_trim_to_singlefastas_gui.py +++ b/fasta_manipulation/tab_trim_to_fasta_gui.py @@ -8,11 +8,13 @@ # input arguments @Gooey(required_cols=1, program_name='tabular trim to single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): - ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in single-fasta files with trimmed sequences") - ap.add_argument("-in", "--input", required=True, widget="FileChooser" ,help="input txt file") + ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in single-fasta files or a multi-fasta file, with trimmed sequences") + ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input txt file") ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)") ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") + ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to export 1) 1 multi-fasta file 2) many single-fasta files. 
Default is 1") + ap.add_argument("-out", "--output", required=False, widget='FileSaver', help="output multi-fasta file") args = vars(ap.parse_args()) # main # create function to trim fasta records @@ -39,15 +41,23 @@ def fastatrim(fastaseq): seq_end = args['stop'] # subset each fasta record return fastaseq[seq_start:seq_end] - # insert txt file as dataframe df = pd.read_csv(args['input'], header=None, sep="\t") # select ids and sequence columns, convert to lists headers = df.iloc[:,0].values.tolist() sequences = df.iloc[:,1].values.tolist() - # iter elements on pairs to export in single fasta files - for (ids, seq) in zip(headers, sequences): - seq_for_fasta=SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="") - SeqIO.write(seq_for_fasta, "".join([str(ids),".fasta"]), "fasta") + # choose fasta type to export + if args['type'] == 1: + # setup empty list + seqs_for_fasta = [] + # iter elements on pairs to export in a multi-fasta file + for (ids, seq) in zip(headers, sequences): + seqs_for_fasta.append(SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="")) + SeqIO.write(seqs_for_fasta, args['output'], "fasta") + else: + # iter elements on pairs to export in single fasta files + for (ids, seq) in zip(headers, sequences): + seq_for_fasta=SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="") + SeqIO.write(seq_for_fasta, "".join([str(ids),".fasta"]), "fasta") if __name__ == '__main__': main() diff --git a/fasta_manipulation/tab_trim_to_multifasta_gui.py b/fasta_manipulation/tab_trim_to_multifasta_gui.py deleted file mode 100644 index 66dfcb4..0000000 --- a/fasta_manipulation/tab_trim_to_multifasta_gui.py +++ /dev/null @@ -1,56 +0,0 @@ -# python3 -import itertools -from gooey import * -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord -import pandas as pd -# input arguments -@Gooey(required_cols=2, program_name='tabular trim to multi-fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', 
terminal_panel_color= '#DCDCDC') -def main(): - ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in a multi-fasta file with trimmed sequences") - ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input txt file") - ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)") - ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") - ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") - ap.add_argument("-out", "--output", required=True, widget='FileSaver' ,help="output multi-fasta file") - args = vars(ap.parse_args()) -# main -# create function to trim fasta records - def fastatrim(fastaseq): - # choose program - if args['program'] == 1: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter - seq_end = args['stop'] - else: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter according to program 2 - args['stop'] = len(fastaseq) - seq_end = args['stop'] - # subset each fasta record - return fastaseq[seq_start:seq_end] - df = pd.read_csv(args['input'], header=None, sep="\t") - # select ids and sequence columns, convert to lists - headers = df.iloc[:,0].values.tolist() - sequences = df.iloc[:,1].values.tolist() - # setup empty list - seqs_for_fasta = [] - # iter elements on pairs to export in single fasta files - for (ids, seq) in zip(headers, sequences): - 
seqs_for_fasta.append(SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="")) - SeqIO.write(seqs_for_fasta, args['output'], "fasta") - -if __name__ == '__main__': - main() - diff --git a/fasta_manipulation/trim_fasta_gui.py b/fasta_manipulation/trim_fasta_gui.py new file mode 100644 index 0000000..9a471be --- /dev/null +++ b/fasta_manipulation/trim_fasta_gui.py @@ -0,0 +1,63 @@ +# python3 +import os +from gooey import * +from Bio import SeqIO +# input parameters +@Gooey(required_cols=0, program_name='trim a multi-fasta file or multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +def main(): + ap = GooeyParser() + ap.add_argument("-in", "--input", required=False ,widget='FileChooser', help="input fasta file") + ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)") + ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-dir", "--directory", required=False, type=str, widget='DirChooser', help="directory to search for fasta files") + ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location will be that of the sequence length. Default is 1") + ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to import 1) 1 multi-fasta file 2) many single-fasta files. 
Default is 1") + ap.add_argument("-out", "--output", required=False, widget='FileSaver', help="output fasta file") + args = vars(ap.parse_args()) + # main + # create function to trim fasta records + def fastatrim(fastarec,fastaseq): + # choose program + if args['program'] == 1: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter + seq_end = args['stop'] + else: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter according to program 2 + args['stop'] = len(fastaseq) + seq_end = args['stop'] + # subset each fasta record + return fastarec[seq_start:seq_end] + # choose fasta type to import + if args['type'] == 1: + # setup an empty list + sequences = [] + # iterate for each record + for record in SeqIO.parse(args['input'], "fasta"): + # add this record to the list + sequences.append(fastatrim(record,record.seq)) + # export to fasta + SeqIO.write(sequences, args['output'], "fasta") + else: + # import each fasta file from the working directory + for filename in sorted(os.listdir(os.chdir(args['directory']))): + if filename.endswith(".fa") or filename.endswith(".fasta"): + # read each file, trim and create SeqRecord to export + record = SeqIO.read(filename, "fasta") + sequence = fastatrim(record,record.seq) + # export to fasta + SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta") + +if __name__ == '__main__': + main() diff --git a/fasta_manipulation/trim_singlefastas_to_tab_gui.py b/fasta_manipulation/trim_fasta_to_tab_gui.py similarity index 62% rename from fasta_manipulation/trim_singlefastas_to_tab_gui.py rename to fasta_manipulation/trim_fasta_to_tab_gui.py index 5a10b45..2344a01 100644 --- a/fasta_manipulation/trim_singlefastas_to_tab_gui.py +++ 
b/fasta_manipulation/trim_fasta_to_tab_gui.py @@ -4,14 +4,16 @@ from Bio import SeqIO import pandas as pd # input parameters -@Gooey(required_cols=2, program_name='single-fastas trimmed to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +@Gooey(required_cols=1, program_name='fasta trimmed to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): - ap = GooeyParser() + ap = GooeyParser() + ap.add_argument("-in","--input", required=False, widget='FileChooser', help="input multi-fasta file") ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)") ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-dir", "--directory", required=False, type=str, widget='DirChooser', help="directory to search for fasta files") ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") - ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files") - ap.add_argument("-out","--output", required=True, widget='FileSaver',help="output txt file") + ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to import 1) 1 multi-fasta file 2) many single-fasta files. 
Default is 1") + ap.add_argument("-out","--output", required=True, widget='FileSaver', help="output txt file") args = vars(ap.parse_args()) # main # create function to trim fasta records @@ -41,14 +43,21 @@ def fastatrim(fastaseq): # setup empty lists seqs = [] ids = [] - # import each fasta file from the working directory - for filename in sorted(os.listdir(os.chdir(args['directory']))): - if filename.endswith(".fa") or filename.endswith(".fasta"): - # read each file, trim and add to list - record = SeqIO.read(filename, "fasta") + # choose fasta type to import + if args['type'] == 1: + # import multi-fasta file + for record in SeqIO.parse(args['input'], "fasta"): seqs.append(fastatrim(record.seq)) ids.append(record.id) - # put the 2 list in a data frame of 2 columns + else: + # import each fasta file from the working directory + for filename in sorted(os.listdir(os.chdir(args['directory']))): + if filename.endswith(".fa") or filename.endswith(".fasta"): + # read each file, trim and add to list + record = SeqIO.read(filename, "fasta") + seqs.append(fastatrim(record.seq)) + ids.append(record.id) + # put the 2 list in a data frame of 2 columns dfasta = pd.DataFrame() dfasta['id'] = ids dfasta['seq'] = seqs diff --git a/fasta_manipulation/trim_multifasta_gui.py b/fasta_manipulation/trim_multifasta_gui.py deleted file mode 100644 index 7446913..0000000 --- a/fasta_manipulation/trim_multifasta_gui.py +++ /dev/null @@ -1,49 +0,0 @@ -# python3 -from gooey import * -from Bio import SeqIO -# input parameters -@Gooey(required_cols=2, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') -def main(): - ap = GooeyParser() - ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file") - ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file") - ap.add_argument("-stop", "--stop", required=False, type=int, 
help="region to stop writing the fasta file(it can be both a positive and a negative number)") - ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") - ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file") - args = vars(ap.parse_args()) -# main - sequences = [] # setup an empty list -# create function to trim fasta records - def fastatrim(fastarec,fastaseq): - # choose program - if args['program'] == 1: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter - seq_end = args['stop'] - else: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter according to program 2 - args['stop'] = len(fastaseq) - seq_end = args['stop'] - # subset each fasta record - return fastarec[seq_start:seq_end] -# iterate for each record - for record in SeqIO.parse(args['input'], "fasta"): - # add this record to the list - sequences.append(fastatrim(record,record.seq)) - -# export to fasta - SeqIO.write(sequences, args['output'], "fasta") - -if __name__ == '__main__': - main() diff --git a/fasta_manipulation/trim_multifasta_to_tab_gui.py b/fasta_manipulation/trim_multifasta_to_tab_gui.py deleted file mode 100644 index 49e4aac..0000000 --- a/fasta_manipulation/trim_multifasta_to_tab_gui.py +++ /dev/null @@ -1,59 +0,0 @@ -# python3 -import os -from gooey import * -from Bio import SeqIO -import pandas as pd -# input parameters -@Gooey(required_cols=2, program_name='multi-fasta trimmed to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') -def main(): - ap = GooeyParser() - 
ap.add_argument("-in","--input", required=True, widget='FileChooser', help="input multi-fasta file") - ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)") - ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") - ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") - ap.add_argument("-out","--output", required=True, widget='FileSaver', help="output txt file") - args = vars(ap.parse_args()) - # main - # create function to trim fasta records - def fastatrim(fastaseq): - # choose program - if args['program'] == 1: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter - seq_end = args['stop'] - else: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter according to program 2 - args['stop'] = len(fastaseq) - seq_end = args['stop'] - # subset each fasta record - return fastaseq[seq_start:seq_end] - # setup empty lists - seqs = [] - ids = [] - # import each fasta file from the working directory - for record in SeqIO.parse(args['input'], "fasta"): - seqs.append(fastatrim(record.seq)) - ids.append(record.id) - # put the 2 list in a data frame of 2 columns - dfasta = pd.DataFrame() - dfasta['id'] = ids - dfasta['seq'] = seqs - # export data frame to a tabular txt file - with open(args['output'], 'a') as f: - f.write( - dfasta.to_csv(header = False, index = False, sep= "\t", line_terminator= '\n') - ) - -if __name__ == '__main__': - main() diff --git 
a/fasta_manipulation/trim_singlefastas_gui.py b/fasta_manipulation/trim_singlefastas_gui.py deleted file mode 100644 index 0b62e55..0000000 --- a/fasta_manipulation/trim_singlefastas_gui.py +++ /dev/null @@ -1,49 +0,0 @@ -# python3 -import os -from gooey import * -from Bio import SeqIO -# input parameters -@Gooey(required_cols=1, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') -def main(): - ap = GooeyParser() - ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file") - ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") - ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files") - ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. 
Default is 1") - args = vars(ap.parse_args()) -# main -# create function to trim fasta records - def fastatrim(fastarec,fastaseq): - # choose program - if args['program'] == 1: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter - seq_end = args['stop'] - else: - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # add end parameter according to program 2 - args['stop'] = len(fastaseq) - seq_end = args['stop'] - # subset each fasta record - return fastarec[seq_start:seq_end] -# import each fasta file from the working directory - for filename in sorted(os.listdir(os.chdir(args['directory']))): - if filename.endswith(".fa") or filename.endswith(".fasta"): - # read each file, trim and create SeqRecord to export - record = SeqIO.read(filename, "fasta") - sequence = fastatrim(record,record.seq) - # export to fasta - SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta") - -if __name__ == '__main__': - main() \ No newline at end of file