diff --git a/README.md b/README.md index f983ca9..47684dc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5719342.svg)](https://doi.org/10.5281/zenodo.5719342) +# bioinfo_gui_scripts python scripts that can be easily transformed to gui programs for wet lab scientists to use(see the wiki page for documentation and depedences) ## GUI stadalone programs(.exe) 1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997) @@ -11,13 +11,14 @@ python scripts that can be easily transformed to gui programs for wet lab scient 8. amino acids content multifasta calculator GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5275827.svg)](https://doi.org/10.5281/zenodo.5275827) 9. pdbs secondary structure statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5348006.svg)](https://doi.org/10.5281/zenodo.5348006) 10. add adapters on single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5559117.svg)](https://doi.org/10.5281/zenodo.5559117) -11. Trim multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5565197.svg)](https://doi.org/10.5281/zenodo.5565197) -12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5565299.svg)](https://doi.org/10.5281/zenodo.5565299) +11. Trim multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725555.svg)](https://doi.org/10.5281/zenodo.5725555) +12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725465.svg)](https://doi.org/10.5281/zenodo.5725465) 13. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003) 14. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366) 15. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075) 16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249) 17. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703665.svg)](https://doi.org/10.5281/zenodo.5703665) 18. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468) -19. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5718967.svg)](https://doi.org/10.5281/zenodo.5718967) +19. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658) + diff --git a/fasta_manipulation/trim_multifasta_gui.py b/fasta_manipulation/trim_multifasta_gui.py index 8330425..7446913 100644 --- a/fasta_manipulation/trim_multifasta_gui.py +++ b/fasta_manipulation/trim_multifasta_gui.py @@ -2,33 +2,47 @@ from gooey import * from Bio import SeqIO # input parameters -@Gooey(required_cols=3, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +@Gooey(required_cols=2, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): ap = GooeyParser() ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file") ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file") - ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file") args = vars(ap.parse_args()) - # main +# main sequences = [] # setup an empty list - # fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) - # fix the index for end parameter - if args['stop'] > 0: - seq_end = args['stop'] -1 - else: - seq_end = args['stop'] - # iterate for each record +# create function to trim fasta records + def fastatrim(fastarec,fastaseq): + # choose program + if args['program'] == 1: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter + seq_end = args['stop'] + else: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter according to program 2 + args['stop'] = len(fastaseq) + seq_end = args['stop'] + # subset each fasta record + return fastarec[seq_start:seq_end] +# iterate for each record for record in SeqIO.parse(args['input'], "fasta"): # add this record to the list - sequences.append(record[seq_start:seq_end]) + sequences.append(fastatrim(record,record.seq)) - # export to fasta +# export to fasta SeqIO.write(sequences, args['output'], "fasta") if __name__ == '__main__': diff --git a/fasta_manipulation/trim_singlefastas_gui.py b/fasta_manipulation/trim_singlefastas_gui.py index c3f3b40..0b62e55 100644 --- a/fasta_manipulation/trim_singlefastas_gui.py +++ b/fasta_manipulation/trim_singlefastas_gui.py @@ -3,34 +3,47 @@ from gooey import * from Bio import SeqIO # input parameters -@Gooey(required_cols=2, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') +@Gooey(required_cols=1, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC') def main(): ap = GooeyParser() - ap.add_argument("-start", "--start_fasta", required=False, default=1, type=int, help="region to start writing the fasta file") - ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") + ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file") + ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)") ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files") + ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1") args = vars(ap.parse_args()) # main -# fix the index for start parameter - if args['start'] > 0: - seq_start = args['start'] -1 - else: - print("-start parameter must be a positive integer") - exit(1) -# fix the index for end parameter - if args['stop'] > 0: - seq_end = args['stop'] -1 - else: - seq_end = args['stop'] -# import each fasta file from a working directory of choice +# create function to trim fasta records + def fastatrim(fastarec,fastaseq): + # choose program + if args['program'] == 1: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter + seq_end = args['stop'] + else: + # fix the index for start parameter + if args['start'] > 0: + seq_start = args['start'] -1 + else: + print("-start parameter must be a positive integer") + exit(1) + # add end parameter according to program 2 + args['stop'] = len(fastaseq) + seq_end = args['stop'] + # subset each fasta record + return fastarec[seq_start:seq_end] +# import each fasta file from the working directory for filename in sorted(os.listdir(os.chdir(args['directory']))): if filename.endswith(".fa") or filename.endswith(".fasta"): # read each file, trim and create SeqRecord to export record = SeqIO.read(filename, "fasta") - sequence = record[seq_start:seq_end] + sequence = fastatrim(record,record.seq) # export to fasta SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta") - if __name__ == '__main__': main() \ No newline at end of file diff --git a/pdb_corner/subset_pdb_to_fasta_gui.py b/pdb_corner/subset_pdb_to_fasta_gui.py index dac49a8..65d321f 100644 --- a/pdb_corner/subset_pdb_to_fasta_gui.py +++ b/pdb_corner/subset_pdb_to_fasta_gui.py @@ -36,21 +36,17 @@ def main(): else: print("-start parameter must be a positive integer") exit(1) - # fix the index for end parameter - if args['end'] > 0: - aa_end = args['end'] -1 - else: - aa_end = args['end'] + # add end parameter + aa_end = args['end'] else: - # fix the index for start parameter if args['start'] > 0: aa_start = args['start'] -1 else: print("-start parameter must be a positive integer") exit(1) - # fix the index for end parameter - args['end'] = len(aa_chain) -1 + # add end parameter according to program 2 + args['end'] = len(aa_chain) aa_end = args['end'] # subset based on aa in chain sub_seq = aa_chain[aa_start:aa_end]