merge single-fasta and multi-fasta scripts and executables to 1

olgatsiouri1996 · Dec 21, 2021 · 9dedc9c · 9dedc9c
1 parent 6ca130c
commit 9dedc9c
Show file tree

Hide file tree

Showing 8 changed files with 110 additions and 244 deletions.
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5787392.svg)](https://doi.org/10.5281/zenodo.5787392)
+# bioinfo_gui_scripts 
 Python scripts that can be easily transformed into GUI programs for wet lab scientists to use (see the wiki page for documentation and dependencies)
 ## GUI standalone programs (.exe)
 1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997)
@@ -11,18 +11,15 @@ python scripts that can be easily transformed to gui programs for wet lab scient
 8. amino acids content multifasta calculator GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5275827.svg)](https://doi.org/10.5281/zenodo.5275827)
 9. pdbs secondary structure statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5348006.svg)](https://doi.org/10.5281/zenodo.5348006)
 10. add adapters on single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5559117.svg)](https://doi.org/10.5281/zenodo.5559117)
-11. Trim multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725555.svg)](https://doi.org/10.5281/zenodo.5725555)
-12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725465.svg)](https://doi.org/10.5281/zenodo.5725465)
-13. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003)
-14. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366)
-15. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075)
-16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249)
-17. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5786883.svg)](https://doi.org/10.5281/zenodo.5786883)
-18. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468)
-19. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658)
-20. tab trim to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5760004.svg)](https://doi.org/10.5281/zenodo.5760004)
-21. trim single-fastas to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759929.svg)](https://doi.org/10.5281/zenodo.5759929)
-22. trim multi-fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759811.svg)](https://doi.org/10.5281/zenodo.5759811)
-23. tab trim to multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5759141.svg)](https://doi.org/10.5281/zenodo.5759141)
+11. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003)
+12. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366)
+13. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075)
+14. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249)
+15. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5786883.svg)](https://doi.org/10.5281/zenodo.5786883)
+16. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468)
+17. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658)
+18. trim fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794123.svg)](https://doi.org/10.5281/zenodo.5794123)
+19. trim fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794534.svg)](https://doi.org/10.5281/zenodo.5794534)
+20. tab trim to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794334.svg)](https://doi.org/10.5281/zenodo.5794334)
 
 
diff --git a/...ipulation/tab_trim_to_singlefastas_gui.py → fasta_manipulation/tab_trim_to_fasta_gui.py b/...ipulation/tab_trim_to_singlefastas_gui.py → fasta_manipulation/tab_trim_to_fasta_gui.py
@@ -8,11 +8,13 @@
 # input arguments
 @Gooey(required_cols=1, program_name='tabular trim to single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
 def main():
-    ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in single-fasta files with trimmed sequences")
-    ap.add_argument("-in", "--input", required=True, widget="FileChooser" ,help="input txt file")
+    ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in single-fasta files or a multi-fasta file, with trimmed sequences") 
+    ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input txt file")
     ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)")
     ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and  a negative number)")
     ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1")
+    ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to export 1) 1 multi-fasta file 2)  many single-fasta files. Default is 1")
+    ap.add_argument("-out", "--output", required=False, widget='FileSaver', type=int, help="output multi-fasta file")
     args = vars(ap.parse_args())
     # main
     # create function to trim fasta records
@@ -39,15 +41,23 @@ def fastatrim(fastaseq):
             seq_end = args['stop']
         # subset each fasta record
         return fastaseq[seq_start:seq_end]
-    # insert txt file as dataframe
     df = pd.read_csv(args['input'], header=None, sep="\t")
     # select ids and sequence columns, convert to lists
     headers = df.iloc[:,0].values.tolist()
     sequences = df.iloc[:,1].values.tolist()
-    # iter elements on pairs to export in single fasta files
-    for (ids, seq) in zip(headers, sequences):
-            seq_for_fasta=SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="")
-            SeqIO.write(seq_for_fasta, "".join([str(ids),".fasta"]), "fasta")
+    # choose fasta type to export
+    if args['type'] == 1:     
+        # setup empty list
+        seqs_for_fasta = []
+        # iter elements on pairs to export in single fasta files
+        for (ids, seq) in zip(headers, sequences):
+                seqs_for_fasta.append(SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description=""))
+                SeqIO.write(seqs_for_fasta, args['output'], "fasta")
+    else:
+        # iter elements on pairs to export in single fasta files
+        for (ids, seq) in zip(headers, sequences):
+                seq_for_fasta=SeqRecord(Seq(fastatrim(str(seq))),id=str(ids),description="")
+                SeqIO.write(seq_for_fasta, "".join([str(ids),".fasta"]), "fasta")
 
 if __name__ == '__main__':
     main()

diff --git a/fasta_manipulation/tab_trim_to_multifasta_gui.py b/fasta_manipulation/tab_trim_to_multifasta_gui.py
diff --git a/fasta_manipulation/trim_fasta_gui.py b/fasta_manipulation/trim_fasta_gui.py
@@ -0,0 +1,63 @@
+# python3
+import os
+from gooey import *
+from Bio import SeqIO
+# input parameters
+@Gooey(required_cols=0, program_name='trim  a multi-fasta file or multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
+def main():
+    ap = GooeyParser() 
+    ap.add_argument("-in", "--input", required=False ,widget='FileChooser', help="input fasta file")
+    ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)")
+    ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and  a negative number)")
+    ap.add_argument("-dir", "--directory", required=False, type=str, widget='DirChooser', help="directory to search for fasta files")
+    ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location will be that of the sequence length. Default is 1")
+    ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to import 1) 1 multi-fasta file 2)  many single-fasta files. Default is 1")
+    ap.add_argument("-out", "--output", required=False, widget='FileSaver', help="output fasta file")
+    args = vars(ap.parse_args())
+    # main
+    # create function to trim fasta records
+    def fastatrim(fastarec,fastaseq):
+        # choose program
+        if args['program'] == 1:
+            # fix the index for start parameter
+            if args['start'] > 0:
+                seq_start = args['start'] -1
+            else:
+                print("-start parameter must be a positive integer")
+                exit(1)
+            # add end parameter
+            seq_end = args['stop']
+        else:
+            # fix the index for start parameter
+            if args['start'] > 0:
+                seq_start = args['start'] -1
+            else:
+                print("-start parameter must be a positive integer")
+                exit(1)
+            # add end parameter according to program 2
+            args['stop'] = len(fastaseq)
+            seq_end = args['stop']
+        # subset each fasta record
+        return fastarec[seq_start:seq_end]
+    # choose fasta type to import
+    if args['type'] == 1:    
+        # setup an empty list
+        sequences = []  
+        # iterate for each record
+        for record in SeqIO.parse(args['input'], "fasta"):
+                # add this record to the list
+            sequences.append(fastatrim(record,record.seq))
+        # export to fasta
+        SeqIO.write(sequences, args['output'], "fasta")
+    else:
+        # import each fasta file from the working directory
+        for filename in sorted(os.listdir(os.chdir(args['directory']))):
+            if filename.endswith(".fa") or filename.endswith(".fasta"):
+                # read each file, trim and create SeqRecord to export
+                record = SeqIO.read(filename, "fasta")
+                sequence = fastatrim(record,record.seq)
+                # export to fasta
+                SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta")
+
+if __name__ == '__main__':
+    main()
diff --git a/...ipulation/trim_singlefastas_to_tab_gui.py → fasta_manipulation/trim_fasta_to_tab_gui.py b/...ipulation/trim_singlefastas_to_tab_gui.py → fasta_manipulation/trim_fasta_to_tab_gui.py
@@ -4,14 +4,16 @@
 from Bio import SeqIO
 import pandas as pd
 # input parameters
-@Gooey(required_cols=2, program_name='single-fastas trimmed to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
+@Gooey(required_cols=1, program_name='fasta trimmed to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
 def main():
-    ap = GooeyParser()
+    ap = GooeyParser() 
+    ap.add_argument("-in","--input", required=False, widget='FileChooser', help="input multi-fasta file")
     ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file(default 1)")
     ap.add_argument("-stop", "--stop", required=False, type=int, help="region to stop writing the fasta file(it can be both a positive and  a negative number)")
+    ap.add_argument("-dir", "--directory", required=False, type=str, widget='DirChooser', help="directory to search for fasta files") 
     ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and stop location 2) the stop location with be that of the sequence length. Default is 1")
-    ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files")
-    ap.add_argument("-out","--output", required=True, widget='FileSaver',help="output txt file")
+    ap.add_argument("-type", "--type", required=False,default=1, type=int, help="type of fasta to import 1) 1 multi-fasta file 2)  many single-fasta files. Default is 1")
+    ap.add_argument("-out","--output", required=True, widget='FileSaver', help="output txt file")
     args = vars(ap.parse_args())
     # main
     # create function to trim fasta records
@@ -41,14 +43,21 @@ def fastatrim(fastaseq):
     # setup empty lists
     seqs = []
     ids = []
-    # import each fasta file from the working directory
-    for filename in sorted(os.listdir(os.chdir(args['directory']))):
-        if filename.endswith(".fa") or filename.endswith(".fasta"):
-            # read each file, trim and add to list
-            record = SeqIO.read(filename, "fasta")
+    # choose fasta type to import
+    if args['type'] == 1:     
+       # import multi-fasta file
+        for record in SeqIO.parse(args['input'], "fasta"):
             seqs.append(fastatrim(record.seq))
             ids.append(record.id)
-     # put the 2 list in a data frame of 2 columns
+    else:
+        # import each fasta file from the working directory
+        for filename in sorted(os.listdir(os.chdir(args['directory']))):
+            if filename.endswith(".fa") or filename.endswith(".fasta"):
+                # read each file, trim and add to list
+                record = SeqIO.read(filename, "fasta")
+                seqs.append(fastatrim(record.seq))
+                ids.append(record.id)     
+    # put the 2 list in a data frame of 2 columns
     dfasta = pd.DataFrame()
     dfasta['id'] = ids
     dfasta['seq'] = seqs

diff --git a/fasta_manipulation/trim_multifasta_gui.py b/fasta_manipulation/trim_multifasta_gui.py