From 6f1f25134fc1e996c7928acd30283e448e2ddb11 Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Thu, 30 Jul 2020 12:47:22 +0200
Subject: [PATCH 1/6] Starting Python3 porting

---
 DCC/Circ_nonCirc_Exon_Match.py |  8 ++--
 DCC/CombineCounts.py           |  4 +-
 DCC/IntervalTree.py            |  2 +-
 DCC/circFilter.py              |  4 +-
 DCC/findcircRNA.py             |  8 ++--
 DCC/fix2chimera.py             | 10 ++---
 DCC/genecount.py               | 36 ++++++++--------
 DCC/main.py                    | 76 +++++++++++++++++-----------------
 setup.py                       | 21 ++++++----
 9 files changed, 87 insertions(+), 82 deletions(-)

diff --git a/DCC/Circ_nonCirc_Exon_Match.py b/DCC/Circ_nonCirc_Exon_Match.py
index 6f27a0a..9e0b1fe 100644
--- a/DCC/Circ_nonCirc_Exon_Match.py
+++ b/DCC/Circ_nonCirc_Exon_Match.py
@@ -194,7 +194,7 @@ def printuniq(self, Infile):
         for lin in f:
             lin_split = lin.split('\t')
             if keys.count(lin_split[0] + '\t' + lin_split[1] + '\t' + lin_split[2]) == 1:
-                print lin.strip('\n')
+                print(lin.strip('\n'))
 
     def readgtf(self, gtf_file):
         # store nonCircExons based on transcript_id and exon_number with all its annotations from different transcripts
@@ -275,7 +275,7 @@ def readHTSeqCount(self, HTSeqCount, exon_id2custom_exon_id):
     def findcircAdjacent(self, circExons, Custom_exon_id2Iv, Iv2Custom_exon_id, start=True):
         circAdjacentExons = {}
         circAdjacentExonsIv = {}
-        for key in circExons.keys():
+        for key in list(circExons.keys()):
             for ids in circExons[key]:
                 try:
                     interval = Custom_exon_id2Iv[self.getAdjacent(ids, start=start)]
@@ -292,7 +292,7 @@ def printCounts(self, Exons, Count_custom_exon_id, Custom_exon_id2Length):
         # Print the counts of circexons and adjacentexons
         # Exons: dictionaries with intervals as key, custom_exon_id as values
         ExonCounts = {}
-        for key in Exons.keys():
+        for key in list(Exons.keys()):
             counts = []
             for ids in Exons[key]:  # If for circAdjacentExons, ids here is a list
                 try:
@@ -397,7 +397,7 @@ def readSJ_out_tab(self, SJ_out_tab):
                                   strand] = lin_split[6]
             sj.close()
         except IOError:
-            print 'Do you have SJ.out.tab files in your sample folder? DCC cannot find it.'
+            print('Do you have SJ.out.tab files in your sample folder? DCC cannot find it.')
         return junctionReadCount
 
     def getskipjunctionCount(self, exonskipjunctions, junctionReadCount):
diff --git a/DCC/CombineCounts.py b/DCC/CombineCounts.py
index 5bc3901..434ecb5 100644
--- a/DCC/CombineCounts.py
+++ b/DCC/CombineCounts.py
@@ -35,9 +35,9 @@ def comb_coor(self, circfiles, strand=True):
             onefile.close()
 
         if strand:
-            coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.iteritems()]
+            coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.items()]
         else:
-            coors = ['{}{}'.format(key, value) for key, value in coorsDict.iteritems()]
+            coors = ['{}{}'.format(key, value) for key, value in coorsDict.items()]
 
         coorsSorted = self.sortBed(coors, retList=True)
         for itm in coorsSorted:
diff --git a/DCC/IntervalTree.py b/DCC/IntervalTree.py
index e3333c9..789bc7a 100644
--- a/DCC/IntervalTree.py
+++ b/DCC/IntervalTree.py
@@ -36,7 +36,7 @@ def intersect(self, interval, report_func):
             # use the intersect method of IntervalNode class, need make this function aware of strand
 
     def traverse(self, func):
-        for item in self.chroms.itervalues():
+        for item in self.chroms.values():
             item.traverse(func)
 
 
diff --git a/DCC/circFilter.py b/DCC/circFilter.py
index c6f66e5..528baa5 100644
--- a/DCC/circFilter.py
+++ b/DCC/circFilter.py
@@ -63,7 +63,7 @@ def readcirc(self, countfile, coordinates):
 
     # Do filtering
     def filtercount(self, count, indx):
-        print 'Filtering by read counts'
+        print('Filtering by read counts')
         sel = []  # store the passed filtering rows
         for itm in range(len(count)):
             if indx[itm][4] == '0':
@@ -117,7 +117,7 @@ def dummy_filter(self, indx0, count0):
         np.savetxt(self.tmp_dir + 'tmp_unsortedWithChrM', nonrep, delimiter='\t', newline='\n', fmt='%s')
 
     def removeChrM(self, withChrM):
-        print 'Remove ChrM'
+        print('Remove ChrM')
         unremoved = open(withChrM, 'r').readlines()
         removed = []
         for lines in unremoved:
diff --git a/DCC/findcircRNA.py b/DCC/findcircRNA.py
index 20d45b4..9482236 100644
--- a/DCC/findcircRNA.py
+++ b/DCC/findcircRNA.py
@@ -78,7 +78,7 @@ def sepDuplicates(self, Chim_junc, duplicates, nonduplicates):
             if reads.count(read) == 2:
                 dup.write(lines[indx])
             elif reads.count(read) > 2:
-                print 'Read %s has more than 2 count.' % read
+                print('Read %s has more than 2 count.' % read)
                 try:
                     logging.warning('Read %s has more than 2 count.' % read)
                 except NameError:
@@ -159,8 +159,8 @@ def findcirc(self, Chim_junc, output, strand=True):
             linecnt = linecnt + 1
 
             if len(L) < 14:
-                print ("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features.")
-                print ("WARNING: " + str(Chim_junc) + " is probably corrupt.")
+                print(("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features."))
+                print(("WARNING: " + str(Chim_junc) + " is probably corrupt."))
             if L[0] == "chr_donorA":
                continue
             if int(L[6]) >= 0 and L[0] == L[3] and L[2] == L[5] and (
@@ -217,7 +217,7 @@ def count(self, sortedlist, strand=True):
             elif not strand:
                 circs = (itm[0], itm[1], itm[2])
             else:
-                print "Please specify correct strand information."
+                print("Please specify correct strand information.")
             cnt[circs] += 1
             itm.append(str(cnt[circs]))
             # tmp_count.append( [itm[0],itm[1],itm[2],itm[3],itm[7],itm[4],itm[5],itm[6]] )
diff --git a/DCC/fix2chimera.py b/DCC/fix2chimera.py
index a031cc2..8d26a00 100644
--- a/DCC/fix2chimera.py
+++ b/DCC/fix2chimera.py
@@ -55,10 +55,10 @@ def modify_junctiontype(junctiontype):
                 continue
             # check if the row has all fields
             if len(line_split) < 14:
-                print ("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
-                       + " does not contain all features.")
-                print ("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt.")
-                print ("WARNING: Offending line: " + str(line))
+                print(("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt)
+                       + " does not contain all features."))
+                print(("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt."))
+                print(("WARNING: Offending line: " + str(line)))
 
             linecnt += 1
 
@@ -110,7 +110,7 @@ def printduplicates(self, merged, duplicates, field=10):
         if not os.path.isfile(merged):
             sys.exit("ERROR: File " + str(merged) + " is missing!")
         elif os.stat(merged).st_size == 0:
-            print ("WARNING: File " + str(merged) + " is empty!")
+            print(("WARNING: File " + str(merged) + " is empty!"))
         else:
             try:
                 inputfile = open(merged, 'r')
diff --git a/DCC/genecount.py b/DCC/genecount.py
index ab7b0d4..2cd84e2 100644
--- a/DCC/genecount.py
+++ b/DCC/genecount.py
@@ -99,33 +99,33 @@ def genecount(self, circ_coordinates, bamfile, ref, tid):
         start_coordinates.close()
         end_coordinates.close()
 
-        print ('Started linear gene expression counting for %s' % bamfile)
+        print(('Started linear gene expression counting for %s' % bamfile))
 
         start = time.time()
         # mpileup get the read counts of the start and end positions
-        print ("\t=> running mpileup for start positions [%s]" % bamfile)
+        print(("\t=> running mpileup for start positions [%s]" % bamfile))
         mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coordinates_' + tid)
         end = time.time() - start
-        print ("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end))
+        print(("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end)))
 
         start = time.time()
         # mpileup get the read counts of the start and end positions
-        print ("\t=> running mpileup for end positions [%s]" % bamfile)
+        print(("\t=> running mpileup for end positions [%s]" % bamfile))
         mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coordinates_' + tid)
         end = time.time() - start
-        print ("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end))
+        print(("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end)))
 
-        print "\t=> gathering read counts for start positions [%s]" % bamfile
+        print("\t=> gathering read counts for start positions [%s]" % bamfile)
         startcount = self.getreadscount(mpileup_start, countmapped=True)
 
-        print "\t=> gathering read counts for end positions [%s]" % bamfile
+        print("\t=> gathering read counts for end positions [%s]" % bamfile)
         endcount = self.getreadscount(mpileup_end, countmapped=True)
 
         # remove tmp files
         # os.remove(self.tmp_dir + 'tmp_start_coordinates_' + tid)
         # os.remove(self.tmp_dir + 'tmp_end_coordinates_' + tid)
 
-        print 'Finished linear gene expression counting for %s' % bamfile
+        print('Finished linear gene expression counting for %s' % bamfile)
 
         return startcount, endcount
 
@@ -194,29 +194,29 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
         start_coor_1.close()
         end_coor.close()
         end_coor_1.close()
-        print ('Started linear spliced read counting for %s' % bamfile)
+        print(('Started linear spliced read counting for %s' % bamfile))
 
         # mpileup get the number of spliced reads at circle start position and (start-1) position.
 
-        print ("\t=> running mpileup 1 for start positions [%s]" % bamfile)
+        print(("\t=> running mpileup 1 for start positions [%s]" % bamfile))
         mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_1')
 
-        print ("\t=> running mpileup 2 for start positions [%s]" % bamfile)
+        print(("\t=> running mpileup 2 for start positions [%s]" % bamfile))
         mpileup_start_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_2')
 
         # mpileup get the number of spliced reads at circle end position and (end+1) position.
-        print ("\t=> running mpileup 1 for end positions [%s]" % bamfile)
+        print(("\t=> running mpileup 1 for end positions [%s]" % bamfile))
         mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_1')
 
-        print ("\t=> running mpileup 2 for end positions [%s]" % bamfile)
+        print(("\t=> running mpileup 2 for end positions [%s]" % bamfile))
         mpileup_end_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_2')
 
         # get count
 
-        print "\t=> gathering read counts for start positions [%s]" % bamfile
+        print("\t=> gathering read counts for start positions [%s]" % bamfile)
         startcount = self.submpileup(self.getreadscount(mpileup_start_1), self.getreadscount(mpileup_start))
 
-        print "\t=> gathering read counts for end positions [%s]" % bamfile
+        print("\t=> gathering read counts for end positions [%s]" % bamfile)
         endcount = self.submpileup(self.getreadscount(mpileup_end), self.getreadscount(mpileup_end_1), left=False)
 
         # remove tmp files
@@ -225,7 +225,7 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True):
         # os.remove(self.tmp_dir + 'tmp_end_coor')
         # os.remove(self.tmp_dir + 'tmp_end_coor_1')
 
-        print 'Finished linear spliced read counting for %s' % bamfile
+        print('Finished linear spliced read counting for %s' % bamfile)
 
         return startcount, endcount
 
@@ -266,7 +266,7 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
             # call genecount to get the start and end positon read counts
             tmp_start, tmp_end = self.genecount(circ_coor, bamfile, ref, tid)
 
-        print 'Ended linear gene expression counting %s' % bamfile
+        print('Ended linear gene expression counting %s' % bamfile)
         logging.info('Ended linear gene expression counting %s' % bamfile)
 
         for line in tmp_start:
@@ -314,6 +314,6 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread
         # tmp_end.close()
         count_table.close()
 
-        print 'Ended post processing %s' % bamfile
+        print('Ended post processing %s' % bamfile)
         logging.info('Ended post processing %s' % bamfile)
         return tid
diff --git a/DCC/main.py b/DCC/main.py
index 3fc40ef..8cfabcb 100644
--- a/DCC/main.py
+++ b/DCC/main.py
@@ -22,7 +22,7 @@
 
 
 def main():
-    version = "0.4.8"
+    version = "0.5.0"
 
     parser = argparse.ArgumentParser(prog="DCC", formatter_class=argparse.RawDescriptionHelpFormatter,
                                      fromfile_prefix_chars="@",
@@ -49,7 +49,7 @@ def main():
                        help="Must be enabled for stranded libraries, aka 'fr-secondstrand' [default: False]")
     group.add_argument("-N", "--nonstrand", action="store_false", dest="strand", default=True,
                        help="The library is non-stranded [default stranded]")
-    group.add_argument("-E", "--endTol", dest="endTol", type=int, default=5, choices=range(0, 10),
+    group.add_argument("-E", "--endTol", dest="endTol", type=int, default=5, choices=list(range(0, 10)),
                        help="Maximum base pair tolerance of reads extending over junction sites [default: 5]")
     group.add_argument("-m", "--maximum", dest="max", type=int, default=1000000,
                        help="The maximum length of candidate circRNAs (including introns) [default: 1000000]")
@@ -112,12 +112,12 @@ def main():
         try:
             os.makedirs(options.out_dir)
         except OSError:
-            print "Could not create output folder %s" % options.out_dir
+            print("Could not create output folder %s" % options.out_dir)
             logging.info("Could not create output folder %s" % options.out_dir)
 
             exit(-1)
     else:
-        print "Output folder %s already exists, reusing" % options.out_dir
+        print("Output folder %s already exists, reusing" % options.out_dir)
 
     # create temporary directory if not existing
 
@@ -125,17 +125,17 @@ def main():
         try:
             os.makedirs(options.tmp_dir)
         except OSError:
-            print "Could not create temporary folder %s" % options.tmp_dir
+            print("Could not create temporary folder %s" % options.tmp_dir)
             exit(-1)
     else:
-        print "Temporary folder %s already exists, reusing" % options.tmp_dir
+        print("Temporary folder %s already exists, reusing" % options.tmp_dir)
 
     logging.basicConfig(filename=os.path.join(options.out_dir, "DCC-" + timestr + ".log"),
                         filemode="w", level=logging.DEBUG,
                         format="%(asctime)s %(message)s")
 
     logging.info("DCC %s started" % version)
-    print "DCC %s started" % version
+    print("DCC %s started" % version)
     logging.info('DCC command line: ' + ' '.join(sys.argv))
 
     # Get input file names
@@ -143,7 +143,7 @@ def main():
     options.Input = remove_empty_lines(options.Input)
 
     if (options.mate1 and not options.mate1) or (options.mate2 and not options.mate1) and options.pairedendindependent:
-        print "Only one mate data file supplied; check if both, -mt1 and -mt2 are specified."
+        print("Only one mate data file supplied; check if both, -mt1 and -mt2 are specified.")
         logging.info("Only one mate data file supplied; check if both, -mt1 and -mt2 are specified.")
         exit(-1)
 
@@ -180,10 +180,10 @@ def main():
     cpu_count = multiprocessing.cpu_count()
 
     if options.cpu_threads <= cpu_count:
-        print "%s CPU cores available, using %s" % (cpu_count, options.cpu_threads)
+        print("%s CPU cores available, using %s" % (cpu_count, options.cpu_threads))
     else:
-        print "Only %s CPU cores available while %s requested, falling back to %s" % \
-              (cpu_count, options.cpu_threads, cpu_count)
+        print("Only %s CPU cores available while %s requested, falling back to %s" % \
+              (cpu_count, options.cpu_threads, cpu_count))
         options.cpu_threads = cpu_count
 
     pool = multiprocessing.Pool(processes=options.cpu_threads)
@@ -233,7 +233,7 @@ def main():
             logging.info("Stranded data mode")
         else:
             logging.info("Non-stranded data, the strand of circRNAs guessed from the strand of host genes")
-            print "WARNING: non-stranded data, the strand of circRNAs guessed from the strand of host genes"
+            print("WARNING: non-stranded data, the strand of circRNAs guessed from the strand of host genes")
 
         # Start de novo circular RNA detection model
         # Create instances
@@ -241,7 +241,7 @@ def main():
         sort = Fc.Sort()
 
         if options.pairedendindependent:
-            print "Please make sure that the read pairs have been mapped both, combined and on a per mate basis"
+            print("Please make sure that the read pairs have been mapped both, combined and on a per mate basis")
             logging.info("Please make sure that the read pairs have been mapped both, combined and on a per mate basis")
 
             # Fix2chimera problem by STAR
@@ -336,7 +336,7 @@ def main():
                 file2filter = options.filteronly[0]
                 coorfile = options.filteronly[1]
                 logging.info("Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1]))
-                print "Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1])
+                print("Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1]))
 
             except IndexError:
                 logging.error("Program exit because input error. Please check the input. If only use the program "
@@ -354,7 +354,7 @@ def main():
             file2filter = options.tmp_dir + "tmp_circCount"
             coorfile = options.tmp_dir + "tmp_coordinates"
             logging.info("Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering")
-            print "Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering"
+            print("Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering")
 
         if options.rep_file:
             rep_file = options.rep_file
@@ -420,17 +420,17 @@ def main():
     if options.gene:
         # import the list of bamfile names as a file
         if not options.bam:
-            print "No BAM files provided (-B) trying to automatically guess BAM file names"
+            print("No BAM files provided (-B) trying to automatically guess BAM file names")
             logging.info("No BAM files provided (-B) trying to automatically guess BAM file names")
             bamfiles = convertjunctionfile2bamfile(options.Input)
             if not bamfiles:
-                print "Could not guess BAM file names, please provides them manually via -B"
+                print("Could not guess BAM file names, please provides them manually via -B")
                 logging.info("Could not guess BAM file names, please provides them manually via -B")
         else:
             bamfiles = remove_empty_lines(options.bam)
 
         if not options.refseq:
-            print "Please provide reference sequence, program will not count host gene expression"
+            print("Please provide reference sequence, program will not count host gene expression")
             logging.warning("Please provide reference sequence, program will not count host gene expression")
 
         if options.refseq:
@@ -446,7 +446,7 @@ def main():
                 logging.error("The following BAM files seem to be not sorted by coordinate or are missing an index:")
                 logging.error(', '.join(unsortedBAMS))
                 print("The following BAM files seem to be not sorted by coordinate or are missing an index:")
-                print(', '.join(unsortedBAMS))
+                print((', '.join(unsortedBAMS)))
                 sys.exit("Error: not all BAM files are sorted by coordinate or are missing indices")
             else:
                 # For each sample (each bamfile), do one host gene count, and then combine to a single table
@@ -504,13 +504,13 @@ def main():
         try:
             os.rmdir(options.tmp_dir)
         except OSError:
-            print "Could not delete temporary folder %s: not empty" % options.tmp_dir
+            print("Could not delete temporary folder %s: not empty" % options.tmp_dir)
             logging.info("Could not delete temporary folder %s: not empty" % options.tmp_dir)
 
         try:
             os.rmdir(options.out_dir)
         except OSError:
-            print "Not deleting output folder %s: contains files" % options.out_dir
+            print("Not deleting output folder %s: contains files" % options.out_dir)
             logging.info("Not deleting output folder %s: contains files" % options.out_dir)
 
         print("Temporary files deleted")
@@ -543,7 +543,7 @@ def checkfile(filename, previousstate):
         sys.exit("ERROR: Required file " + str(filename) + " is missing, exiting")
     # check for file content
     elif os.stat(filename).st_size == 0:
-        print ("WARNING: File " + str(filename) + " is empty!")
+        print(("WARNING: File " + str(filename) + " is empty!"))
         return True
     return previousstate
 
@@ -591,8 +591,8 @@ def checkjunctionfiles(joinedfnames, mate1filenames, mate2filenames, pairedendin
             logging.warning('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % (
             len(mate1filenames), len(mate2filenames), len(joinedfnames)))
 
-            print('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % (
-            len(mate1filenames), len(mate2filenames), len(joinedfnames)))
+            print(('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % (
+            len(mate1filenames), len(mate2filenames), len(joinedfnames))))
 
         if skipcirc:
             logging.warning('Junction files seem empty, skipping circRNA detection module.')
@@ -724,7 +724,7 @@ def checkBAMsorting(bamfiles):
         try:
             bamfile.check_index()
         except ValueError:
-            print "BAM file %s has no index (%s.bai is missing)" % (file, file)
+            print("BAM file %s has no index (%s.bai is missing)" % (file, file))
             logging.info("BAM file %s has no index (%s.bai is missing)" % (file, file))
             unsortedBAMs.append(file)
             break
@@ -769,8 +769,8 @@ def wraphostgenecount(bamfile, tmp_dir, circ_coor, ref, countlinearsplicedreads=
     # create an (temporary) output file based on tid and file name
     output = tmp_dir + "tmp_" + os.path.basename(bamfile) + "_" + tid + "_junction.linear"
 
-    print "Counting host gene expression based on " \
-          "detected and filtered circRNA coordinates for %s" % bamfile
+    print("Counting host gene expression based on " \
+          "detected and filtered circRNA coordinates for %s" % bamfile)
 
     # launch the gene counting
     gc.comb_gen_count(circ_coor, bamfile, ref, output, countlinearsplicedreads)
@@ -787,7 +787,7 @@ def wrapfindcirc(files, tmp_dir, endTol, maxL, minL, strand=True, pairdendindepe
     sort = Fc.Sort()
     indx = id_generator()
     logging.info("started circRNA detection from file %s" % files)
-    print "started circRNA detection from file %s" % files
+    print("started circRNA detection from file %s" % files)
 
     if same:
         circfilename = files + indx + ".circRNA"
@@ -796,44 +796,44 @@ def wrapfindcirc(files, tmp_dir, endTol, maxL, minL, strand=True, pairdendindepe
     if pairdendindependent:
         f.printcircline(files, tmp_dir + "tmp_printcirclines." + indx)
 
-        print "\t=> separating duplicates [%s]" % files
+        print("\t=> separating duplicates [%s]" % files)
         f.sepDuplicates(tmp_dir + "tmp_printcirclines." + indx, tmp_dir + "tmp_duplicates." + indx,
                         tmp_dir + "tmp_nonduplicates." + indx)
 
         # Find small circles
-        print "\t=> locating small circRNAs [%s]" % files
+        print("\t=> locating small circRNAs [%s]" % files)
         f.smallcirc(tmp_dir + "tmp_duplicates." + indx, tmp_dir + "tmp_smallcircs." + indx)
 
         if strand:
             # Find normal circles
-            print "\t=> locating circRNAs (stranded mode) [%s]" % files
+            print("\t=> locating circRNAs (stranded mode) [%s]" % files)
             f.findcirc(tmp_dir + "tmp_nonduplicates." + indx, tmp_dir + "tmp_normalcircs." + indx, strand=True)
         else:
-            print "\t=> locating circRNAs (unstranded mode) [%s]" % files
+            print("\t=> locating circRNAs (unstranded mode) [%s]" % files)
             f.findcirc(tmp_dir + "tmp_nonduplicates." + indx, tmp_dir + "tmp_normalcircs." + indx, strand=False)
 
         # Merge small and normal circles
-        print "\t=> merging circRNAs [%s]" % files
+        print("\t=> merging circRNAs [%s]" % files)
         mergefiles(tmp_dir + "tmp_findcirc." + indx, tmp_dir + "tmp_smallcircs." + indx,
                    tmp_dir + "tmp_normalcircs." + indx)
     else:
         if strand:
-            print "\t=> locating circRNAs (stranded mode) [%s]" % files
+            print("\t=> locating circRNAs (stranded mode) [%s]" % files)
             f.findcirc(files, tmp_dir + "tmp_findcirc." + indx, strand=True)
         else:
-            print "\t=> locating circRNAs (unstranded mode) [%s]" % files
+            print("\t=> locating circRNAs (unstranded mode) [%s]" % files)
             f.findcirc(files, tmp_dir + "tmp_findcirc." + indx, strand=False)
 
     # Sort
     if strand:
-        print "\t=> sorting circRNAs (stranded mode) [%s]" % files
+        print("\t=> sorting circRNAs (stranded mode) [%s]" % files)
         sort.sort_count(tmp_dir + "tmp_findcirc." + indx, circfilename, strand=True)
     else:
-        print "\t=> sorting circRNAs (unstranded mode) [%s]" % files
+        print("\t=> sorting circRNAs (unstranded mode) [%s]" % files)
         sort.sort_count(tmp_dir + "tmp_findcirc." + indx, circfilename, strand=False)
 
     logging.info("finished circRNA detection from file %s" % files)
-    print "finished circRNA detection from file %s" % files
+    print("finished circRNA detection from file %s" % files)
 
     return circfilename
 
diff --git a/setup.py b/setup.py
index 86e944b..79a57ed 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
     # Versions should comply with PEP440.  For a discussion on single-sourcing
     # the version across setup.py and the project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='0.4.8',
+    version='0.5.0',
 
     description='Detect circRNAs from chimeras',
     long_description=long_description,
@@ -60,12 +60,17 @@
         # Specify the Python versions you support here. In particular, ensure
         # that you indicate whether you support Python 2, Python 3 or both.
         # 'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
+        # 'Programming Language :: Python :: 2.6',
+        # 'Programming Language :: Python :: 2.7',
         # 'Programming Language :: Python :: 3',
         # 'Programming Language :: Python :: 3.2',
         # 'Programming Language :: Python :: 3.3',
-        # 'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+
     ],
 
     # What does your project relate to?
@@ -80,13 +85,13 @@
     # requirements files see:
     # https://packaging.python.org/en/latest/requirements.html
     install_requires=[
-        'HTSeq>=0.11.0',
+        'HTSeq >= 0.11.0',
         'pysam >= 0.13',
-        'numpy<1.17.0',
-        'pandas<0.24.0'
+        'numpy',
+        'pandas'
     ],
 
-    python_requires='<3',
+    #python_requires='<3',
 
     # List additional groups of dependencies here (e.g. development
     # dependencies). You can install these using the following syntax,

From 71488454e338c182853cf7df1a02b8a605bcd915 Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Thu, 30 Jul 2020 14:10:07 +0200
Subject: [PATCH 2/6] Fixing HTSeq setup issue

---
 setup.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index 79a57ed..2f96788 100644
--- a/setup.py
+++ b/setup.py
@@ -80,18 +80,23 @@
     # simple. Or you can use find_packages().
     packages=['DCC'],
 
+    # setup_requires=['Cython','pysam','matplotlib'],
+
     # List run-time dependencies here.  These will be installed by pip when
     # your project is installed. For an analysis of "install_requires" vs pip's
     # requirements files see:
     # https://packaging.python.org/en/latest/requirements.html
-    install_requires=[
-        'HTSeq >= 0.11.0',
-        'pysam >= 0.13',
-        'numpy',
-        'pandas'
+     install_requires=[
+         'HTSeq',
+    #     'pysam >= 0.13',
+    #     'numpy',
+    #     'pandas',
+    #     'Cython'
     ],
 
-    #python_requires='<3',
+    #install_requires=read('requirements.txt').splitlines(),
+
+    # python_requires='<3',
 
     # List additional groups of dependencies here (e.g. development
     # dependencies). You can install these using the following syntax,

From 84633258303dd51fd3770b2a9409233d3de1b0a6 Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Thu, 30 Jul 2020 14:10:26 +0200
Subject: [PATCH 3/6] Fixing HTSeq setup issue

---
 requirements.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..47fa4f1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+pysam
+numpy
+pandas
+Cython

From 513c4d0f8b19ae83012740390acd1a82a028a77d Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Thu, 30 Jul 2020 14:12:06 +0200
Subject: [PATCH 4/6] Fixing imports

---
 DCC/Circ_nonCirc_Exon_Match.py |  2 +-
 DCC/__init__.py                | 16 ++++++++--------
 DCC/circAnnotate.py            |  2 +-
 DCC/circFilter.py              |  2 +-
 DCC/main.py                    | 14 +++++++-------
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/DCC/Circ_nonCirc_Exon_Match.py b/DCC/Circ_nonCirc_Exon_Match.py
index 9e0b1fe..0009ee0 100644
--- a/DCC/Circ_nonCirc_Exon_Match.py
+++ b/DCC/Circ_nonCirc_Exon_Match.py
@@ -5,7 +5,7 @@
 
 import HTSeq
 
-from IntervalTree import IntervalTree
+from .IntervalTree import IntervalTree
 
 
 class CircNonCircExon(object):
diff --git a/DCC/__init__.py b/DCC/__init__.py
index 0bc0e72..dcfe6dc 100644
--- a/DCC/__init__.py
+++ b/DCC/__init__.py
@@ -1,9 +1,9 @@
 # Import modules
-from findcircRNA import Findcirc
-from circFilter import Circfilter
-from circAnnotate import CircAnnotate
-from genecount import Genecount
-from CombineCounts import Combine
-from Circ_nonCirc_Exon_Match import CircNonCircExon
-from IntervalTree import IntervalTree
-from main import main
+from .findcircRNA import Findcirc
+from .circFilter import Circfilter
+from .circAnnotate import CircAnnotate
+from .genecount import Genecount
+from .CombineCounts import Combine
+from .Circ_nonCirc_Exon_Match import CircNonCircExon
+from .IntervalTree import IntervalTree
+from .main import main
diff --git a/DCC/circAnnotate.py b/DCC/circAnnotate.py
index 0e5d4ef..0d7fefb 100644
--- a/DCC/circAnnotate.py
+++ b/DCC/circAnnotate.py
@@ -9,7 +9,7 @@
 
 import HTSeq
 
-from IntervalTree import IntervalTree
+from .IntervalTree import IntervalTree
 
 
 class CircAnnotate(object):
diff --git a/DCC/circFilter.py b/DCC/circFilter.py
index 528baa5..9f82f44 100644
--- a/DCC/circFilter.py
+++ b/DCC/circFilter.py
@@ -4,7 +4,7 @@
 
 import HTSeq
 
-from IntervalTree import IntervalTree
+from .IntervalTree import IntervalTree
 
 
 ##########################
diff --git a/DCC/main.py b/DCC/main.py
index 8cfabcb..5c26d4a 100644
--- a/DCC/main.py
+++ b/DCC/main.py
@@ -13,12 +13,12 @@
 
 import pysam
 
-import CombineCounts as Cc
-import circAnnotate as Ca
-import circFilter as Ft
-import findcircRNA as Fc
-import genecount as Gc
-from fix2chimera import Fix2Chimera
+from . import CombineCounts as Cc
+from . import circAnnotate as Ca
+from . import circFilter as Ft
+from . import findcircRNA as Fc
+from . import genecount as Gc
+from .fix2chimera import Fix2Chimera
 
 
 def main():
@@ -654,7 +654,7 @@ def getbamfname(junctionfname):
 
 # CircSkip junctions
 def findCircSkipJunction(CircCoordinates, tmp_dir, gtffile, circfiles, SJ_out_tab, strand=True, same=False):
-    from Circ_nonCirc_Exon_Match import CircNonCircExon
+    from .Circ_nonCirc_Exon_Match import CircNonCircExon
     CircSkipfiles = []
     CCEM = CircNonCircExon(tmp_dir)
     # Modify gtf file

From 864f93c7c79d99ebeb2e367e7fc451fd1d54ca7e Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Fri, 7 Aug 2020 12:48:26 +0200
Subject: [PATCH 5/6] Adding direct circtools interface

---
 DCC/main.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/DCC/main.py b/DCC/main.py
index 5c26d4a..b9aebb2 100644
--- a/DCC/main.py
+++ b/DCC/main.py
@@ -21,7 +21,7 @@
 from .fix2chimera import Fix2Chimera
 
 
-def main():
+def main(circtools_parser=None):
     version = "0.5.0"
 
     parser = argparse.ArgumentParser(prog="DCC", formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -104,6 +104,10 @@ def main():
 
     parser.add_argument_group(group)
 
+    # When called from circtools, use its parser instead of the DCC parser built above
+    if circtools_parser:
+        parser = circtools_parser
+
     options = parser.parse_args()
 
     timestr = time.strftime("%Y-%m-%d_%H%M")

From 07192e7efe028832d1fac0f1c8b70ccab91aba87 Mon Sep 17 00:00:00 2001
From: tjakobi <tobias.jakobi@med.uni-heidelberg.de>
Date: Wed, 12 Aug 2020 14:45:46 +0200
Subject: [PATCH 6/6] Updating setup.py

---
 setup.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index 2f96788..7560a93 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the relevant file
-with open(path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f:
+with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
     long_description = f.read()
 
 setup(
@@ -40,7 +40,7 @@
 
 
     # Choose your license
-    license='GNU General Public License (GPL)',
+    license='License :: OSI Approved :: GNU General Public License (GPL)',
 
     # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
     classifiers=[
@@ -51,11 +51,11 @@
         'Development Status :: 5 - Production/Stable',
 
         # Indicate who your project is intended for
-        'Intended Audience :: Researchers',
-        'Topic :: Software Development :: Build Tools',
+        'Intended Audience :: Science/Research',
+        'Topic :: Scientific/Engineering :: Bio-Informatics',
 
         # Pick your license as you wish (should match "license" above)
-        'License :: GNU General Public License (GPL)',
+        'License :: OSI Approved :: GNU General Public License (GPL)',
 
         # Specify the Python versions you support here. In particular, ensure
         # that you indicate whether you support Python 2, Python 3 or both.
@@ -65,7 +65,7 @@
         # 'Programming Language :: Python :: 3',
         # 'Programming Language :: Python :: 3.2',
         # 'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
+        # 'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
@@ -130,5 +130,12 @@
     },
     scripts=[
         'scripts/DCC',
-    ]
+    ],
+
+    project_urls={  # Extra links listed alongside the package metadata
+        'Bug Reports': 'https://github.com/dieterich-lab/DCC/issues',
+        'Dieterich Lab': 'https://dieterichlab.org',
+        'Source': 'https://github.com/dieterich-lab/DCC',
+        'Documentation': 'http://docs.circ.tools',
+    },
 )