From 6f1f25134fc1e996c7928acd30283e448e2ddb11 Mon Sep 17 00:00:00 2001 From: tjakobi Date: Thu, 30 Jul 2020 12:47:22 +0200 Subject: [PATCH 1/6] Stating Python3 porting --- DCC/Circ_nonCirc_Exon_Match.py | 8 ++-- DCC/CombineCounts.py | 4 +- DCC/IntervalTree.py | 2 +- DCC/circFilter.py | 4 +- DCC/findcircRNA.py | 8 ++-- DCC/fix2chimera.py | 10 ++--- DCC/genecount.py | 36 ++++++++-------- DCC/main.py | 76 +++++++++++++++++----------------- setup.py | 21 ++++++---- 9 files changed, 87 insertions(+), 82 deletions(-) diff --git a/DCC/Circ_nonCirc_Exon_Match.py b/DCC/Circ_nonCirc_Exon_Match.py index 6f27a0a..9e0b1fe 100644 --- a/DCC/Circ_nonCirc_Exon_Match.py +++ b/DCC/Circ_nonCirc_Exon_Match.py @@ -194,7 +194,7 @@ def printuniq(self, Infile): for lin in f: lin_split = lin.split('\t') if keys.count(lin_split[0] + '\t' + lin_split[1] + '\t' + lin_split[2]) == 1: - print lin.strip('\n') + print(lin.strip('\n')) def readgtf(self, gtf_file): # store nonCircExons based on transcript_id and exon_number with all its annotations from different transcripts @@ -275,7 +275,7 @@ def readHTSeqCount(self, HTSeqCount, exon_id2custom_exon_id): def findcircAdjacent(self, circExons, Custom_exon_id2Iv, Iv2Custom_exon_id, start=True): circAdjacentExons = {} circAdjacentExonsIv = {} - for key in circExons.keys(): + for key in list(circExons.keys()): for ids in circExons[key]: try: interval = Custom_exon_id2Iv[self.getAdjacent(ids, start=start)] @@ -292,7 +292,7 @@ def printCounts(self, Exons, Count_custom_exon_id, Custom_exon_id2Length): # Print the counts of circexons and adjacentexons # Exons: dictionaries with intervals as key, custom_exon_id as values ExonCounts = {} - for key in Exons.keys(): + for key in list(Exons.keys()): counts = [] for ids in Exons[key]: # If for circAdjacentExons, ids here is a list try: @@ -397,7 +397,7 @@ def readSJ_out_tab(self, SJ_out_tab): strand] = lin_split[6] sj.close() except IOError: - print 'Do you have SJ.out.tab files in your sample folder? DCC cannot find it.' + print('Do you have SJ.out.tab files in your sample folder? DCC cannot find it.') return junctionReadCount def getskipjunctionCount(self, exonskipjunctions, junctionReadCount): diff --git a/DCC/CombineCounts.py b/DCC/CombineCounts.py index 5bc3901..434ecb5 100644 --- a/DCC/CombineCounts.py +++ b/DCC/CombineCounts.py @@ -35,9 +35,9 @@ def comb_coor(self, circfiles, strand=True): onefile.close() if strand: - coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.iteritems()] + coors = ['\t'.join(key.split('\t')[:-1]) + value for key, value in coorsDict.items()] else: - coors = ['{}{}'.format(key, value) for key, value in coorsDict.iteritems()] + coors = ['{}{}'.format(key, value) for key, value in coorsDict.items()] coorsSorted = self.sortBed(coors, retList=True) for itm in coorsSorted: diff --git a/DCC/IntervalTree.py b/DCC/IntervalTree.py index e3333c9..789bc7a 100644 --- a/DCC/IntervalTree.py +++ b/DCC/IntervalTree.py @@ -36,7 +36,7 @@ def intersect(self, interval, report_func): # use the intersect method of IntervalNode class, need make this function aware of strand def traverse(self, func): - for item in self.chroms.itervalues(): + for item in self.chroms.values(): item.traverse(func) diff --git a/DCC/circFilter.py b/DCC/circFilter.py index c6f66e5..528baa5 100644 --- a/DCC/circFilter.py +++ b/DCC/circFilter.py @@ -63,7 +63,7 @@ def readcirc(self, countfile, coordinates): # Do filtering def filtercount(self, count, indx): - print 'Filtering by read counts' + print('Filtering by read counts') sel = [] # store the passed filtering rows for itm in range(len(count)): if indx[itm][4] == '0': @@ -117,7 +117,7 @@ def dummy_filter(self, indx0, count0): np.savetxt(self.tmp_dir + 'tmp_unsortedWithChrM', nonrep, delimiter='\t', newline='\n', fmt='%s') def removeChrM(self, withChrM): - print 'Remove ChrM' + print('Remove ChrM') unremoved = open(withChrM, 'r').readlines() removed = [] for lines in unremoved: diff --git a/DCC/findcircRNA.py b/DCC/findcircRNA.py index 20d45b4..9482236 100644 --- a/DCC/findcircRNA.py +++ b/DCC/findcircRNA.py @@ -78,7 +78,7 @@ def sepDuplicates(self, Chim_junc, duplicates, nonduplicates): if reads.count(read) == 2: dup.write(lines[indx]) elif reads.count(read) > 2: - print 'Read %s has more than 2 count.' % read + print('Read %s has more than 2 count.' % read) try: logging.warning('Read %s has more than 2 count.' % read) except NameError: @@ -159,8 +159,8 @@ def findcirc(self, Chim_junc, output, strand=True): linecnt = linecnt + 1 if len(L) < 14: - print ("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features.") - print ("WARNING: " + str(Chim_junc) + " is probably corrupt.") + print(("WARNING: File " + str(Chim_junc) + ", line " + str(linecnt) + " does not contain all features.")) + print(("WARNING: " + str(Chim_junc) + " is probably corrupt.")) if L[0] == "chr_donorA": continue if int(L[6]) >= 0 and L[0] == L[3] and L[2] == L[5] and ( @@ -217,7 +217,7 @@ def count(self, sortedlist, strand=True): elif not strand: circs = (itm[0], itm[1], itm[2]) else: - print "Please specify correct strand information." + print("Please specify correct strand information.") cnt[circs] += 1 itm.append(str(cnt[circs])) # tmp_count.append( [itm[0],itm[1],itm[2],itm[3],itm[7],itm[4],itm[5],itm[6]] ) diff --git a/DCC/fix2chimera.py b/DCC/fix2chimera.py index a031cc2..8d26a00 100644 --- a/DCC/fix2chimera.py +++ b/DCC/fix2chimera.py @@ -55,10 +55,10 @@ def modify_junctiontype(junctiontype): continue # check if the row has all fields if len(line_split) < 14: - print ("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt) - + " does not contain all features.") - print ("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt.") - print ("WARNING: Offending line: " + str(line)) + print(("WARNING: File " + str(chimeric_junction_mate2) + ", line " + str(linecnt) + + " does not contain all features.")) + print(("WARNING: " + str(chimeric_junction_mate2) + " is probably corrupt.")) + print(("WARNING: Offending line: " + str(line))) linecnt += 1 @@ -110,7 +110,7 @@ def printduplicates(self, merged, duplicates, field=10): if not os.path.isfile(merged): sys.exit("ERROR: File " + str(merged) + " is missing!") elif os.stat(merged).st_size == 0: - print ("WARNING: File " + str(merged) + " is empty!") + print(("WARNING: File " + str(merged) + " is empty!")) else: try: inputfile = open(merged, 'r') diff --git a/DCC/genecount.py b/DCC/genecount.py index ab7b0d4..2cd84e2 100644 --- a/DCC/genecount.py +++ b/DCC/genecount.py @@ -99,33 +99,33 @@ def genecount(self, circ_coordinates, bamfile, ref, tid): start_coordinates.close() end_coordinates.close() - print ('Started linear gene expression counting for %s' % bamfile) + print(('Started linear gene expression counting for %s' % bamfile)) start = time.time() # mpileup get the read counts of the start and end positions - print ("\t=> running mpileup for start positions [%s]" % bamfile) + print(("\t=> running mpileup for start positions [%s]" % bamfile)) mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coordinates_' + tid) end = time.time() - start - print ("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end)) + print(("\t=> mpileup for start positions for %s took %d seconds" % (bamfile, end))) start = time.time() # mpileup get the read counts of the start and end positions - print ("\t=> running mpileup for end positions [%s]" % bamfile) + print(("\t=> running mpileup for end positions [%s]" % bamfile)) mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coordinates_' + tid) end = time.time() - start - print ("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end)) + print(("\t=> mpileup for end positions for %s took %d seconds" % (bamfile, end))) - print "\t=> gathering read counts for start positions [%s]" % bamfile + print("\t=> gathering read counts for start positions [%s]" % bamfile) startcount = self.getreadscount(mpileup_start, countmapped=True) - print "\t=> gathering read counts for end positions [%s]" % bamfile + print("\t=> gathering read counts for end positions [%s]" % bamfile) endcount = self.getreadscount(mpileup_end, countmapped=True) # remove tmp files # os.remove(self.tmp_dir + 'tmp_start_coordinates_' + tid) # os.remove(self.tmp_dir + 'tmp_end_coordinates_' + tid) - print 'Finished linear gene expression counting for %s' % bamfile + print('Finished linear gene expression counting for %s' % bamfile) return startcount, endcount @@ -194,29 +194,29 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True): start_coor_1.close() end_coor.close() end_coor_1.close() - print ('Started linear spliced read counting for %s' % bamfile) + print(('Started linear spliced read counting for %s' % bamfile)) # mpileup get the number of spliced reads at circle start position and (start-1) position. - print ("\t=> running mpileup 1 for start positions [%s]" % bamfile) + print(("\t=> running mpileup 1 for start positions [%s]" % bamfile)) mpileup_start = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_1') - print ("\t=> running mpileup 2 for start positions [%s]" % bamfile) + print(("\t=> running mpileup 2 for start positions [%s]" % bamfile)) mpileup_start_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_start_coor_2') # mpileup get the number of spliced reads at circle end position and (end+1) position. - print ("\t=> running mpileup 1 for end positions [%s]" % bamfile) + print(("\t=> running mpileup 1 for end positions [%s]" % bamfile)) mpileup_end = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_1') - print ("\t=> running mpileup 2 for end positions [%s]" % bamfile) + print(("\t=> running mpileup 2 for end positions [%s]" % bamfile)) mpileup_end_1 = pysam.mpileup(bamfile, '-f', ref, '-l', self.tmp_dir + 'tmp_end_coor_2') # get count - print "\t=> gathering read counts for start positions [%s]" % bamfile + print("\t=> gathering read counts for start positions [%s]" % bamfile) startcount = self.submpileup(self.getreadscount(mpileup_start_1), self.getreadscount(mpileup_start)) - print "\t=> gathering read counts for end positions [%s]" % bamfile + print("\t=> gathering read counts for end positions [%s]" % bamfile) endcount = self.submpileup(self.getreadscount(mpileup_end), self.getreadscount(mpileup_end_1), left=False) # remove tmp files @@ -225,7 +225,7 @@ def linearsplicedreadscount(self, circ_coor, bamfile, ref, header=True): # os.remove(self.tmp_dir + 'tmp_end_coor') # os.remove(self.tmp_dir + 'tmp_end_coor_1') - print 'Finished linear spliced read counting for %s' % bamfile + print('Finished linear spliced read counting for %s' % bamfile) return startcount, endcount @@ -266,7 +266,7 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread # call genecount to get the start and end positon read counts tmp_start, tmp_end = self.genecount(circ_coor, bamfile, ref, tid) - print 'Ended linear gene expression counting %s' % bamfile + print('Ended linear gene expression counting %s' % bamfile) logging.info('Ended linear gene expression counting %s' % bamfile) for line in tmp_start: @@ -314,6 +314,6 @@ def comb_gen_count(self, circ_coor, bamfile, ref, output, countlinearsplicedread # tmp_end.close() count_table.close() - print 'Ended post processing %s' % bamfile + print('Ended post processing %s' % bamfile) logging.info('Ended post processing %s' % bamfile) return tid diff --git a/DCC/main.py b/DCC/main.py index 3fc40ef..8cfabcb 100644 --- a/DCC/main.py +++ b/DCC/main.py @@ -22,7 +22,7 @@ def main(): - version = "0.4.8" + version = "0.5.0" parser = argparse.ArgumentParser(prog="DCC", formatter_class=argparse.RawDescriptionHelpFormatter, fromfile_prefix_chars="@", @@ -49,7 +49,7 @@ def main(): help="Must be enabled for stranded libraries, aka 'fr-secondstrand' [default: False]") group.add_argument("-N", "--nonstrand", action="store_false", dest="strand", default=True, help="The library is non-stranded [default stranded]") - group.add_argument("-E", "--endTol", dest="endTol", type=int, default=5, choices=range(0, 10), + group.add_argument("-E", "--endTol", dest="endTol", type=int, default=5, choices=list(range(0, 10)), help="Maximum base pair tolerance of reads extending over junction sites [default: 5]") group.add_argument("-m", "--maximum", dest="max", type=int, default=1000000, help="The maximum length of candidate circRNAs (including introns) [default: 1000000]") @@ -112,12 +112,12 @@ def main(): try: os.makedirs(options.out_dir) except OSError: - print "Could not create output folder %s" % options.out_dir + print("Could not create output folder %s" % options.out_dir) logging.info("Could not create output folder %s" % options.out_dir) exit(-1) else: - print "Output folder %s already exists, reusing" % options.out_dir + print("Output folder %s already exists, reusing" % options.out_dir) # create temporary directory if not existing @@ -125,17 +125,17 @@ def main(): try: os.makedirs(options.tmp_dir) except OSError: - print "Could not create temporary folder %s" % options.tmp_dir + print("Could not create temporary folder %s" % options.tmp_dir) exit(-1) else: - print "Temporary folder %s already exists, reusing" % options.tmp_dir + print("Temporary folder %s already exists, reusing" % options.tmp_dir) logging.basicConfig(filename=os.path.join(options.out_dir, "DCC-" + timestr + ".log"), filemode="w", level=logging.DEBUG, format="%(asctime)s %(message)s") logging.info("DCC %s started" % version) - print "DCC %s started" % version + print("DCC %s started" % version) logging.info('DCC command line: ' + ' '.join(sys.argv)) # Get input file names @@ -143,7 +143,7 @@ def main(): options.Input = remove_empty_lines(options.Input) if (options.mate1 and not options.mate1) or (options.mate2 and not options.mate1) and options.pairedendindependent: - print "Only one mate data file supplied; check if both, -mt1 and -mt2 are specified." + print("Only one mate data file supplied; check if both, -mt1 and -mt2 are specified.") logging.info("Only one mate data file supplied; check if both, -mt1 and -mt2 are specified.") exit(-1) @@ -180,10 +180,10 @@ def main(): cpu_count = multiprocessing.cpu_count() if options.cpu_threads <= cpu_count: - print "%s CPU cores available, using %s" % (cpu_count, options.cpu_threads) + print("%s CPU cores available, using %s" % (cpu_count, options.cpu_threads)) else: - print "Only %s CPU cores available while %s requested, falling back to %s" % \ - (cpu_count, options.cpu_threads, cpu_count) + print("Only %s CPU cores available while %s requested, falling back to %s" % \ + (cpu_count, options.cpu_threads, cpu_count)) options.cpu_threads = cpu_count pool = multiprocessing.Pool(processes=options.cpu_threads) @@ -233,7 +233,7 @@ def main(): logging.info("Stranded data mode") else: logging.info("Non-stranded data, the strand of circRNAs guessed from the strand of host genes") - print "WARNING: non-stranded data, the strand of circRNAs guessed from the strand of host genes" + print("WARNING: non-stranded data, the strand of circRNAs guessed from the strand of host genes") # Start de novo circular RNA detection model # Create instances @@ -241,7 +241,7 @@ def main(): sort = Fc.Sort() if options.pairedendindependent: - print "Please make sure that the read pairs have been mapped both, combined and on a per mate basis" + print("Please make sure that the read pairs have been mapped both, combined and on a per mate basis") logging.info("Please make sure that the read pairs have been mapped both, combined and on a per mate basis") # Fix2chimera problem by STAR @@ -336,7 +336,7 @@ def main(): file2filter = options.filteronly[0] coorfile = options.filteronly[1] logging.info("Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1])) - print "Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1]) + print("Using files %s and %s for filtering" % (options.filteronly[0], options.filteronly[1])) except IndexError: logging.error("Program exit because input error. Please check the input. If only use the program " @@ -354,7 +354,7 @@ def main(): file2filter = options.tmp_dir + "tmp_circCount" coorfile = options.tmp_dir + "tmp_coordinates" logging.info("Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering") - print "Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering" + print("Using files _tmp_DCC/tmp_circCount and _tmp_DCC/tmp_coordinates for filtering") if options.rep_file: rep_file = options.rep_file @@ -420,17 +420,17 @@ def main(): if options.gene: # import the list of bamfile names as a file if not options.bam: - print "No BAM files provided (-B) trying to automatically guess BAM file names" + print("No BAM files provided (-B) trying to automatically guess BAM file names") logging.info("No BAM files provided (-B) trying to automatically guess BAM file names") bamfiles = convertjunctionfile2bamfile(options.Input) if not bamfiles: - print "Could not guess BAM file names, please provides them manually via -B" + print("Could not guess BAM file names, please provides them manually via -B") logging.info("Could not guess BAM file names, please provides them manually via -B") else: bamfiles = remove_empty_lines(options.bam) if not options.refseq: - print "Please provide reference sequence, program will not count host gene expression" + print("Please provide reference sequence, program will not count host gene expression") logging.warning("Please provide reference sequence, program will not count host gene expression") if options.refseq: @@ -446,7 +446,7 @@ def main(): logging.error("The following BAM files seem to be not sorted by coordinate or are missing an index:") logging.error(', '.join(unsortedBAMS)) print("The following BAM files seem to be not sorted by coordinate or are missing an index:") - print(', '.join(unsortedBAMS)) + print((', '.join(unsortedBAMS))) sys.exit("Error: not all BAM files are sorted by coordinate or are missing indices") else: # For each sample (each bamfile), do one host gene count, and then combine to a single table @@ -504,13 +504,13 @@ def main(): try: os.rmdir(options.tmp_dir) except OSError: - print "Could not delete temporary folder %s: not empty" % options.tmp_dir + print("Could not delete temporary folder %s: not empty" % options.tmp_dir) logging.info("Could not delete temporary folder %s: not empty" % options.tmp_dir) try: os.rmdir(options.out_dir) except OSError: - print "Not deleting output folder %s: contains files" % options.out_dir + print("Not deleting output folder %s: contains files" % options.out_dir) logging.info("Not deleting output folder %s: contains files" % options.out_dir) print("Temporary files deleted") @@ -543,7 +543,7 @@ def checkfile(filename, previousstate): sys.exit("ERROR: Required file " + str(filename) + " is missing, exiting") # check for file content elif os.stat(filename).st_size == 0: - print ("WARNING: File " + str(filename) + " is empty!") + print(("WARNING: File " + str(filename) + " is empty!")) return True return previousstate @@ -591,8 +591,8 @@ def checkjunctionfiles(joinedfnames, mate1filenames, mate2filenames, pairedendin logging.warning('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % ( len(mate1filenames), len(mate2filenames), len(joinedfnames))) - print('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % ( - len(mate1filenames), len(mate2filenames), len(joinedfnames))) + print(('Input file lists have different length (mate 1 %d, mate 2 %d, joined %d).' % ( + len(mate1filenames), len(mate2filenames), len(joinedfnames)))) if skipcirc: logging.warning('Junction files seem empty, skipping circRNA detection module.') @@ -724,7 +724,7 @@ def checkBAMsorting(bamfiles): try: bamfile.check_index() except ValueError: - print "BAM file %s has no index (%s.bai is missing)" % (file, file) + print("BAM file %s has no index (%s.bai is missing)" % (file, file)) logging.info("BAM file %s has no index (%s.bai is missing)" % (file, file)) unsortedBAMs.append(file) break @@ -769,8 +769,8 @@ def wraphostgenecount(bamfile, tmp_dir, circ_coor, ref, countlinearsplicedreads= # create an (temporary) output file based on tid and file name output = tmp_dir + "tmp_" + os.path.basename(bamfile) + "_" + tid + "_junction.linear" - print "Counting host gene expression based on " \ - "detected and filtered circRNA coordinates for %s" % bamfile + print("Counting host gene expression based on " \ + "detected and filtered circRNA coordinates for %s" % bamfile) # launch the gene counting gc.comb_gen_count(circ_coor, bamfile, ref, output, countlinearsplicedreads) @@ -787,7 +787,7 @@ def wrapfindcirc(files, tmp_dir, endTol, maxL, minL, strand=True, pairdendindepe sort = Fc.Sort() indx = id_generator() logging.info("started circRNA detection from file %s" % files) - print "started circRNA detection from file %s" % files + print("started circRNA detection from file %s" % files) if same: circfilename = files + indx + ".circRNA" @@ -796,44 +796,44 @@ def wrapfindcirc(files, tmp_dir, endTol, maxL, minL, strand=True, pairdendindepe if pairdendindependent: f.printcircline(files, tmp_dir + "tmp_printcirclines." + indx) - print "\t=> separating duplicates [%s]" % files + print("\t=> separating duplicates [%s]" % files) f.sepDuplicates(tmp_dir + "tmp_printcirclines." + indx, tmp_dir + "tmp_duplicates." + indx, tmp_dir + "tmp_nonduplicates." + indx) # Find small circles - print "\t=> locating small circRNAs [%s]" % files + print("\t=> locating small circRNAs [%s]" % files) f.smallcirc(tmp_dir + "tmp_duplicates." + indx, tmp_dir + "tmp_smallcircs." + indx) if strand: # Find normal circles - print "\t=> locating circRNAs (stranded mode) [%s]" % files + print("\t=> locating circRNAs (stranded mode) [%s]" % files) f.findcirc(tmp_dir + "tmp_nonduplicates." + indx, tmp_dir + "tmp_normalcircs." + indx, strand=True) else: - print "\t=> locating circRNAs (unstranded mode) [%s]" % files + print("\t=> locating circRNAs (unstranded mode) [%s]" % files) f.findcirc(tmp_dir + "tmp_nonduplicates." + indx, tmp_dir + "tmp_normalcircs." + indx, strand=False) # Merge small and normal circles - print "\t=> merging circRNAs [%s]" % files + print("\t=> merging circRNAs [%s]" % files) mergefiles(tmp_dir + "tmp_findcirc." + indx, tmp_dir + "tmp_smallcircs." + indx, tmp_dir + "tmp_normalcircs." + indx) else: if strand: - print "\t=> locating circRNAs (stranded mode) [%s]" % files + print("\t=> locating circRNAs (stranded mode) [%s]" % files) f.findcirc(files, tmp_dir + "tmp_findcirc." + indx, strand=True) else: - print "\t=> locating circRNAs (unstranded mode) [%s]" % files + print("\t=> locating circRNAs (unstranded mode) [%s]" % files) f.findcirc(files, tmp_dir + "tmp_findcirc." + indx, strand=False) # Sort if strand: - print "\t=> sorting circRNAs (stranded mode) [%s]" % files + print("\t=> sorting circRNAs (stranded mode) [%s]" % files) sort.sort_count(tmp_dir + "tmp_findcirc." + indx, circfilename, strand=True) else: - print "\t=> sorting circRNAs (unstranded mode) [%s]" % files + print("\t=> sorting circRNAs (unstranded mode) [%s]" % files) sort.sort_count(tmp_dir + "tmp_findcirc." + indx, circfilename, strand=False) logging.info("finished circRNA detection from file %s" % files) - print "finished circRNA detection from file %s" % files + print("finished circRNA detection from file %s" % files) return circfilename diff --git a/setup.py b/setup.py index 86e944b..79a57ed 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='0.4.8', + version='0.5.0', description='Detect circRNAs from chimeras', long_description=long_description, @@ -60,12 +60,17 @@ # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. # 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', + # 'Programming Language :: Python :: 2.6', + # 'Programming Language :: Python :: 2.7', # 'Programming Language :: Python :: 3', # 'Programming Language :: Python :: 3.2', # 'Programming Language :: Python :: 3.3', - # 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], # What does your project relate to? @@ -80,13 +85,13 @@ # requirements files see: # https://packaging.python.org/en/latest/requirements.html install_requires=[ - 'HTSeq>=0.11.0', + 'HTSeq >= 0.11.0', 'pysam >= 0.13', - 'numpy<1.17.0', - 'pandas<0.24.0' + 'numpy', + 'pandas' ], - python_requires='<3', + #python_requires='<3', # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, From 71488454e338c182853cf7df1a02b8a605bcd915 Mon Sep 17 00:00:00 2001 From: tjakobi Date: Thu, 30 Jul 2020 14:10:07 +0200 Subject: [PATCH 2/6] Fixing HTSeq setup issue --- setup.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 79a57ed..2f96788 100644 --- a/setup.py +++ b/setup.py @@ -80,18 +80,23 @@ # simple. Or you can use find_packages(). packages=['DCC'], + # setup_requires=['Cython','pysam','matplotlib'], + # List run-time dependencies here. These will be installed by pip when # your project is installed. For an analysis of "install_requires" vs pip's # requirements files see: # https://packaging.python.org/en/latest/requirements.html - install_requires=[ - 'HTSeq >= 0.11.0', - 'pysam >= 0.13', - 'numpy', - 'pandas' + install_requires=[ + 'HTSeq', + # 'pysam >= 0.13', + # 'numpy', + # 'pandas', + # 'Cython' ], - #python_requires='<3', + #install_requires=read('requirements.txt').splitlines(), + + # python_requires='<3', # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, From 84633258303dd51fd3770b2a9409233d3de1b0a6 Mon Sep 17 00:00:00 2001 From: tjakobi Date: Thu, 30 Jul 2020 14:10:26 +0200 Subject: [PATCH 3/6] Fixing HTSeq setup issue --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..47fa4f1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +pysam +numpy +pandas +Cython From 513c4d0f8b19ae83012740390acd1a82a028a77d Mon Sep 17 00:00:00 2001 From: tjakobi Date: Thu, 30 Jul 2020 14:12:06 +0200 Subject: [PATCH 4/6] Fixing imports --- DCC/Circ_nonCirc_Exon_Match.py | 2 +- DCC/__init__.py | 16 ++++++++-------- DCC/circAnnotate.py | 2 +- DCC/circFilter.py | 2 +- DCC/main.py | 14 +++++++------- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/DCC/Circ_nonCirc_Exon_Match.py b/DCC/Circ_nonCirc_Exon_Match.py index 9e0b1fe..0009ee0 100644 --- a/DCC/Circ_nonCirc_Exon_Match.py +++ b/DCC/Circ_nonCirc_Exon_Match.py @@ -5,7 +5,7 @@ import HTSeq -from IntervalTree import IntervalTree +from .IntervalTree import IntervalTree class CircNonCircExon(object): diff --git a/DCC/__init__.py b/DCC/__init__.py index 0bc0e72..dcfe6dc 100644 --- a/DCC/__init__.py +++ b/DCC/__init__.py @@ -1,9 +1,9 @@ # Import modules -from findcircRNA import Findcirc -from circFilter import Circfilter -from circAnnotate import CircAnnotate -from genecount import Genecount -from CombineCounts import Combine -from Circ_nonCirc_Exon_Match import CircNonCircExon -from IntervalTree import IntervalTree -from main import main +from .findcircRNA import Findcirc +from .circFilter import Circfilter +from .circAnnotate import CircAnnotate +from .genecount import Genecount +from .CombineCounts import Combine +from .Circ_nonCirc_Exon_Match import CircNonCircExon +from .IntervalTree import IntervalTree +from .main import main diff --git a/DCC/circAnnotate.py b/DCC/circAnnotate.py index 0e5d4ef..0d7fefb 100644 --- a/DCC/circAnnotate.py +++ b/DCC/circAnnotate.py @@ -9,7 +9,7 @@ import HTSeq -from IntervalTree import IntervalTree +from .IntervalTree import IntervalTree class CircAnnotate(object): diff --git a/DCC/circFilter.py b/DCC/circFilter.py index 528baa5..9f82f44 100644 --- a/DCC/circFilter.py +++ b/DCC/circFilter.py @@ -4,7 +4,7 @@ import HTSeq -from IntervalTree import IntervalTree +from .IntervalTree import IntervalTree ########################## diff --git a/DCC/main.py b/DCC/main.py index 8cfabcb..5c26d4a 100644 --- a/DCC/main.py +++ b/DCC/main.py @@ -13,12 +13,12 @@ import pysam -import CombineCounts as Cc -import circAnnotate as Ca -import circFilter as Ft -import findcircRNA as Fc -import genecount as Gc -from fix2chimera import Fix2Chimera +from . import CombineCounts as Cc +from . import circAnnotate as Ca +from . import circFilter as Ft +from . import findcircRNA as Fc +from . import genecount as Gc +from .fix2chimera import Fix2Chimera def main(): @@ -654,7 +654,7 @@ def getbamfname(junctionfname): # CircSkip junctions def findCircSkipJunction(CircCoordinates, tmp_dir, gtffile, circfiles, SJ_out_tab, strand=True, same=False): - from Circ_nonCirc_Exon_Match import CircNonCircExon + from .Circ_nonCirc_Exon_Match import CircNonCircExon CircSkipfiles = [] CCEM = CircNonCircExon(tmp_dir) # Modify gtf file From 864f93c7c79d99ebeb2e367e7fc451fd1d54ca7e Mon Sep 17 00:00:00 2001 From: tjakobi Date: Fri, 7 Aug 2020 12:48:26 +0200 Subject: [PATCH 5/6] Adding direct circtools interface --- DCC/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DCC/main.py b/DCC/main.py index 5c26d4a..b9aebb2 100644 --- a/DCC/main.py +++ b/DCC/main.py @@ -21,7 +21,7 @@ from .fix2chimera import Fix2Chimera -def main(): +def main(circtools_parser=None): version = "0.5.0" parser = argparse.ArgumentParser(prog="DCC", formatter_class=argparse.RawDescriptionHelpFormatter, @@ -104,6 +104,10 @@ def main(): parser.add_argument_group(group) + # called directly from circtools + if circtools_parser: + parser = circtools_parser + options = parser.parse_args() timestr = time.strftime("%Y-%m-%d_%H%M") From 07192e7efe028832d1fac0f1c8b70ccab91aba87 Mon Sep 17 00:00:00 2001 From: tjakobi Date: Wed, 12 Aug 2020 14:45:46 +0200 Subject: [PATCH 6/6] Updating setup.py --- setup.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 2f96788..7560a93 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ here = path.abspath(path.dirname(__file__)) # Get the long description from the relevant file -with open(path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f: +with open(path.join(here, 'README.rst')) as f: long_description = f.read() setup( @@ -40,7 +40,7 @@ # Choose your license - license='GNU General Public License (GPL)', + license='License :: OSI Approved :: GNU General Public License (GPL)', # See https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ @@ -51,11 +51,11 @@ 'Development Status :: 5 - Production/Stable', # Indicate who your project is intended for - 'Intended Audience :: Researchers', - 'Topic :: Software Development :: Build Tools', + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering :: Bio-Informatics', # Pick your license as you wish (should match "license" above) - 'License :: GNU General Public License (GPL)', + 'License :: OSI Approved :: GNU General Public License (GPL)', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. @@ -65,7 +65,7 @@ # 'Programming Language :: Python :: 3', # 'Programming Language :: Python :: 3.2', # 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', + # 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', @@ -130,5 +130,12 @@ }, scripts=[ 'scripts/DCC', - ] + ], + + project_urls={ # Optional + 'Bug Reports': 'https://github.com/dieterich-lab/DCC/issues', + 'Dieterich Lab': 'https://dieterichlab.org', + 'Source': 'https://github.com/dieterich-lab/DCC', + 'Documentation': 'http://docs.circ.tools' +}, )