diff --git a/benchmark.sh b/manuscript_demo/benchmark.sh
similarity index 100%
rename from benchmark.sh
rename to manuscript_demo/benchmark.sh
diff --git a/scripts/R2_annotate.R b/scripts/deprecated/R2_annotate.R
similarity index 100%
rename from scripts/R2_annotate.R
rename to scripts/deprecated/R2_annotate.R
diff --git a/scripts/R2_annotate_retainTranscriptVersion.R b/scripts/deprecated/R2_annotate_retainTranscriptVersion.R
similarity index 100%
rename from scripts/R2_annotate_retainTranscriptVersion.R
rename to scripts/deprecated/R2_annotate_retainTranscriptVersion.R
diff --git a/scripts/R2_lift.R b/scripts/deprecated/R2_lift.R
similarity index 100%
rename from scripts/R2_lift.R
rename to scripts/deprecated/R2_lift.R
diff --git a/scripts/deprecated/R2_lift_keepTranscriptVersion.R b/scripts/deprecated/R2_lift_keepTranscriptVersion.R
new file mode 100644
index 0000000..3738968
--- /dev/null
+++ b/scripts/deprecated/R2_lift_keepTranscriptVersion.R
@@ -0,0 +1,138 @@
+#!/usr/bin/env Rscript
+args <- commandArgs(trailingOnly = TRUE)
+
+# exactly three arguments are required (input bed, gtf, output bed): if not, stop with the usage message
+if (length(args) != 3) {
+  stop("\nUsage: Rscript lift.R /path/to/foo.bed /path/to/bar.gtf /path/to/output.bed", call. = FALSE)
+}
+
+suppressMessages(suppressWarnings(library(GenomicFeatures, warn.conflicts = FALSE, quietly = TRUE)))
+suppressMessages(suppressWarnings(library(rtracklayer, warn.conflicts = FALSE, quietly = TRUE)))
+suppressMessages(suppressWarnings(library(tidyverse, warn.conflicts = FALSE, quietly = TRUE)))
+
+
+################################################################################
+################################################################################
+################################################################################
+
+# Import the BED-like table of transcriptome alignments. Columns 1-6 are given
+# fixed names; transcript ids are kept verbatim (version suffix not stripped).
+mappedLocus <- read_tsv(file = args[1], col_names = TRUE, guess_max = 999999999999, col_types = "fddfff") %>%
+  dplyr::rename(transcript_id = 1, tx_coord_start = 2, tx_coord_end = 3,
+                name = 4, score = 5, strand = 6)
+
+# Positions of the columns packed into `metaname`: column 4 plus any columns
+# after the sixth. `7:ncol(x)` would count backwards (7, 6) on a plain
+# six-column BED and break the merge, so derive the extras with seq_len().
+metaCols <- c(4L, seq_len(ncol(mappedLocus))[-seq_len(6L)])
+targetNames <- colnames(mappedLocus)[metaCols]
+
+# merge the metadata columns into a single ">_>"-delimited column
+mappedLocus <- unite(mappedLocus, metaname, all_of(metaCols), sep = ">_>", remove = TRUE, na.rm = FALSE)
+
+# drop the input strand; it is re-attached later from the annotation
+mappedLocus <- mappedLocus %>% dplyr::select(-strand)
+
+##################################################
+
+# Load transcript structures from the annotation and derive a per-transcript
+# strand lookup table.
+
+# build a TxDb from the reference GTF
+gtf <- makeTxDbFromGFF(file = args[2], format = "gtf")
+
+# exons grouped by transcript, keyed by transcript name
+exons <- exonsBy(gtf, by = "tx", use.names = TRUE)
+
+# transcript names are re-assigned unchanged: this variant keeps the version
+# suffix instead of stripping it
+fixedNames <- as_tibble(names(exons))$value
+names(exons) <- fixedNames
+
+# flatten the exon list into a tibble; `group_name` carries the name of the
+# transcript each row belongs to
+exons_tib <- as_tibble(as(exons, "data.frame"))
+
+# one row per distinct (transcript_id, strand) pair
+print("preparing strand lookup table")
+strand_lookup <- exons_tib %>%
+  dplyr::select(transcript_id = group_name, strand) %>%
+  dplyr::distinct()
+
+##################################################
+
+# Attach the annotation strand to each site. inner_join() drops sites whose
+# transcript_id is absent from the annotation; score is blanked to ".".
+print("Repairing strand")
+mappedLocus_fixedStrand <- inner_join(mappedLocus, strand_lookup, by = "transcript_id") %>%
+  mutate(score = ".", .after = metaname)
+
+# Round-trip the strand-repaired sites through a scratch BED file so rtracklayer
+# can parse them. A tempfile (not the output path) is used so a later failure
+# cannot leave an intermediate file where the final result is expected.
+print("writing strand bedfile")
+strandBed <- tempfile(fileext = ".bed")
+
+# rows with an NA in columns 1-3 (transcript, start, end) or 6 (strand)
+na_rows <- rowSums(is.na(mappedLocus_fixedStrand[c(1, 2, 3, 6)])) > 0
+na_count <- sum(na_rows)
+
+# rows whose width (col3 - col2) is zero or negative. Rows already flagged for
+# NAs are excluded here so the comparison never yields NA and the two counts
+# reported below do not overlap.
+tx_width <- mappedLocus_fixedStrand$tx_coord_end - mappedLocus_fixedStrand$tx_coord_start
+diff_not_greater <- !na_rows & tx_width <= 0
+diff_count <- sum(diff_not_greater)
+
+# drop every flagged row
+filters <- na_rows | diff_not_greater
+filtered_data <- mappedLocus_fixedStrand[!filters, ]
+
+# write the surviving rows, without a header, to the scratch file
+write_tsv(filtered_data, strandBed, col_names = FALSE)
+
+# report how many rows were omitted
+cat("Omitted", na_count, "rows due to NAs in columns 1-3 or 6.\n")
+cat("Omitted", diff_count, "rows due to zero or negative width. \n")
+
+##################################################
+
+# read the strand-repaired sites back in, then discard the scratch file
+print("importing strand bedfile")
+mappedLocus <- import.bed(strandBed)
+unlink(strandBed)
+
+# project the transcript-space sites onto genomic coordinates
+print("mapping transcript coordinates to genome")
+genomeLocus <- mapFromTranscripts(x = mappedLocus, transcripts = exons)
+
+# carry each hit's originating transcript-space record (picked via xHits)
+# along as extra metadata columns on the mapped ranges
+print("binding output")
+mcols(genomeLocus) <- cbind(mcols(genomeLocus), DataFrame(mappedLocus[genomeLocus$xHits]))
+
+# flatten to a tibble; the attached record's fields are read back as X.* columns
+genome_coordinates <- as_tibble(as(genomeLocus, "data.frame"))
+
+# keep bed-like columns, de-duplicate, then apply the strand-specific offsets
+print("filtering output")
+output <- genome_coordinates %>%
+  dplyr::select(chr = seqnames, start, end, data = X.name, transcript = X.seqnames, strand) %>%
+  unique() %>%
+  mutate(score = ".") %>%
+  dplyr::select(chr, start, end, transcript, score, strand, data) %>%
+  mutate(
+    start = ifelse(strand == "-", start - 2, start),
+    end = ifelse(strand == "+", end + 1, end - 1)
+  )
+
+# split the packed metadata back into its original columns, then prefix the
+# first column name with "#"
+output <- separate(output, data, into = targetNames, sep = ">_>")
+output <- dplyr::rename(output, "#chr" = chr)
+
+##################################################
+
+# write the final table, with a header row, to the requested output path
+print("writing final output")
+write_tsv(output, args[3], col_names = TRUE, append = FALSE)
diff --git a/scripts/cheui_diff_to_bed.sh b/scripts/deprecated/cheui_diff_to_bed.sh
similarity index 100%
rename from scripts/cheui_diff_to_bed.sh
rename to scripts/deprecated/cheui_diff_to_bed.sh
diff --git a/scripts/deprecated/compare.ipynb b/scripts/deprecated/compare.ipynb
new file mode 100644
index 0000000..2e3e5ca
--- /dev/null
+++ b/scripts/deprecated/compare.ipynb
@@ -0,0 +1,699 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "0ef0f028-b645-48c1-ad19-313f8cf50333",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Define the working directory and file paths\n",
+ "wd = \"/g/data/lf10/as7425/R2DTool_demo/\"\n",
+ "file1 = f\"{wd}/methylation_calls_annotated.bed\"\n",
+ "file2 = f\"{wd}/methylation_calls_annotated_R.bed\"\n",
+ "\n",
+ "# Read the files into Pandas dataframes\n",
+ "df1 = pd.read_csv(file1, sep='\\t', low_memory=False)\n",
+ "df2 = pd.read_csv(file2, sep='\\t', low_memory=False)\n",
+ "\n",
+ "# Assuming you want to match 'transcript' in df1 with 'transcript_id' in df2\n",
+ "df2.rename(columns={'transcript_id': 'transcript'}, inplace=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "cea4bd5d-c5aa-4a90-b5e5-199d8eda77d2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " transcript start end name score strand motif coverage \\\n",
+ "0 ENST00000000233 1007 1008 . . + CTTGAGTAA 648 \n",
+ "1 ENST00000000233 1011 1012 . . + AGTAATAAA 628 \n",
+ "2 ENST00000000233 137 138 . . + AAGCAGATG 467 \n",
+ "3 ENST00000000233 151 152 . . + TCTCATGGT 608 \n",
+ "4 ENST00000000233 164 165 . . + TTGGATGCG 544 \n",
+ "\n",
+ " stoichiometry probability ... transcript_biotype tx_len cds_start \\\n",
+ "0 0.1013215859030837 0.118574 ... protein_coding 1032 88.0 \n",
+ "1 0.3223684210526316 0.547572 ... protein_coding 1032 88.0 \n",
+ "2 0.2560240963855422 0.363126 ... protein_coding 1032 88.0 \n",
+ "3 0.5113636363636364 0.421296 ... protein_coding 1032 88.0 \n",
+ "4 0.17433414043583534 0.367664 ... protein_coding 1032 88.0 \n",
+ "\n",
+ " cds_end tx_end transcript_metacoordinate abs_cds_start abs_cds_end \\\n",
+ "0 628.0 1032.0 2.93812 919.0 379.0 \n",
+ "1 628.0 1032.0 2.94802 923.0 383.0 \n",
+ "2 628.0 1032.0 1.09074 49.0 -491.0 \n",
+ "3 628.0 1032.0 1.11667 63.0 -477.0 \n",
+ "4 628.0 1032.0 1.14074 76.0 -464.0 \n",
+ "\n",
+ " up_junc_dist down_junc_dist \n",
+ "0 463.0 NaN \n",
+ "1 467.0 NaN \n",
+ "2 NaN 18.0 \n",
+ "3 NaN 4.0 \n",
+ "4 9.0 72.0 \n",
+ "\n",
+ "[5 rows x 22 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "df1['transcript'] = df1['transcript'].str.split('.').str[0]\n",
+ "print(df1.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8757c384-a36b-4985-9eb0-296d04c7e9c0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " transcript | \n",
+ " start | \n",
+ " end | \n",
+ " name | \n",
+ " score | \n",
+ " strand | \n",
+ " motif | \n",
+ " coverage | \n",
+ " stoichiometry | \n",
+ " probability | \n",
+ " ... | \n",
+ " utr5_len | \n",
+ " utr3_len | \n",
+ " cds_start | \n",
+ " cds_end | \n",
+ " tx_end | \n",
+ " transcript_metacoordinate | \n",
+ " abs_cds_start | \n",
+ " abs_cds_end | \n",
+ " up_junc_dist | \n",
+ " down_junc_dist | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENST00000000233 | \n",
+ " 1007 | \n",
+ " 1008 | \n",
+ " . | \n",
+ " . | \n",
+ " + | \n",
+ " CTTGAGTAA | \n",
+ " 648 | \n",
+ " 0.1013215859030837 | \n",
+ " 0.118574 | \n",
+ " ... | \n",
+ " 88 | \n",
+ " 401 | \n",
+ " 88 | \n",
+ " 631 | \n",
+ " 1032 | \n",
+ " 2.937656 | \n",
+ " 919 | \n",
+ " 376 | \n",
+ " 463.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENST00000000233 | \n",
+ " 1011 | \n",
+ " 1012 | \n",
+ " . | \n",
+ " . | \n",
+ " + | \n",
+ " AGTAATAAA | \n",
+ " 628 | \n",
+ " 0.3223684210526316 | \n",
+ " 0.547572 | \n",
+ " ... | \n",
+ " 88 | \n",
+ " 401 | \n",
+ " 88 | \n",
+ " 631 | \n",
+ " 1032 | \n",
+ " 2.947631 | \n",
+ " 923 | \n",
+ " 380 | \n",
+ " 467.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENST00000000233 | \n",
+ " 137 | \n",
+ " 138 | \n",
+ " . | \n",
+ " . | \n",
+ " + | \n",
+ " AAGCAGATG | \n",
+ " 467 | \n",
+ " 0.2560240963855422 | \n",
+ " 0.363126 | \n",
+ " ... | \n",
+ " 88 | \n",
+ " 401 | \n",
+ " 88 | \n",
+ " 631 | \n",
+ " 1032 | \n",
+ " 1.090239 | \n",
+ " 49 | \n",
+ " -494 | \n",
+ " NaN | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENST00000000233 | \n",
+ " 151 | \n",
+ " 152 | \n",
+ " . | \n",
+ " . | \n",
+ " + | \n",
+ " TCTCATGGT | \n",
+ " 608 | \n",
+ " 0.5113636363636364 | \n",
+ " 0.421296 | \n",
+ " ... | \n",
+ " 88 | \n",
+ " 401 | \n",
+ " 88 | \n",
+ " 631 | \n",
+ " 1032 | \n",
+ " 1.116022 | \n",
+ " 63 | \n",
+ " -480 | \n",
+ " NaN | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENST00000000233 | \n",
+ " 164 | \n",
+ " 165 | \n",
+ " . | \n",
+ " . | \n",
+ " + | \n",
+ " TTGGATGCG | \n",
+ " 544 | \n",
+ " 0.17433414043583534 | \n",
+ " 0.367664 | \n",
+ " ... | \n",
+ " 88 | \n",
+ " 401 | \n",
+ " 88 | \n",
+ " 631 | \n",
+ " 1032 | \n",
+ " 1.139963 | \n",
+ " 76 | \n",
+ " -467 | \n",
+ " 9.0 | \n",
+ " 72.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " transcript start end name score strand motif coverage \\\n",
+ "0 ENST00000000233 1007 1008 . . + CTTGAGTAA 648 \n",
+ "1 ENST00000000233 1011 1012 . . + AGTAATAAA 628 \n",
+ "2 ENST00000000233 137 138 . . + AAGCAGATG 467 \n",
+ "3 ENST00000000233 151 152 . . + TCTCATGGT 608 \n",
+ "4 ENST00000000233 164 165 . . + TTGGATGCG 544 \n",
+ "\n",
+ " stoichiometry probability ... utr5_len utr3_len cds_start cds_end \\\n",
+ "0 0.1013215859030837 0.118574 ... 88 401 88 631 \n",
+ "1 0.3223684210526316 0.547572 ... 88 401 88 631 \n",
+ "2 0.2560240963855422 0.363126 ... 88 401 88 631 \n",
+ "3 0.5113636363636364 0.421296 ... 88 401 88 631 \n",
+ "4 0.17433414043583534 0.367664 ... 88 401 88 631 \n",
+ "\n",
+ " tx_end transcript_metacoordinate abs_cds_start abs_cds_end \\\n",
+ "0 1032 2.937656 919 376 \n",
+ "1 1032 2.947631 923 380 \n",
+ "2 1032 1.090239 49 -494 \n",
+ "3 1032 1.116022 63 -480 \n",
+ "4 1032 1.139963 76 -467 \n",
+ "\n",
+ " up_junc_dist down_junc_dist \n",
+ "0 463.0 NaN \n",
+ "1 467.0 NaN \n",
+ "2 NaN 18.0 \n",
+ "3 NaN 4.0 \n",
+ "4 9.0 72.0 \n",
+ "\n",
+ "[5 rows x 25 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "1f115e32-1825-4054-adf3-f67abf936c74",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " transcript start end_rust name_rust score_rust strand_rust \\\n",
+ "0 ENST00000000233 1007 1008 . . + \n",
+ "1 ENST00000000233 1011 1012 . . + \n",
+ "2 ENST00000000233 137 138 . . + \n",
+ "3 ENST00000000233 151 152 . . + \n",
+ "4 ENST00000000233 164 165 . . + \n",
+ "\n",
+ " motif_rust coverage_rust stoichiometry_rust probability_rust ... \\\n",
+ "0 CTTGAGTAA 648 0.1013215859030837 0.118574 ... \n",
+ "1 AGTAATAAA 628 0.3223684210526316 0.547572 ... \n",
+ "2 AAGCAGATG 467 0.2560240963855422 0.363126 ... \n",
+ "3 TCTCATGGT 608 0.5113636363636364 0.421296 ... \n",
+ "4 TTGGATGCG 544 0.17433414043583534 0.367664 ... \n",
+ "\n",
+ " utr5_len utr3_len cds_start_R cds_end_R tx_end_R \\\n",
+ "0 88 401 88 631 1032 \n",
+ "1 88 401 88 631 1032 \n",
+ "2 88 401 88 631 1032 \n",
+ "3 88 401 88 631 1032 \n",
+ "4 88 401 88 631 1032 \n",
+ "\n",
+ " transcript_metacoordinate_R abs_cds_start_R abs_cds_end_R \\\n",
+ "0 2.937656 919 376 \n",
+ "1 2.947631 923 380 \n",
+ "2 1.090239 49 -494 \n",
+ "3 1.116022 63 -480 \n",
+ "4 1.139963 76 -467 \n",
+ "\n",
+ " up_junc_dist_R down_junc_dist_R \n",
+ "0 463.0 NaN \n",
+ "1 467.0 NaN \n",
+ "2 NaN 18.0 \n",
+ "3 NaN 4.0 \n",
+ "4 9.0 72.0 \n",
+ "\n",
+ "[5 rows x 45 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# Merge the dataframes on 'transcript' and 'start' columns\n",
+ "merged_df = pd.merge(df1, df2, on=['transcript', 'start'], suffixes=('_rust', '_R'))\n",
+ "\n",
+ "# Output the merged dataframe if needed (can also save to file if required)\n",
+ "print(merged_df.head())\n",
+ "\n",
+ "# Optionally, save the merged dataframe to a file\n",
+ "# merged_df.to_csv(f\"{wd}/merged_output.bed\", sep='\\t', index=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "f4216e1c-0edf-4524-bbb9-a1d281f17790",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['transcript', 'start', 'end_rust', 'name_rust', 'score_rust',\n",
+ " 'strand_rust', 'motif_rust', 'coverage_rust', 'stoichiometry_rust',\n",
+ " 'probability_rust', 'gene_id_rust', 'gene_name_rust',\n",
+ " 'transcript_biotype_rust', 'tx_len_rust', 'cds_start_rust',\n",
+ " 'cds_end_rust', 'tx_end_rust', 'transcript_metacoordinate_rust',\n",
+ " 'abs_cds_start_rust', 'abs_cds_end_rust', 'up_junc_dist_rust',\n",
+ " 'down_junc_dist_rust', 'end_R', 'name_R', 'score_R', 'strand_R',\n",
+ " 'motif_R', 'coverage_R', 'stoichiometry_R', 'probability_R',\n",
+ " 'transcript_biotype_R', 'gene_name_R', 'gene_id_R', 'tx_len_R',\n",
+ " 'cds_len', 'utr5_len', 'utr3_len', 'cds_start_R', 'cds_end_R',\n",
+ " 'tx_end_R', 'transcript_metacoordinate_R', 'abs_cds_start_R',\n",
+ " 'abs_cds_end_R', 'up_junc_dist_R', 'down_junc_dist_R'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(merged_df.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "83107972-69e8-4188-ae8c-370fd504e942",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "from scipy.stats import linregress\n",
+ "\n",
+ "# Function to plot scatterplot with trendline and R^2 value\n",
+ "def plot_scatter_with_trendline(df, x_col, y_col, log_scale=False):\n",
+ " \"\"\"\n",
+ " Plots a scatterplot with a trendline and R^2 value for two columns in a DataFrame.\n",
+ "\n",
+ " Parameters:\n",
+ " df (DataFrame): The DataFrame containing the data.\n",
+ " x_col (str): The name of the column to use as the x-axis.\n",
+ " y_col (str): The name of the column to use as the y-axis.\n",
+ " log_scale (bool): Whether to apply log-log scaling to both axes.\n",
+ " \"\"\"\n",
+ " print(df.columns)\n",
+ " # Check if columns exist in the DataFrame\n",
+ " if x_col not in df.columns or y_col not in df.columns:\n",
+ " raise ValueError(\"One or both columns not found in the DataFrame.\")\n",
+ " \n",
+ " # Creating a scatter plot\n",
+ " plt.figure(figsize=(10, 6))\n",
+ " sns.scatterplot(data=df, x=x_col, y=y_col, alpha=0.6)\n",
+ "\n",
+ " # Optionally apply log-log scaling\n",
+ " if log_scale:\n",
+ " plt.xscale('log')\n",
+ " plt.yscale('log')\n",
+ "\n",
+ " # Fit a linear regression model to get the trendline and R^2 value\n",
+ " slope, intercept, r_value, p_value, std_err = linregress(df[x_col].dropna(), df[y_col].dropna())\n",
+ " plt.plot(df[x_col], intercept + slope*df[x_col], color='red', label=f'Fit Line: y={slope:.2f}x+{intercept:.2f}')\n",
+ "\n",
+ " # Plot settings\n",
+ " plt.title(f'Scatter Plot with Trendline between {x_col} and {y_col}')\n",
+ " plt.xlabel(x_col)\n",
+ " plt.ylabel(y_col)\n",
+ " plt.legend(title=f'R-squared = {r_value**2:.2f}')\n",
+ "\n",
+ " # Show plot\n",
+ " plt.show()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f5cda0bd-845d-445a-854b-a9b943cbc939",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['transcript', 'start', 'end_rust', 'name_rust', 'score_rust',\n",
+ " 'strand_rust', 'motif_rust', 'coverage_rust', 'stoichiometry_rust',\n",
+ " 'probability_rust', 'gene_id_rust', 'gene_name_rust',\n",
+ " 'transcript_biotype_rust', 'tx_len_rust', 'cds_start_rust',\n",
+ " 'cds_end_rust', 'tx_end_rust', 'transcript_metacoordinate_rust',\n",
+ " 'abs_cds_start_rust', 'abs_cds_end_rust', 'up_junc_dist_rust',\n",
+ " 'down_junc_dist_rust', 'end_R', 'name_R', 'score_R', 'strand_R',\n",
+ " 'motif_R', 'coverage_R', 'stoichiometry_R', 'probability_R',\n",
+ " 'transcript_biotype_R', 'gene_name_R', 'gene_id_R', 'tx_len_R',\n",
+ " 'cds_len', 'utr5_len', 'utr3_len', 'cds_start_R', 'cds_end_R',\n",
+ " 'tx_end_R', 'transcript_metacoordinate_R', 'abs_cds_start_R',\n",
+ " 'abs_cds_end_R', 'up_junc_dist_R', 'down_junc_dist_R'],\n",
+ " dtype='object')\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "