Skip to content

Commit

Permalink
Added fasta_synteny sub workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 27, 2024
1 parent fa0b294 commit fdd04ec
Show file tree
Hide file tree
Showing 23 changed files with 1,286 additions and 13 deletions.
32 changes: 32 additions & 0 deletions assets/schema_xref_assemblies.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_xref_assemblies.json",
"title": "plant-food-research-open/assemblyqc pipeline - params.synteny_xref_assemblies schema",
"description": "Schema for the file provided with params.synteny_xref_assemblies",
"type": "array",
"items": {
"type": "object",
"properties": {
"tag": {
"type": "string",
"pattern": "^\\w+$",
"errorMessage": "Assembly tags must be provided and can only contain alphanumeric characters including '_'"
},
"fasta": {
"type": "string",
"pattern": "^\\S+\\.f(a|asta|as|sa|na)(\\.gz)?$",
"errorMessage": "FASTA file path cannot contain spaces and must have extension '.f(a|asta|as|sa|na)' or '.f(a|asta|as|sa|na).gz'"
},
"synteny_labels": {
"errorMessage": "Synteny labels tsv path cannot contain spaces and must have extension '.tsv'",
"anyOf": [
{
"type": "string",
"pattern": "^\\S+\\.tsv$"
}
]
}
},
"required": ["tag", "fasta", "synteny_labels"]
}
}
77 changes: 77 additions & 0 deletions bin/colorbundlesbycontig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python

import sys
import re

# Path of the bundled-links file to colour, taken from the first command-line
# argument. This runs at module load time, so starting the script without an
# argument raises IndexError here.
bundled_links_file_name = sys.argv[1]


def natural_key(string):
    """Build a sort key in which runs of digits compare as integers.

    The string is split around every digit run; digit chunks become ints and
    all other chunks (including the empty strings produced at the edges) stay
    as text, so "chr2" sorts before "chr10".
    """
    key = []
    for chunk in re.split(r"(\d+)", string):
        key.append(int(chunk) if chunk.isdigit() else chunk)
    return key


def hsv2rgb(h, s, v):
    """Convert an HSV colour to an (r, g, b) tuple of ints in 0..255.

    h is the hue in degrees; s and v are saturation and value, expected as
    fractions (the only caller in this file passes 0.8 for both). Each
    channel is truncated, not rounded, when scaled to 0..255.
    """
    hue, sat, val = float(h), float(s), float(v)

    scaled = hue / 60.0
    whole = int(scaled)
    sector = int(whole) % 6  # which sixth of the colour wheel the hue is in
    frac = scaled - whole  # position inside that sector

    p = val * (1 - sat)
    q = val * (1 - frac * sat)
    t = val * (1 - (1 - frac) * sat)

    # One (r, g, b) arrangement per sector, indexed by sector number.
    by_sector = (
        (val, t, p),
        (q, val, p),
        (p, val, t),
        (p, q, val),
        (t, p, val),
        (val, p, q),
    )
    r, g, b = by_sector[sector]
    return int(r * 255), int(g * 255), int(b * 255)


def generate_colors(num_colors):
    """Generate num_colors "R,G,B,0.5" colour strings spread around the hue wheel.

    Args:
        num_colors: how many colours to produce.

    Returns:
        A list of num_colors strings of the form "r,g,b,0.5", each built from
        hsv2rgb(hue, 0.8, 0.8). An empty list is returned when num_colors is
        zero or negative (previously zero raised ZeroDivisionError).
    """
    if num_colors <= 0:
        return []

    # Whole-degree steps, as before, whenever at least one degree per colour
    # is available.
    hue_step = 360 // num_colors
    if hue_step == 0:
        # More than 360 colours: a whole-degree step would truncate to 0 and
        # every colour would come out identical, so use a fractional step.
        hue_step = 360 / num_colors

    colors = []
    for index in range(num_colors):
        red, green, blue = hsv2rgb(index * hue_step, 0.8, 0.8)
        colors.append(f"{red},{green},{blue},0.5")
    return colors


def read_file_lines(file_path):
    """Return every line of the text file at file_path, line endings kept."""
    with open(file_path, "r") as handle:
        return list(handle)


def generate_colors_by_ids(bundle_file_lines):
    """Create a dictionary mapping each unique target id to a colour.

    Args:
        bundle_file_lines: lines of a bundled-links file; the target id is
            the whitespace-separated field at index 3.

    Returns:
        A dict from target id to an "r,g,b,0.5" colour string. Ids are put in
        natural order before colours are assigned. Blank lines are ignored,
        and input with no ids yields an empty dict instead of an error.

    Raises:
        IndexError: if a non-blank line has fewer than four fields.
    """
    target_ids = set()
    for line in bundle_file_lines:
        fields = line.split()
        if not fields:
            # Blank line (for example a trailing empty line): nothing to colour.
            continue
        target_ids.add(fields[3])  # index 3: Target ids

    if not target_ids:
        return {}

    ordered_ids = sorted(target_ids, key=natural_key)
    return dict(zip(ordered_ids, generate_colors(len(ordered_ids))))


if __name__ == "__main__":
    # Drop blank lines up front: they carry no link record, and indexing their
    # (empty) field list would otherwise abort the whole run with IndexError.
    bundle_file_lines = [
        line for line in read_file_lines(bundled_links_file_name) if line.strip()
    ]
    id_to_color = generate_colors_by_ids(bundle_file_lines)

    for line in bundle_file_lines:
        parts = line.strip().split()
        unique_id = parts[3]  # index 3: Target ids
        color = id_to_color[unique_id]
        # Re-emit the first six fields, insert the colour, then the field at
        # index 6 unchanged.
        print(" ".join(parts[0:6] + [f"color=({color})", parts[6]]))
181 changes: 181 additions & 0 deletions bin/colorbundlesbysize.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/perl
use strict;
use warnings;

=head1 DESCRIPTION
Adds colours to a CIRCOS bundle file.
=head1 AUTHOR
Original: Ross Crowhurst L<mailto:ross.crowhurst@plantandfood.co.nz>
Modified: Usman Rashid L<mailto:usman.rashid@plantandfood.co.nz>
=cut

# Set to 1 by the -low command-line flag; selects %bundleColorsRGBLow instead
# of %bundleColorsRGB everywhere a palette is read.
my $low = 0;

# Default palette, keyed by link-count threshold. While colouring, a bundle
# whose nlinks count is greater than a threshold takes that threshold's
# colour, and the largest such threshold wins. Values are four comma-separated
# numbers; the first three are used as R,G,B by rgbToHex, the fourth is
# presumably the alpha/transparency that CIRCOS expects (confirm).
my %bundleColorsRGB = (
3000 => "128,0,0,0.5",
2000 => "229,0,10,0.5",
1500 => "229,19,9,0.5",
1000 => "216,38,8,0.5",
500 => "210,57,7,0.5",
250 => "204,76,6,0.5",
100 => "198,95,5,0.5",
50 => "192,114,4,0.5",
25 => "186,113,3,0.5", # NOTE(review): green 113 breaks the otherwise evenly falling green channel (152 -> 113 -> 114); 133 may have been intended - confirm
10 => "180,152,2,0.5",
5 => "174,171,1,0.5",
0 => "168,191,0,0.5"
);

# Palette used when -low is given: the same twelve colours as above, but with
# thresholds spaced every 5 links from 0 to 55.
my %bundleColorsRGBLow = (
55 => "128,0,0,0.5",
50 => "229,0,10,0.5",
45 => "229,19,9,0.5",
40 => "216,38,8,0.5",
35 => "210,57,7,0.5",
30 => "204,76,6,0.5",
25 => "198,95,5,0.5",
20 => "192,114,4,0.5",
15 => "186,113,3,0.5", # NOTE(review): same out-of-sequence green value (113) as in %bundleColorsRGB - confirm
10 => "180,152,2,0.5",
5 => "174,171,1,0.5",
0 => "168,191,0,0.5"
);

# Print the command-line help to STDOUT and terminate with exit status 0.
sub usage {
    my @helpLines = (
        "USAGE: $0 -i=bundle_file_in -o=colored_bundle_file_out [-low]\n",
        "To get colors:\n\n",
        " $0 -colorsRGB [or -colorsRGBAsHTMLTable] [-low]\n",
        "or\n",
        " $0 -colorsHex [-low]\n",
        "or\n",
        " $0 -colorsHexAsHTMLKeyTable [-low]\n",
    );
    print @helpLines;
    exit(0);
}

# Print the active palette as "threshold<TAB>R,G,B,A" lines, in ascending
# threshold order, then exit with status 0. The palette is
# %bundleColorsRGBLow when $low is set, %bundleColorsRGB otherwise.
sub exportRGB {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
    for my $limit (sort { $a <=> $b } keys %{$palette}) {
        my $rgba = $palette->{$limit};
        print "$limit\t$rgba\n";
    }
    exit(0);
}

# Print the active palette as an HTML table with one row per threshold
# (ascending): the threshold in the first cell, and the colour string in a
# second cell whose background is that colour. Exits with status 0 afterwards.
# The palette is %bundleColorsRGBLow when $low is set, %bundleColorsRGB otherwise.
sub exportRGBHTMLTable {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;

    print "<table border=1>\n";
    print "<tr><th>Bundled Links</th><th>RGB</th></tr>\n";
    for my $limit (sort { $a <=> $b } keys %{$palette}) {
        my $rgba = $palette->{$limit};
        my $hex  = rgbToHex($rgba);
        print qq{<tr><td>$limit</td><td bgcolor="$hex">$rgba</td></tr>\n};
    }
    print "</table>\n";
    exit(0);
}

# Print the active palette as a single-column HTML colour key: one row per
# threshold (ascending), showing the threshold in white text on a cell
# filled with its colour. Exits with status 0 afterwards. The palette is
# %bundleColorsRGBLow when $low is set, %bundleColorsRGB otherwise.
sub exportAsHTMLKeyTable {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;

    print "<table border=1>\n";
    print "<tr><th>Bundled Links</th></tr>\n";
    for my $limit (sort { $a <=> $b } keys %{$palette}) {
        my $hex = rgbToHex($palette->{$limit});
        print qq{<tr><td bgcolor="$hex">&nbsp;<span style="color:white">$limit</span></td></tr>\n};
    }
    print "</table>\n";
    exit(0);
}
# Convert a comma-separated "R,G,B[,A]" string to an upper-case "#RRGGBB"
# HTML colour. Only the first three components are used; anything after
# them (such as the alpha value) is ignored.
sub rgbToHex {
    my ($rgba) = @_;
    my ($red, $green, $blue) = split /,/, $rgba;
    return sprintf("#%2.2X%2.2X%2.2X", $red, $green, $blue);
}

my $bundleFileIn = "";
my $bundleFileOut = "";

# Print the active palette as "threshold<TAB>#RRGGBB" lines in ascending
# threshold order, then exit with status 0. This backs the -colorsHex option
# that usage() advertises; the layout mirrors the -colorsRGB output.
sub exportHex {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
    foreach my $threshold (sort {$a <=> $b} keys %{$palette})
    {
        print "$threshold\t" . rgbToHex($palette->{$threshold}) . "\n";
    }
    exit(0);
}

(@ARGV) or usage();

# Read ALL arguments before acting on any of them. The export subs exit
# immediately, so calling them from inside this loop made "-colorsRGB -low"
# (the order shown by usage) silently ignore -low.
my $exportAction;
foreach my $arg (@ARGV)
{
    ($arg =~ m/^-(h|help)$/) and usage();
    ($arg =~ m/^-low$/) and $low = 1;
    ($arg =~ m/^-i=(.+)$/) and $bundleFileIn = $1;
    ($arg =~ m/^-o=(.+)$/) and $bundleFileOut = $1;

    # As before, the first export option on the command line wins.
    next if defined $exportAction;
    ($arg =~ m/^-colorsRGB$/) and $exportAction = \&exportRGB;
    ($arg =~ m/^-colorsRGBAsHTMLTable$/) and $exportAction = \&exportRGBHTMLTable;
    ($arg =~ m/^-colorsHex$/) and $exportAction = \&exportHex;
    ($arg =~ m/^-colorsHexAsHTMLKeyTable$/) and $exportAction = \&exportAsHTMLKeyTable;
}
defined $exportAction and $exportAction->();

# Three-argument open with lexical handles: the file names are taken
# literally, whatever characters they contain. The input is opened first so
# that a bad input path does not leave a truncated output file behind.
open(my $in, "<", $bundleFileIn) or die "ERROR: can not open bundle in file $bundleFileIn $!\n";
open(my $out, ">", $bundleFileOut) or die "ERROR: can not open bundle out file $bundleFileOut $!\n";

# The palette and its sorted thresholds do not change per line, so they are
# prepared once here.
my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
my @thresholds = sort {$a <=> $b} keys %{$palette};

while (my $line = <$in>)
{
    # Example input lines (whitespace separated; field 7 starts with nlinks=N):
    #ASB_LG19 13470754 14218750 Ss262 2177839 2976275 nlinks=672,bsize1=150447,bsize2=150419,bidentity1=0.201133,bidentity2=0.188392,depth1=0,depth2=0,
    #ASB_LG19 14314359 14314420 Ss262 7198136 7198167 nlinks=9,bsize1=62,bsize2=32,bidentity1=1.000000,bidentity2=1.000000,depth1=1,depth2=1,
    chomp $line;

    # A blank line carries no bundle; skip it instead of emitting warnings
    # and a row of empty fields.
    next if $line =~ m/^\s*$/;

    my @data = split/\s+/, $line;
    my @bundleFields = split/,/, $data[6];
    my ($label, $count) = split/=/, $bundleFields[0];

    # Thresholds are visited in ascending order, so the colour that remains
    # is the one for the largest threshold that $count exceeds.
    my $colorText = "color=(168,191,0)";
    foreach my $threshold (@thresholds)
    {
        if ($count > $threshold)
        {
            $colorText = "color=($palette->{$threshold})";
        }
    }

    # The colour is inserted between the six coordinate fields and the
    # original options field.
    my $newline = join(" ", $data[0], $data[1], $data[2], $data[3], $data[4], $data[5], $colorText, $data[6]);
    print {$out} "$newline\n";
}
close($in);
# Buffered write errors only surface at close, so check it.
close($out) or die "ERROR: can not close bundle out file $bundleFileOut $!\n";
exit(0);
39 changes: 39 additions & 0 deletions bin/validateseqlists.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash

# Validates a pair of sequence list files: every line of each file must have
# exactly two whitespace-separated columns, and the labels in the second
# column must be unique across both files. Errors go to stderr with exit
# status 1.

if [[ $# -lt 2 ]]; then
    echo "Usage: $(basename "$0") <seq_list_a> <seq_list_b>" >&2
    exit 1
fi

seqFileA=$1
seqFileB=$2

# Second-column labels collected from both files, in reading order.
secondColumn=()

# collect_labels FILE
# Checks that every line of FILE has exactly two columns and appends each
# second column to the global secondColumn array. Exits the script on the
# first invalid line, or when FILE cannot be read.
collect_labels() {
    local seqFile=$1
    local line
    local -a columns

    # Without this check a failed redirection only printed a shell error and
    # the script carried on, finally exiting with status 0.
    if [[ ! -r "$seqFile" ]]; then
        echo "Error: Sequence file $(basename "$seqFile") cannot be read." >&2
        exit 1
    fi

    # '|| [[ -n "$line" ]]' keeps a final line that has no trailing newline;
    # plain 'read' returns non-zero for it and the line used to be skipped.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Split on whitespace without pathname expansion (an unquoted
        # array assignment would expand characters such as '*').
        read -r -a columns <<< "$line"
        if [[ ${#columns[@]} -ne 2 ]]; then
            echo "Error: Sequence file $(basename "$seqFile") does not have exactly two columns." >&2
            exit 1
        fi
        secondColumn+=("${columns[1]}")
    done < "$seqFile"
}

collect_labels "$seqFileA"
collect_labels "$seqFileB"

# 'uniq -d' prints each label that occurs more than once; LC_ALL=C makes the
# comparison byte-wise so locale collation cannot merge distinct labels.
duplicateLabels=$(printf '%s\n' "${secondColumn[@]}" | LC_ALL=C sort | LC_ALL=C uniq -d)
if [[ -n "$duplicateLabels" ]]; then
    echo "Error: Duplicate sequence labels detected in second column for pair: $(basename "$seqFileA"), $(basename "$seqFileB")" >&2
    exit 1
fi
3 changes: 3 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ process {
withName:SAMBLASTER {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
// DNADIFF: request 7 days of wall time multiplied by the attempt number.
// The value is passed through check_max(..., 'time'), which is defined
// outside this view - presumably it caps the request at a configured
// maximum (confirm).
withName:DNADIFF {
time = { check_max( 7.day * task.attempt, 'time' ) }
}
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
Expand Down
Loading

0 comments on commit fdd04ec

Please sign in to comment.