-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
1,286 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-07/schema", | ||
"$id": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/master/assets/schema_input.json", | ||
"title": "plant-food-research-open/assemblyqc pipeline - params.synteny_xref_assemblies schema", | ||
"description": "Schema for the file provided with params.synteny_xref_assemblies", | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"tag": { | ||
"type": "string", | ||
"pattern": "^\\w+$", | ||
"errorMessage": "Assembly tags must be provided and can only contain alphanumeric characters including '_'" | ||
}, | ||
"fasta": { | ||
"type": "string", | ||
"pattern": "^\\S+\\.f(ast|as|sa|na)?\\.gz$", | ||
"errorMessage": "FASTA file path cannot contain spaces and must have extension '.f(ast|as|sa|na)' or '.f(ast|as|sa|na).gz'" | ||
}, | ||
"synteny_labels": { | ||
"errorMessage": "Synteny labels tsv path cannot contain spaces and must have extension '.tsv'", | ||
"anyOf": [ | ||
{ | ||
"type": "string", | ||
"pattern": "^\\S+\\.tsv$" | ||
} | ||
] | ||
} | ||
}, | ||
"required": ["tag", "fasta", "synteny_labels"] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import re | ||
|
||
bundled_links_file_name = sys.argv[1] | ||
|
||
|
||
def natural_key(string):
    """Build a sort key that orders embedded numbers numerically.

    The string is split on runs of digits (the runs themselves are kept);
    each all-digit piece becomes an ``int`` and every other piece stays a
    ``str``, e.g. ``"chr10"`` -> ``["chr", 10, ""]``.
    """
    key = []
    for chunk in re.split(r"(\d+)", string):
        key.append(int(chunk) if chunk.isdigit() else chunk)
    return key
|
||
|
||
def hsv2rgb(h, s, v):
    """Convert an HSV colour to an RGB triple of integers.

    Args:
        h: Hue in degrees; anything ``float()`` accepts.
        s: Saturation; anything ``float()`` accepts.
        v: Value (brightness); anything ``float()`` accepts.

    Returns:
        ``(r, g, b)`` where each channel is the fractional channel value
        multiplied by 255 and truncated with ``int()``.
    """
    hue, sat, val = float(h), float(s), float(v)

    scaled_hue = hue / 60.0
    whole_part = int(scaled_hue)
    sector = int(whole_part) % 6  # always 0..5 for Python's modulo
    frac = scaled_hue - whole_part

    p = val * (1 - sat)
    q = val * (1 - frac * sat)
    t = val * (1 - (1 - frac) * sat)

    # One (r, g, b) arrangement per 60-degree sector of the hue circle.
    channels_by_sector = (
        (val, t, p),
        (q, val, p),
        (p, val, t),
        (p, q, val),
        (t, p, val),
        (val, p, q),
    )
    red, green, blue = channels_by_sector[sector]
    return int(red * 255), int(green * 255), int(blue * 255)
|
||
|
||
def generate_colors(num_colors):
    """Generate ``num_colors`` colour strings spread around the hue circle.

    Args:
        num_colors: Number of colours wanted.

    Returns:
        A list of ``"R,G,B,0.5"`` strings (saturation and value fixed at
        0.8, alpha fixed at 0.5). An empty list is returned when
        ``num_colors`` is zero or negative, so an empty bundle file no longer
        raises ``ZeroDivisionError``.
    """
    if num_colors <= 0:
        return []

    # Keep the whole-degree step for the usual case so existing output is
    # unchanged; above 360 colours that step truncates to 0 and every colour
    # would be identical, so fall back to a fractional step.
    hue_step = int(360 / num_colors) or 360 / num_colors

    colors = []
    for index in range(num_colors):
        red, green, blue = hsv2rgb(index * hue_step, 0.8, 0.8)
        colors.append(f"{red},{green},{blue},0.5")
    return colors
|
||
|
||
def read_file_lines(file_path):
    """Return every line of the text file at ``file_path``, newlines kept."""
    with open(file_path, "r") as handle:
        lines = handle.readlines()
    return lines
|
||
|
||
def generate_colors_by_ids(bundle_file_lines):
    """Map each distinct target id to its own colour string.

    The target id is the fourth whitespace-separated field (index 3) of each
    line. Ids are ordered with ``natural_key`` and paired, in that order,
    with the colours returned by ``generate_colors``.
    """
    target_ids = {line.split()[3] for line in bundle_file_lines}
    palette = generate_colors(len(target_ids))
    ordered_ids = sorted(target_ids, key=natural_key)
    return dict(zip(ordered_ids, palette))
|
||
|
||
if __name__ == "__main__":
    # Read the bundled-links file named on the command line and assign one
    # colour per distinct target id.
    bundle_file_lines = read_file_lines(bundled_links_file_name)
    id_to_color = generate_colors_by_ids(bundle_file_lines)

    # Re-emit each line with a color=(...) field inserted between the first
    # six columns and the seventh column.
    for raw_line in bundle_file_lines:
        fields = raw_line.strip().split()
        target_id = fields[3]  # index 3: Target ids
        color_field = f"color=({id_to_color[target_id]})"
        print(" ".join([*fields[0:6], color_field, fields[6]]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
#!/usr/bin/perl | ||
use strict; | ||
use warnings; | ||
|
||
=head1 DESCRIPTION | ||
Adds colours to a CIRCOS bundle file. | ||
=head1 AUTHOR | ||
Original: Ross Crowhurst L<mailto:ross.crowhurst@plantandfood.co.nz> | ||
Modified: Usman Rashid L<mailto:usman.rashid@plantandfood.co.nz> | ||
=cut | ||
|
||
# Palette selector: 0 uses %bundleColorsRGB, 1 (set by the -low switch)
# uses %bundleColorsRGBLow.
my $low = 0;

# Colour ramp shared by both palettes, as "R,G,B,alpha" strings, listed from
# the colour of the lowest threshold to the colour of the highest.
my @bundleColorRamp = (
    "168,191,0,0.5",
    "174,171,1,0.5",
    "180,152,2,0.5",
    "186,113,3,0.5",
    "192,114,4,0.5",
    "198,95,5,0.5",
    "204,76,6,0.5",
    "210,57,7,0.5",
    "216,38,8,0.5",
    "229,19,9,0.5",
    "229,0,10,0.5",
    "128,0,0,0.5",
);

# Default palette: link-count threshold => colour.
my %bundleColorsRGB;
@bundleColorsRGB{ 0, 5, 10, 25, 50, 100, 250, 500, 1000, 1500, 2000, 3000 } = @bundleColorRamp;

# "Low" palette: the same colours on evenly spaced thresholds 0, 5, ..., 55.
my %bundleColorsRGBLow;
@bundleColorsRGBLow{ map { 5 * $_ } 0 .. $#bundleColorRamp } = @bundleColorRamp;
|
||
# Print the command-line help to STDOUT and exit with status 0.
sub usage {
    my @helpLines = (
        "USAGE: $0 -i=bundle_file_in -o=colored_bundle_file_out [-low]\n",
        "To get colors:\n\n",
        " $0 -colorsRGB [or -colorsRGBAsHTMLTable] [-low]\n",
        "or\n",
        " $0 -colorsHex [-low]\n",
        "or\n",
        " $0 -colorsHexAsHTMLKeyTable [-low]\n",
    );
    print join("", @helpLines);
    exit(0);
}
|
||
# Print the active palette (the low palette when $low is set) to STDOUT as
# "threshold<TAB>R,G,B,alpha" lines in ascending threshold order, then exit 0.
sub exportRGB {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
    for my $cutoff (sort { $a <=> $b } keys %{$palette}) {
        my $rgba = $palette->{$cutoff};
        print "$cutoff\t$rgba\n";
    }
    exit(0);
}
|
||
# Print the active palette (the low palette when $low is set) to STDOUT as an
# HTML table with one row per threshold: the threshold, then the RGB string in
# a cell whose background is that colour. Exits with status 0 afterwards.
sub exportRGBHTMLTable {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;

    print "<table border=1>\n";
    print "<tr><th>Bundled Links</th><th>RGB</th></tr>\n";
    for my $cutoff (sort { $a <=> $b } keys %{$palette}) {
        my $rgba = $palette->{$cutoff};
        my $hex  = rgbToHex($rgba);
        print qq{<tr><td>$cutoff</td><td bgcolor="$hex">$rgba</td></tr>\n};
    }
    print "</table>\n";
    exit(0);
}
|
||
# Print the active palette (the low palette when $low is set) to STDOUT as a
# single-column HTML key table: each row shows the threshold in white text on
# a cell whose background is that threshold's colour. Exits with status 0.
sub exportAsHTMLKeyTable {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;

    print "<table border=1>\n";
    print "<tr><th>Bundled Links</th></tr>\n";
    for my $cutoff (sort { $a <=> $b } keys %{$palette}) {
        my $hex = rgbToHex($palette->{$cutoff});
        print qq{<tr><td bgcolor="$hex"> <span style="color:white">$cutoff</span></td></tr>\n};
    }
    print "</table>\n";
    exit(0);
}
# Convert an "R,G,B[,alpha]" string to an upper-case "#RRGGBB" hex colour.
# Only the first three comma-separated fields are used.
sub rgbToHex {
    my ($rgbaText) = @_;
    my ($red, $green, $blue) = split /,/, $rgbaText;
    return sprintf("#%2.2X%2.2X%2.2X", $red, $green, $blue);
}
|
||
# Paths supplied with -i=... and -o=...; they stay empty when not given.
my $bundleFileIn = "";
my $bundleFileOut = "";

(@ARGV) or usage();

# -colorsHex is advertised by usage(); print "threshold<TAB>#RRGGBB" lines
# for the active palette, mirroring the layout of exportRGB.
my $exportHex = sub {
    my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
    foreach my $threshold (sort {$a <=> $b} keys %{$palette})
    {
        print "$threshold\t" . rgbToHex($palette->{$threshold}) . "\n";
    }
    exit(0);
};

# Read every argument before running an export. The export subs exit
# immediately, so dispatching inside the loop ignored a -low written after
# the -colors* switch, which is the order shown in the usage text.
my $exportAction;
foreach my $arg (@ARGV)
{
    if ($arg =~ m/^-(h|help)$/) { usage(); }
    elsif ($arg =~ m/^-low$/) { $low = 1; }
    elsif ($arg =~ m/^-colorsRGB$/) { $exportAction ||= \&exportRGB; }
    elsif ($arg =~ m/^-colorsRGBAsHTMLTable$/) { $exportAction ||= \&exportRGBHTMLTable; }
    elsif ($arg =~ m/^-colorsHex$/) { $exportAction ||= $exportHex; }
    elsif ($arg =~ m/^-colorsHexAsHTMLKeyTable$/) { $exportAction ||= \&exportAsHTMLKeyTable; }
    elsif ($arg =~ m/^-i=(.+)$/) { $bundleFileIn = $1; }
    elsif ($arg =~ m/^-o=(.+)$/) { $bundleFileOut = $1; }
}

# As before, the first export switch on the command line wins; it does not
# return.
$exportAction->() if $exportAction;
|
||
# Copy the bundle file to the output file, inserting a color=(...) field
# between column 6 and column 7 of every line. The colour comes from the
# active palette and is the entry for the highest threshold that the line's
# link count (the value of the first comma-separated item of column 7, e.g.
# nlinks=672) exceeds.
(length $bundleFileIn and length $bundleFileOut)
    or die "ERROR: both -i=bundle_file_in and -o=colored_bundle_file_out are required\n";

# Open the input first so a bad input path cannot truncate an existing output.
open(my $inFh, '<', $bundleFileIn) or die "ERROR: can not open bundle in file $bundleFileIn $!\n";
open(my $outFh, '>', $bundleFileOut) or die "ERROR: can not open bundle out file $bundleFileOut $!\n";

# The palette and its sorted thresholds do not change per line; compute once.
my $palette = $low ? \%bundleColorsRGBLow : \%bundleColorsRGB;
my @thresholds = sort {$a <=> $b} keys %{$palette};

while (my $line = <$inFh>)
{
    # Example input lines:
    #ASB_LG19 13470754 14218750 Ss262 2177839 2976275 nlinks=672,bsize1=150447,bsize2=150419,bidentity1=0.201133,bidentity2=0.188392,depth1=0,depth2=0,
    #ASB_LG19 14314359 14314420 Ss262 7198136 7198167 nlinks=9,bsize1=62,bsize2=32,bidentity1=1.000000,bidentity2=1.000000,depth1=1,depth2=1,
    chomp $line;
    next unless $line =~ m/\S/;    # skip blank lines instead of emitting an empty record

    # split ' ' also ignores leading whitespace, unlike split /\s+/.
    my @data = split ' ', $line;
    if (@data < 7)
    {
        warn "WARNING: skipping line $. of $bundleFileIn: expected 7 fields, found " . scalar(@data) . "\n";
        next;
    }

    my ($firstField) = split/,/, $data[6];
    my (undef, $count) = split/=/, (defined $firstField ? $firstField : ""), 2;
    # A missing or non-numeric count is treated as 0 and keeps the default colour.
    $count = 0 unless (defined $count and $count =~ m/^\s*\d/);

    my $colorText = "color=(168,191,0)";
    foreach my $threshold (@thresholds)
    {
        # Thresholds ascend, so no later one can match after the first miss.
        last unless $count > $threshold;
        $colorText = "color=($palette->{$threshold})";
    }

    print {$outFh} join(" ", @data[0 .. 5], $colorText, $data[6]), "\n";
}
close($inFh);
# Buffered write errors only surface at close, so check it.
close($outFh) or die "ERROR: can not close bundle out file $bundleFileOut $!\n";
exit(0);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Validate a pair of sequence list files: every line of each file must have
# exactly two whitespace-separated columns, and the second-column labels must
# be unique across both files. Exits 1 with a message on stderr on failure.
seqFileA=$1
seqFileB=$2

if [[ -z "$seqFileA" || -z "$seqFileB" ]]; then
    echo "Usage: $(basename "$0") <seqFileA> <seqFileB>" >&2
    exit 1
fi

# Second-column labels collected from both files.
secondColumn=()

# Check that every line of file $1 has exactly two columns and append its
# second-column labels to secondColumn.
collect_labels() {
    local seqFile=$1
    local line
    local -a columns

    # Without this check a missing file only fails the redirect below and the
    # script would carry on as if the file had passed validation.
    if [[ ! -r "$seqFile" ]]; then
        echo "Error: Sequence file $(basename "$seqFile") cannot be read." >&2
        exit 1
    fi

    # '|| [[ -n $line ]]' keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # read -a splits on whitespace without the filename expansion that an
        # unquoted columns=($line) performs on characters such as '*'.
        read -r -a columns <<< "$line"
        if [[ ${#columns[@]} -ne 2 ]]; then
            echo "Error: Sequence file $(basename "$seqFile") does not have exactly two columns." >&2
            exit 1
        fi
        secondColumn+=("${columns[1]}")
    done < "$seqFile"
}

collect_labels "$seqFileA"
collect_labels "$seqFileB"

# uniq -d prints only labels that occur more than once.
duplicateLabels=$(printf '%s\n' "${secondColumn[@]}" | sort | uniq -d)
if [[ -n "$duplicateLabels" ]]; then
    echo "Error: Duplicate sequence labels detected in second column for pair: $(basename "$seqFileA"), $(basename "$seqFileB")" >&2
    exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.