Skip to content

Commit

Permalink
fix: keep order of samples within snakemake
Browse files: browse the repository at this point in the history
  • Loading branch information
mkatsanto committed Feb 20, 2024
1 parent 2afde9d commit ef48042
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
12 changes: 6 additions & 6 deletions tests/test_integration_workflow_with_conda/test.slurm.sh
Original file line number Diff line number Diff line change
@@ -38,8 +38,8 @@ snakemake \
--report="snakemake_report.html"

# Check md5 sum of some output files
find results/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
find results/homo_sapiens/ -type f -name \*\.gz -exec gunzip '{}' \;
find results/homo_sapiens/ -type f -name \*\.zip -exec sh -c 'unzip -o {} -d $(dirname {})' \;
md5sum --check "expected_output.md5"

# Check whether STAR produces expected alignments
@@ -49,15 +49,15 @@ md5sum --check "expected_output.md5"
echo "Verifying STAR output"
result=$(bedtools intersect -F 1 -v -bed \
-a ../input_files/synthetic.mate_1.bed \
-b results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/map_genome/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.Aligned.sortedByCoord.out.bam \
| wc -l)
if [ $result != "0" ]; then
echo "Alignments for mate 1 reads are not consistent with ground truth"
exit 1
fi
result=$(bedtools intersect -F 1 -v -bed \
-a <(cat ../input_files/synthetic.mate_1.bed ../input_files/synthetic.mate_2.bed) \
-b results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
-b results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/map_genome/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.Aligned.sortedByCoord.out.bam \
| wc -l)
if [ $result != "0" ]; then
echo "Alignments for mate 1 reads are not consistent with ground truth"
@@ -67,8 +67,8 @@ fi
# Check whether Salmon assigns reads to expected genes
echo "Verifying Salmon output"
diff \
<(cat results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.salmon.se/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
diff \
<(cat results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat results/homo_sapiens/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/synthetic_10_reads_paired_synthetic_10_reads_paired.salmon.pe/quant.genes.sf | cut -f1,5 | tail -n +2 | sort -k1,1) \
<(cat ../input_files/synthetic.mate_1.bed | cut -f7 | sort | uniq -c | sort -k2nr | awk '{printf($2"\t"$1"\n")}')
2 changes: 1 addition & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ def get_sample(column_id, search_id=None, search_value=None):

def get_all_samples(search_id=None, search_value=None):
return list(
set(samples_table.index[samples_table[search_id] == search_value].values)
pd.unique(samples_table.index[samples_table[search_id] == search_value].values)
)


Expand Down

0 comments on commit ef48042

Please sign in to comment.