Skip to content

Commit

Permalink
fix #484 (#488)
Browse files Browse the repository at this point in the history
* fix #484: a CDS must not be attached to the previous Level2 feature when there is no Parent/ID relationship and its locus name is not the same as that of the previous L2. Such a CDS should be handled by locus parsing, not sequential parsing. Tests added.
  • Loading branch information
Juke34 authored Sep 10, 2024
1 parent 8f76b46 commit f29a883
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 6 deletions.
6 changes: 4 additions & 2 deletions lib/AGAT/OmniscientI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,9 @@ sub manage_one_feature{
$skip_last_l2=1;
dual_print ($log, "skip last l2\n", $verbose) if ( $debug );
}
} else {
$skip_last_l2=1;
dual_print ($log, "skip last l2\n", $verbose) if ( $debug );
}
}

Expand All @@ -878,8 +881,7 @@ sub manage_one_feature{
# but only if the last common tag is different from the parent of the last_l2_f
# (In that case we can use the last L2 feature. It was simply missing the common tag).
if(! $last_l2_f or
($locusTAGvalue and (lc($locusTAGvalue) ne lc($last_locusTAGvalue) )
and lc($last_locusTAGvalue) ne lc($parent_of_last_l2) or $skip_last_l2) ){
($locusTAGvalue and ( lc($locusTAGvalue) ne lc($last_locusTAGvalue) ) and ( lc($last_locusTAGvalue) ne lc($parent_of_last_l2) or $skip_last_l2) ) ){
dual_print ($log, "Come in the complex case L3!!!\n", $verbose) if ($debug);
#######################
# Change referentiel => based on the last L2 link to this locus
Expand Down
7 changes: 3 additions & 4 deletions t/gff_syntax.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use strict;
use warnings;
use File::Basename;
use Test::More tests => 45;
use Test::More tests => 47;

=head1 DESCRIPTION
Expand Down Expand Up @@ -55,12 +55,11 @@ foreach my $file (sort { (($a =~ /^(\d+)/)[0] || 0) <=> (($b =~ /^(\d+)/)[0] ||
if ($file =~ m/^8_/ or $file =~ m/^33_/ or $file =~ m/^34_/ or $file =~ m/^36_/){
system("$script --gff $input_path/$file -o $pathtmp 2>&1 1>/dev/null");
}
# peculiar case 28
elsif($file =~ m/^28_/){
# peculiar cases with locus_tag Name
elsif($file =~ m/^28_/ or $file =~ m/^45_/ or $file =~ m/^46_/){
system("$script_agat config --expose --locus_tag Name 2>&1 1>/dev/null"); # set special config for the test
system("$script --gff $input_path/$file -o $pathtmp 2>&1 1>/dev/null");
}

# standard cases
else{
system("$script_agat config --expose --merge_loci 2>&1 1>/dev/null"); # set special config for the test
Expand Down
2 changes: 2 additions & 0 deletions t/gff_syntax/README
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ This is an explanations of the different test files used to check the GFF3 parse
42: No attribute tag in L1; No attribute tag in L2; No attribute tag in L3; Single value in 9th column (GFF1)
43: Issue 290 - level3 features (exons CDS) directly attached to the gene, while it exists an mRNA feature. The mRNA feature is also attached to the gene.
44: Issue 350 - Exonerate output - No L2, ID only for L1.
45: Issue 484 - CDS without a Parent attribute that must not be attached to the previous L2: the CDS has a locus name, while the previous L2 had Parent/ID attributes and no locus name.
46: Issue 484 - Same as test 45, but the file starts with a CDS.

/!\ If only level3 features are defined and no locus tag is present (see test 26), the tool cannot deal with it properly. By default it will create a single umbrella level1 feature, or, if you use an attribute as a unique locus ID, it will create one level1 feature for each feature. If the file contains only exon features or only CDS features, the result will be fine, but if there are two different features that have to be linked together (two CDS, or a CDS and a signal peptide as in test case 26), the tool will not perform properly.

Expand Down
8 changes: 8 additions & 0 deletions t/gff_syntax/in/45_test.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
BK063639.1 tpg tRNA 1637 1705 . + . ID=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile
BK063639.1 tpg exon 1637 1705 . + . ID=exon-BK063639.1:1637..1705-1;Parent=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile
BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
6 changes: 6 additions & 0 deletions t/gff_syntax/in/46_test.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
18 changes: 18 additions & 0 deletions t/gff_syntax/out/45_correct_output.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
##gff-version 3
BK063639.1 AGAT gene 1637 1705 . + . ID=agat-gene-1;gbkey=tRNA;product=tRNA-Ile
BK063639.1 tpg tRNA 1637 1705 . + . ID=rna-BK063639.1:1637..1705;Parent=agat-gene-1;gbkey=tRNA;product=tRNA-Ile
BK063639.1 tpg exon 1637 1705 . + . ID=exon-BK063639.1:1637..1705-1;Parent=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile
BK063639.1 AGAT gene 1790 2779 . + . ID=agat-gene-4;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT mRNA 1790 2779 . + . ID=agat-rna-1;Parent=agat-gene-4;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT exon 1790 2779 . + . ID=agat-exon-1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT gene 2768 2840 . - . ID=agat-gene-2;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;Parent=agat-gene-2;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 AGAT gene 3030 3098 . - . ID=agat-gene-3;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;Parent=agat-gene-3;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 AGAT gene 3114 4658 . + . ID=agat-gene-5;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 AGAT mRNA 3114 4658 . + . ID=agat-rna-2;Parent=agat-gene-5;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 AGAT exon 3114 4658 . + . ID=agat-exon-2;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
15 changes: 15 additions & 0 deletions t/gff_syntax/out/46_correct_output.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##gff-version 3
BK063639.1 AGAT gene 1790 2779 . + . ID=agat-gene-3;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT mRNA 1790 2779 . + . ID=agat-rna-1;Parent=agat-gene-3;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT exon 1790 2779 . + . ID=agat-exon-1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5
BK063639.1 AGAT gene 2768 2840 . - . ID=agat-gene-1;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;Parent=agat-gene-1;gbkey=tRNA;product=tRNA-Cys
BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys
BK063639.1 AGAT gene 3030 3098 . - . ID=agat-gene-2;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;Parent=agat-gene-2;gbkey=tRNA;product=tRNA-Trp
BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp
BK063639.1 AGAT gene 3114 4658 . + . ID=agat-gene-4;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 AGAT mRNA 3114 4658 . + . ID=agat-rna-2;Parent=agat-gene-4;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 AGAT exon 3114 4658 . + . ID=agat-exon-2;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5
BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5

0 comments on commit f29a883

Please sign in to comment.