From ed9681b92e68317ff3406b184da6f04f12a6a8f2 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 9 Sep 2024 17:31:50 +0200 Subject: [PATCH 1/2] fix #484 CDS must not be attached to Level2 if no Parent/ID relationship and locus name not the same as previous L2. Should be locus parsing not sequential --- lib/AGAT/OmniscientI.pm | 6 ++++-- t/gff_syntax.t | 7 +++---- t/gff_syntax/README | 2 ++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/AGAT/OmniscientI.pm b/lib/AGAT/OmniscientI.pm index 7dff6c08..9a9884c6 100644 --- a/lib/AGAT/OmniscientI.pm +++ b/lib/AGAT/OmniscientI.pm @@ -866,6 +866,9 @@ sub manage_one_feature{ $skip_last_l2=1; dual_print ($log, "skip last l2\n", $verbose) if ( $debug ); } + } else { + $skip_last_l2=1; + dual_print ($log, "skip last l2\n", $verbose) if ( $debug ); } } @@ -878,8 +881,7 @@ sub manage_one_feature{ # but only if the last_comon tag is different as the parent of the last_l2_f # (In that case we can use the last L2 feature. It was missing the comon tag in it). if(! $last_l2_f or - ($locusTAGvalue and (lc($locusTAGvalue) ne lc($last_locusTAGvalue) ) - and lc($last_locusTAGvalue) ne lc($parent_of_last_l2) or $skip_last_l2) ){ + ($locusTAGvalue and ( lc($locusTAGvalue) ne lc($last_locusTAGvalue) ) and ( lc($last_locusTAGvalue) ne lc($parent_of_last_l2) or $skip_last_l2) ) ){ dual_print ($log, "Come in the complex case L3!!!\n", $verbose) if ($debug); ####################### # Change referentiel => based on the last L2 link to this locus diff --git a/t/gff_syntax.t b/t/gff_syntax.t index 3fcb55d7..9c0a8cd6 100644 --- a/t/gff_syntax.t +++ b/t/gff_syntax.t @@ -3,7 +3,7 @@ use strict; use warnings; use File::Basename; -use Test::More tests => 45; +use Test::More tests => 47; =head1 DESCRIPTION @@ -55,12 +55,11 @@ foreach my $file (sort { (($a =~ /^(\d+)/)[0] || 0) <=> (($b =~ /^(\d+)/)[0] || if ($file =~ m/^8_/ or $file =~ m/^33_/ or $file =~ m/^34_/ or $file =~ m/^36_/){ system("$script --gff $input_path/$file -o $pathtmp 2>&1 1>/dev/null"); } - # peculiar case 28 - elsif($file =~ m/^28_/){ + # peculiar cases with locus_tag Name + elsif($file =~ m/^28_/ or $file =~ m/^45_/ or $file =~ m/^46_/){ system("$script_agat config --expose --locus_tag Name 2>&1 1>/dev/null"); # set special config for the test system("$script --gff $input_path/$file -o $pathtmp 2>&1 1>/dev/null"); } - # standard cases else{ system("$script_agat config --expose --merge_loci 2>&1 1>/dev/null"); # set special config for the test diff --git a/t/gff_syntax/README b/t/gff_syntax/README index 29cfa544..6468b373 100644 --- a/t/gff_syntax/README +++ b/t/gff_syntax/README @@ -53,6 +53,8 @@ This is an explanations of the different test files used to check the GFF3 parse 42: No attribute tag in L1; No attribute tag in L2; No attribute tag in L3; Single value in 9th column (GFF1) 43: Issue 290 - level3 features (exons CDS) directly attached to the gene, while it exists an mRNA feature. The mRNA feature is also attached to the gene. 44: Issue 350 - Exonerate output - No L2, ID only for L1. +45: Issue 484 - CDS without Parent but not related to previous L2 because it has locus name while previous L2 had parent/ID attributes and no locus name. +46: Issue 484 - Same but start by CDS /!\ If only level3 features are defined, and no locus tag present (see test 26), the tool cannot deal with it. I will create by default one umbrella level1, or if you on attribute as uniq locus ID, It will create a l1 for each feature => If only exon or only CDS features so the result will be fine, but if there are two different features that has to be linked together (two CDS or a CDS and a signal peptide as in the test case 26) , the tool will not perform properly. From 8f3582807f4d0d4e260ca763839a258129582302 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 9 Sep 2024 17:32:11 +0200 Subject: [PATCH 2/2] add test --- t/gff_syntax/in/45_test.gff | 8 ++++++++ t/gff_syntax/in/46_test.gff | 6 ++++++ t/gff_syntax/out/45_correct_output.gff | 18 ++++++++++++++++++ t/gff_syntax/out/46_correct_output.gff | 15 +++++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 t/gff_syntax/in/45_test.gff create mode 100644 t/gff_syntax/in/46_test.gff create mode 100644 t/gff_syntax/out/45_correct_output.gff create mode 100644 t/gff_syntax/out/46_correct_output.gff diff --git a/t/gff_syntax/in/45_test.gff b/t/gff_syntax/in/45_test.gff new file mode 100644 index 00000000..4b9a164f --- /dev/null +++ b/t/gff_syntax/in/45_test.gff @@ -0,0 +1,8 @@ +BK063639.1 tpg tRNA 1637 1705 . + . ID=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile +BK063639.1 tpg exon 1637 1705 . + . ID=exon-BK063639.1:1637..1705-1;Parent=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile +BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 diff --git a/t/gff_syntax/in/46_test.gff b/t/gff_syntax/in/46_test.gff new file mode 100644 index 00000000..8444f9f4 --- /dev/null +++ b/t/gff_syntax/in/46_test.gff @@ -0,0 +1,6 @@ +BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 diff --git a/t/gff_syntax/out/45_correct_output.gff b/t/gff_syntax/out/45_correct_output.gff new file mode 100644 index 00000000..2e766092 --- /dev/null +++ b/t/gff_syntax/out/45_correct_output.gff @@ -0,0 +1,18 @@ +##gff-version 3 +BK063639.1 AGAT gene 1637 1705 . + . ID=agat-gene-1;gbkey=tRNA;product=tRNA-Ile +BK063639.1 tpg tRNA 1637 1705 . + . ID=rna-BK063639.1:1637..1705;Parent=agat-gene-1;gbkey=tRNA;product=tRNA-Ile +BK063639.1 tpg exon 1637 1705 . + . ID=exon-BK063639.1:1637..1705-1;Parent=rna-BK063639.1:1637..1705;gbkey=tRNA;product=tRNA-Ile +BK063639.1 AGAT gene 1790 2779 . + . ID=agat-gene-4;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT mRNA 1790 2779 . + . ID=agat-rna-1;Parent=agat-gene-4;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT exon 1790 2779 . + . ID=agat-exon-1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT gene 2768 2840 . - . ID=agat-gene-2;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;Parent=agat-gene-2;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 AGAT gene 3030 3098 . - . ID=agat-gene-3;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;Parent=agat-gene-3;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 AGAT gene 3114 4658 . + . ID=agat-gene-5;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 AGAT mRNA 3114 4658 . + . ID=agat-rna-2;Parent=agat-gene-5;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 AGAT exon 3114 4658 . + . ID=agat-exon-2;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 diff --git a/t/gff_syntax/out/46_correct_output.gff b/t/gff_syntax/out/46_correct_output.gff new file mode 100644 index 00000000..7c0c10a6 --- /dev/null +++ b/t/gff_syntax/out/46_correct_output.gff @@ -0,0 +1,15 @@ +##gff-version 3 +BK063639.1 AGAT gene 1790 2779 . + . ID=agat-gene-3;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT mRNA 1790 2779 . + . ID=agat-rna-1;Parent=agat-gene-3;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT exon 1790 2779 . + . ID=agat-exon-1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 tpg CDS 1790 2779 . + 0 ID=cds-DBA43806.1;Parent=agat-rna-1;Dbxref=NCBI_GP:DBA43806.1;Name=DBA43806.1;gbkey=CDS;product=ND2;protein_id=DBA43806.1;transl_table=5 +BK063639.1 AGAT gene 2768 2840 . - . ID=agat-gene-1;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg tRNA 2768 2840 . - . ID=rna-BK063639.1:2768..2840;Parent=agat-gene-1;gbkey=tRNA;product=tRNA-Cys +BK063639.1 tpg exon 2768 2840 . - . ID=exon-BK063639.1:2768..2840-1;Parent=rna-BK063639.1:2768..2840;gbkey=tRNA;product=tRNA-Cys +BK063639.1 AGAT gene 3030 3098 . - . ID=agat-gene-2;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg tRNA 3030 3098 . - . ID=rna-BK063639.1:3030..3098;Parent=agat-gene-2;gbkey=tRNA;product=tRNA-Trp +BK063639.1 tpg exon 3030 3098 . - . ID=exon-BK063639.1:3030..3098-1;Parent=rna-BK063639.1:3030..3098;gbkey=tRNA;product=tRNA-Trp +BK063639.1 AGAT gene 3114 4658 . + . ID=agat-gene-4;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 AGAT mRNA 3114 4658 . + . ID=agat-rna-2;Parent=agat-gene-4;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 AGAT exon 3114 4658 . + . ID=agat-exon-2;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5 +BK063639.1 tpg CDS 3114 4658 . + 0 ID=cds-DBA43807.1;Parent=agat-rna-2;Dbxref=NCBI_GP:DBA43807.1;Name=DBA43807.1;gbkey=CDS;product=COX1;protein_id=DBA43807.1;transl_table=5