Mercurial > repos > genouest > feelnc2asko
comparison feelnc2asko.pl @ 0:e323c49b8bcc draft
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/feelnc2asko commit 92849224db1963d090fbb25d410cc659a5449241
author | genouest |
---|---|
date | Thu, 12 Apr 2018 06:05:23 -0400 |
parents | |
children | af75f883cab4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e323c49b8bcc |
---|---|
1 use strict; | |
2 use warnings; | |
3 use Getopt::Long; | |
4 use Bio::Tools::GFF; | |
5 | |
# Command-line inputs (all three are required):
#   --ann  reference annotation, parsed further down as GFF3
#   --lnc  lncRNA transcripts, parsed further down with -gff_version 2
#   --new  new mRNA transcripts, parsed further down with -gff_version 2
my ($anngff, $lncgff, $newgff);

my $usage = "Usage: $0 --ann <annotation.gff3> --lnc <lncRNA.gtf> --new <new_mRNA.gtf>\n";

# GetOptions returns false on unknown or malformed options; stop there
# instead of carrying on with a partially parsed command line.
GetOptions("ann=s" => \$anngff, "lnc=s" => \$lncgff, "new=s" => \$newgff)
    or die $usage;

# Every input is opened unconditionally later in the script, so refuse to
# start when one is missing rather than hand an undefined path to the parser.
foreach my $required (["ann", $anngff], ["lnc", $lncgff], ["new", $newgff]) {
    my ($option, $path) = @$required;
    die "Missing required option --$option\n$usage"
        unless defined $path && length $path;
}

# Single GFF3 writer used for all output of the script; it prints to STDOUT.
my $gffout = Bio::Tools::GFF->new(-fh=> \*STDOUT, -gff_version => 3);
11 | |
#1. The standard annotation: copy gene and mRNA features to the output,
#   re-keyed on the gene name and tagged feelnc_type=standard.
#   Features of any other type are not written.
my $gffin = Bio::Tools::GFF->new(-file => $anngff, -gff_version => 3);

while (my $feature = $gffin->next_feature()) {
    my $type = $feature->primary_tag();

    if ($type eq 'mRNA') {
        # The transcript is re-parented onto the value of its "gene" attribute
        # (presumably the same string as the gene's Name - confirm against the
        # input annotation). A missing "gene" attribute stays fatal:
        # get_tag_values() throws, and there is no sensible parent to fall back on.
        my ($gene) = $feature->get_tag_values("gene");

        # remove_tag() throws on an absent tag, so only drop Parent when the
        # transcript actually has one.
        $feature->remove_tag("Parent") if $feature->has_tag("Parent");
        $feature->add_tag_value("Parent", $gene);
        $feature->add_tag_value("feelnc_type", "standard");
        $gffout->write_feature($feature);
    }
    elsif ($type eq 'gene') {
        # The gene ID is replaced by the gene's Name. A missing Name stays
        # fatal (get_tag_values() throws).
        my ($name) = $feature->get_tag_values("Name");

        # Same guard as for Parent above: a gene without ID must not abort the run.
        $feature->remove_tag("ID") if $feature->has_tag("ID");
        $feature->add_tag_value("ID", $name);
        $feature->add_tag_value("feelnc_type", "standard");
        $gffout->write_feature($feature);
    }
}
$gffin->close();
33 | |
34 | |
# Features synthesised from exon records, shared by the rest of the script:
#   %genes        gene_id       => 'gene' feature spanning all exons seen for it
#   %transcripts  transcript_id => 'mRNA' feature spanning all exons seen for it
my %genes=();
my %transcripts=();

#2. The lncRNA gtf: only exon records are used. Each gene_id / transcript_id
#   gets one feature whose coordinates grow to cover every exon seen for it.
#   Transcripts created here are tagged feelnc_type=lncRNA.
my $fncgtf = Bio::Tools::GFF->new( -file => $lncgff, -gff_version => '2' );

while (my $feat = $fncgtf->next_feature()) {
    next if ($feat->primary_tag() ne 'exon');

    my $mrna = ($feat->get_tag_values('transcript_id'))[0];
    my $gene = ($feat->get_tag_values('gene_id'))[0];

    if (exists($genes{$gene})) {
        # Known gene: widen its span when this exon sticks out on either side.
        if ($genes{$gene}->start() > $feat->start()) {
            $genes{$gene}->start($feat->start());
        }
        if ($genes{$gene}->end() < $feat->end()) {
            $genes{$gene}->end($feat->end());
        }
    }
    else {
        # First exon of this gene: seed the gene feature from the exon.
        my $geneft = Bio::SeqFeature::Generic->new(
            -start       => $feat->start(),
            -end         => $feat->end(),
            -strand      => $feat->strand(),
            -primary_tag => 'gene',
            -source_tag  => $feat->source_tag(),
            -seq_id      => $feat->seq_id());
        # The transcripts below get Parent=$gene, so the gene itself must carry
        # that value as its ID; otherwise the Parent references point nowhere.
        $geneft->add_tag_value("ID", $gene);
        # $geneft->add_tag_value("feelnc_type", "lncRNA");
        $genes{$gene}=$geneft;
    }

    if (exists($transcripts{$mrna})) {
        # Known transcript: widen its span the same way.
        if ($transcripts{$mrna}->start() > $feat->start()) {
            $transcripts{$mrna}->start($feat->start());
        }
        if ($transcripts{$mrna}->end() < $feat->end()) {
            $transcripts{$mrna}->end($feat->end());
        }
    }
    else {
        # First exon of this transcript: seed the mRNA feature and link it to its gene.
        my $tr = Bio::SeqFeature::Generic->new(
            -start       => $feat->start(),
            -end         => $feat->end(),
            -strand      => $feat->strand(),
            -primary_tag => 'mRNA',
            -source_tag  => $feat->source_tag(),
            -seq_id      => $feat->seq_id());
        $tr->add_tag_value("ID", $mrna);
        $tr->add_tag_value("Parent",$gene);
        $tr->add_tag_value("feelnc_type", "lncRNA");
        $transcripts{$mrna}=$tr;
    }
}
# Release the input handle once the file has been read, as is done for the
# standard annotation reader.
$fncgtf->close();
89 | |
#3. The new mRNA gtf: only exon records are used. For every gene_id and
#   transcript_id one feature is kept in %genes / %transcripts, stretched to
#   cover all exons seen for it. Transcripts created here are tagged
#   feelnc_type=new.
my $nmgtf = Bio::Tools::GFF->new( -file => $newgff, -gff_version => '2' );

while (my $exon = $nmgtf->next_feature()) {
    next unless $exon->primary_tag() eq 'exon';

    # First value of each identifying attribute.
    my ($transcript_id) = $exon->get_tag_values('transcript_id');
    my ($gene_id)       = $exon->get_tag_values('gene_id');

    # --- gene level ---
    if (exists $genes{$gene_id}) {
        # Already known: extend the stored span if this exon reaches further.
        my $known_gene = $genes{$gene_id};
        $known_gene->start($exon->start()) if $known_gene->start() > $exon->start();
        $known_gene->end($exon->end())     if $known_gene->end()   < $exon->end();
    }
    else {
        # Not seen before: the gene starts out with the coordinates of this exon.
        my $new_gene = Bio::SeqFeature::Generic->new(
            -seq_id      => $exon->seq_id(),
            -source_tag  => $exon->source_tag(),
            -primary_tag => 'gene',
            -start       => $exon->start(),
            -end         => $exon->end(),
            -strand      => $exon->strand(),
        );
        $new_gene->add_tag_value("ID", $gene_id);
        # $new_gene->add_tag_value("feelnc_type", "new");
        $genes{$gene_id} = $new_gene;
    }

    # --- transcript level ---
    if (exists $transcripts{$transcript_id}) {
        # Already known: extend the stored span if this exon reaches further.
        my $known_tr = $transcripts{$transcript_id};
        $known_tr->start($exon->start()) if $known_tr->start() > $exon->start();
        $known_tr->end($exon->end())     if $known_tr->end()   < $exon->end();
    }
    else {
        # Not seen before: create the mRNA feature and attach it to its gene.
        my $new_tr = Bio::SeqFeature::Generic->new(
            -seq_id      => $exon->seq_id(),
            -source_tag  => $exon->source_tag(),
            -primary_tag => 'mRNA',
            -start       => $exon->start(),
            -end         => $exon->end(),
            -strand      => $exon->strand(),
        );
        $new_tr->add_tag_value("ID", $transcript_id);
        $new_tr->add_tag_value("Parent", $gene_id);
        $new_tr->add_tag_value("feelnc_type", "new");
        $transcripts{$transcript_id} = $new_tr;
    }
}
142 | |
# Write the features synthesised from the GTF inputs.
# Transcripts are first grouped under the gene named in their Parent tag so
# that each gene is written exactly once, followed by all of its transcripts.
# (Writing the gene once per transcript would duplicate it in the output for
# every multi-transcript gene.)
my %transcript_ids_of;
foreach my $mrna (keys %transcripts) {
    my ($parent) = $transcripts{$mrna}->get_tag_values("Parent");
    push @{ $transcript_ids_of{$parent} }, $mrna;
}

# Sorted keys keep the output order stable from one run to the next.
foreach my $parent (sort keys %transcript_ids_of) {
    $gffout->write_feature($genes{$parent});
    foreach my $mrna (sort @{ $transcript_ids_of{$parent} }) {
        $gffout->write_feature($transcripts{$mrna});
    }
}