comparison carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cdd489d98766
1 #!/usr/bin/perl
2
3 # Copyright 2009 Matteo Cesaroni, Lucilla Luzi
4 #
5
6 # This program is free software; ; you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or (at your
9 # option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15
16
17
18 $|=1;
19 my $infile = $ARGV[0];
20 my $infile2=$ARGV[1];
21 my $file_output=$ARGV[2];
22
23 open (INFILE, "<$infile");
24 open (INFILE2, "<$infile2");
25 open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n";
26
27 $campi_t=0;
28 while (defined (my $line_down = <INFILE2>)) {
29 $line_down=~ s/\#//g;
30 chomp($line_down);
31 $campi_t++;
32 my @tmp_down=split(/\s+/, $line_down);
33 if($campi_t==1){
34 $z=0;
35 foreach $campo_t(@tmp_down){
36 if(($campo_t eq "name") || ($campo_t eq "qName")){
37 $zRef=$z;
38 }
39 if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){
40 $ztxStart=$z;
41 }
42 if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){
43 $ztxEnd=$z;
44 }
45 if($campo_t eq "strand"){
46 $zstrand=$z;
47 }
48 if(($campo_t eq "chrom") || ($campo_t eq "tName")){
49 $zchrom=$z;
50 }
51 if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){
52 $zexonstart=$z;
53 }
54 if($campo_t eq "exonEnds"){
55 $zexonend=$z;
56 }
57 if($campo_t eq "blockSizes"){
58 $zblocksize=$z;
59 }
60 if($campo_t eq "name2"){
61 $zname=$z;
62 }
63 if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){
64 $zcount=$z;
65 }
66 $z++;
67 }
68 if(!$zname){
69 $zname=$zRef;
70 }
71 if(!$zexonstart){
72 $zexonstart=$ztxStart;
73 }
74 if(!$zexonend){
75 $zexonend=$ztxEnd;
76 }
77 if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){
78 print "Annotation file is not in the accepted format\n";
79 exit;
80 }else{print "Expression chip annotation";}
81 next;
82 }
83 chomp $tmp_down[$zchrom];
84 $tab_ann{$tmp_down[$zchrom]}.="$line_down\n";
85 }
86
87 while (defined (my $line_down = <INFILE>)) {
88 my @tmp_down = split("\t", $line_down);
89 chomp $tmp_down[0];
90 $tab_probe{$tmp_down[0]}.=$line_down;
91 }
92
93 @chrom_probes= keys(%tab_probe);
94
95 &expression;
96
97
98 exit 0;
99
100
101 ###########
102 #subrutine#
103 ###########
104
105 sub expression
106 {
107 foreach $chromosoma (@chrom_probes){
108 %gene_cen="";
109 @file2=split("\n", $tab_ann{$chromosoma});
110 foreach $linea(@file2) {
111 chomp $linea;
112 $linea=~ s/#//g;
113 my @kEle=split(/\s+/, $linea);
114 $ref=$kEle[$zRef];
115 $chrom=$kEle[$zchrom];
116 $strand=$kEle[$zstrand];
117 $transcriptStart=$kEle[$ztxStart];
118 $transcriptStop=$kEle[$ztxEnd];
119 if($zcount){
120 $exoncount=$kEle[$zcount];
121 }
122 else
123 {
124 $exoncount=1;
125 }
126 $geneName=$kEle[$zname];
127 $exonStartref=$kEle[$zexonstart];
128
129 my @exonStart=split(",", $exonStartref);
130
131 if (!$zblocksize){
132 $exonEndref=$kEle[$zexonend];
133 }
134 else {
135 @blockStop=split(",", $kEle[$zblocksize]);
136 $exonEndref="";
137 for ($jj=0; $jj<=$#exonStart; $jj++){
138 $end_block=$exonStart[$jj]+$blockStop[$jj];
139 $exonEndref.="$end_block,";
140
141 }
142 }
143
144 my @exonStop=split(",", $exonEndref);
145
146 #print @exonStart;
147
148 @file1=split("\n",$tab_probe{$chromosoma});
149
150 foreach $line(@file1) {
151 chomp $line;
152 #chop $line;
153 if ($line=~/track/g){next;}
154 if ($line=~/#/g){next;}
155 if ($line=~/^\s+$/g){next;}
156 my @Line=split(/\t/, $line);
157 my $Chrom=$Line[0];
158 my $Start=$Line[3];
159 my $Stop=$Line[4];
160 my $ProbeName=$Line[5];
161 my $feature="ciccio";
162 if ($Chrom eq $chrom) {
163 if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){
164 #print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n";
165
166 for($i=0;$i<=$#exonStart;$i++) {
167 if ($strand eq "+"){
168 $exoncount1=$i+1;
169 $exoncount2=$exoncount1;
170 if($i==$#exonStart) {$exoncount2="last";}
171 }
172 if ($strand eq "-"){
173 $exoncount1=($#exonStart+1)-$i;
174 $exoncount2=$exoncount1;
175 if($i==0) {$exoncount2="last";}
176 }
177
178 if(($Start<=$exonStart[$i]) && ($i==0) && ($strand eq "+")){
179 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
180 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
181 last;
182 }
183 if(($Start<=$exonStop[$i]) && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){
184 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
185 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
186 last;
187 }
188 if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){
189 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1";
190 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
191 last;
192 }
193 if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){
194 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1";
195 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
196 last;
197 }
198 if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){
199 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1";
200 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
201 last;
202 }
203 if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){
204 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1";
205 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
206 last;
207 }
208
209 }
210
211
212 }
213
214 }
215
216 }
217 }
218
219 foreach $nome (keys %gene_cen){
220 foreach $description (keys %{$gene_cen {$nome}}) {
221 print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n";
222
223 }
224
225 }
226 }
227 close INFILE;
228 close INFILE2;
229 }
230
231
232