Mercurial > repos > matces > carpet_toolsuite
comparison carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cdd489d98766 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 # Copyright 2009 Matteo Cesaroni, Lucilla Luzi | |
4 # | |
5 | |
6 # This program is free software; ; you can redistribute it and/or modify | |
7 # it under the terms of the GNU Lesser General Public License as published by | |
8 # the Free Software Foundation; either version 3 of the License, or (at your | |
9 # option) any later version. | |
10 # | |
11 # This program is distributed in the hope that it will be useful, | |
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 # GNU General Public License for more details. | |
15 | |
16 | |
17 | |
18 $|=1; | |
19 my $infile = $ARGV[0]; | |
20 my $infile2=$ARGV[1]; | |
21 my $file_output=$ARGV[2]; | |
22 | |
23 open (INFILE, "<$infile"); | |
24 open (INFILE2, "<$infile2"); | |
25 open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n"; | |
26 | |
27 $campi_t=0; | |
28 while (defined (my $line_down = <INFILE2>)) { | |
29 $line_down=~ s/\#//g; | |
30 chomp($line_down); | |
31 $campi_t++; | |
32 my @tmp_down=split(/\s+/, $line_down); | |
33 if($campi_t==1){ | |
34 $z=0; | |
35 foreach $campo_t(@tmp_down){ | |
36 if(($campo_t eq "name") || ($campo_t eq "qName")){ | |
37 $zRef=$z; | |
38 } | |
39 if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){ | |
40 $ztxStart=$z; | |
41 } | |
42 if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){ | |
43 $ztxEnd=$z; | |
44 } | |
45 if($campo_t eq "strand"){ | |
46 $zstrand=$z; | |
47 } | |
48 if(($campo_t eq "chrom") || ($campo_t eq "tName")){ | |
49 $zchrom=$z; | |
50 } | |
51 if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){ | |
52 $zexonstart=$z; | |
53 } | |
54 if($campo_t eq "exonEnds"){ | |
55 $zexonend=$z; | |
56 } | |
57 if($campo_t eq "blockSizes"){ | |
58 $zblocksize=$z; | |
59 } | |
60 if($campo_t eq "name2"){ | |
61 $zname=$z; | |
62 } | |
63 if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){ | |
64 $zcount=$z; | |
65 } | |
66 $z++; | |
67 } | |
68 if(!$zname){ | |
69 $zname=$zRef; | |
70 } | |
71 if(!$zexonstart){ | |
72 $zexonstart=$ztxStart; | |
73 } | |
74 if(!$zexonend){ | |
75 $zexonend=$ztxEnd; | |
76 } | |
77 if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){ | |
78 print "Annotation file is not in the accepted format\n"; | |
79 exit; | |
80 }else{print "Expression chip annotation";} | |
81 next; | |
82 } | |
83 chomp $tmp_down[$zchrom]; | |
84 $tab_ann{$tmp_down[$zchrom]}.="$line_down\n"; | |
85 } | |
86 | |
87 while (defined (my $line_down = <INFILE>)) { | |
88 my @tmp_down = split("\t", $line_down); | |
89 chomp $tmp_down[0]; | |
90 $tab_probe{$tmp_down[0]}.=$line_down; | |
91 } | |
92 | |
93 @chrom_probes= keys(%tab_probe); | |
94 | |
95 &expression; | |
96 | |
97 | |
98 exit 0; | |
99 | |
100 | |
101 ########### | |
102 #subrutine# | |
103 ########### | |
104 | |
105 sub expression | |
106 { | |
107 foreach $chromosoma (@chrom_probes){ | |
108 %gene_cen=""; | |
109 @file2=split("\n", $tab_ann{$chromosoma}); | |
110 foreach $linea(@file2) { | |
111 chomp $linea; | |
112 $linea=~ s/#//g; | |
113 my @kEle=split(/\s+/, $linea); | |
114 $ref=$kEle[$zRef]; | |
115 $chrom=$kEle[$zchrom]; | |
116 $strand=$kEle[$zstrand]; | |
117 $transcriptStart=$kEle[$ztxStart]; | |
118 $transcriptStop=$kEle[$ztxEnd]; | |
119 if($zcount){ | |
120 $exoncount=$kEle[$zcount]; | |
121 } | |
122 else | |
123 { | |
124 $exoncount=1; | |
125 } | |
126 $geneName=$kEle[$zname]; | |
127 $exonStartref=$kEle[$zexonstart]; | |
128 | |
129 my @exonStart=split(",", $exonStartref); | |
130 | |
131 if (!$zblocksize){ | |
132 $exonEndref=$kEle[$zexonend]; | |
133 } | |
134 else { | |
135 @blockStop=split(",", $kEle[$zblocksize]); | |
136 $exonEndref=""; | |
137 for ($jj=0; $jj<=$#exonStart; $jj++){ | |
138 $end_block=$exonStart[$jj]+$blockStop[$jj]; | |
139 $exonEndref.="$end_block,"; | |
140 | |
141 } | |
142 } | |
143 | |
144 my @exonStop=split(",", $exonEndref); | |
145 | |
146 #print @exonStart; | |
147 | |
148 @file1=split("\n",$tab_probe{$chromosoma}); | |
149 | |
150 foreach $line(@file1) { | |
151 chomp $line; | |
152 #chop $line; | |
153 if ($line=~/track/g){next;} | |
154 if ($line=~/#/g){next;} | |
155 if ($line=~/^\s+$/g){next;} | |
156 my @Line=split(/\t/, $line); | |
157 my $Chrom=$Line[0]; | |
158 my $Start=$Line[3]; | |
159 my $Stop=$Line[4]; | |
160 my $ProbeName=$Line[5]; | |
161 my $feature="ciccio"; | |
162 if ($Chrom eq $chrom) { | |
163 if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){ | |
164 #print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n"; | |
165 | |
166 for($i=0;$i<=$#exonStart;$i++) { | |
167 if ($strand eq "+"){ | |
168 $exoncount1=$i+1; | |
169 $exoncount2=$exoncount1; | |
170 if($i==$#exonStart) {$exoncount2="last";} | |
171 } | |
172 if ($strand eq "-"){ | |
173 $exoncount1=($#exonStart+1)-$i; | |
174 $exoncount2=$exoncount1; | |
175 if($i==0) {$exoncount2="last";} | |
176 } | |
177 | |
178 if(($Start<=$exonStart[$i]) && ($i==0) && ($strand eq "+")){ | |
179 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; | |
180 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
181 last; | |
182 } | |
183 if(($Start<=$exonStop[$i]) && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){ | |
184 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; | |
185 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
186 last; | |
187 } | |
188 if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){ | |
189 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1"; | |
190 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
191 last; | |
192 } | |
193 if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){ | |
194 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1"; | |
195 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
196 last; | |
197 } | |
198 if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){ | |
199 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1"; | |
200 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
201 last; | |
202 } | |
203 if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){ | |
204 $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1"; | |
205 $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; | |
206 last; | |
207 } | |
208 | |
209 } | |
210 | |
211 | |
212 } | |
213 | |
214 } | |
215 | |
216 } | |
217 } | |
218 | |
219 foreach $nome (keys %gene_cen){ | |
220 foreach $description (keys %{$gene_cen {$nome}}) { | |
221 print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n"; | |
222 | |
223 } | |
224 | |
225 } | |
226 } | |
227 close INFILE; | |
228 close INFILE2; | |
229 } | |
230 | |
231 | |
232 |