Mercurial > repos > matces > carpet_toolsuite
view carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 1:78770028dcf1 default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:59:33 -0400 |
parents | cdd489d98766 |
children |
line wrap: on
line source
#!/usr/bin/perl # Copyright 2009 Matteo Cesaroni, Lucilla Luzi # # This program is free software; ; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. $|=1; my $infile = $ARGV[0]; my $infile2=$ARGV[1]; my $file_output=$ARGV[2]; open (INFILE, "<$infile"); open (INFILE2, "<$infile2"); open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n"; $campi_t=0; while (defined (my $line_down = <INFILE2>)) { $line_down=~ s/\#//g; chomp($line_down); $campi_t++; my @tmp_down=split(/\s+/, $line_down); if($campi_t==1){ $z=0; foreach $campo_t(@tmp_down){ if(($campo_t eq "name") || ($campo_t eq "qName")){ $zRef=$z; } if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){ $ztxStart=$z; } if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){ $ztxEnd=$z; } if($campo_t eq "strand"){ $zstrand=$z; } if(($campo_t eq "chrom") || ($campo_t eq "tName")){ $zchrom=$z; } if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){ $zexonstart=$z; } if($campo_t eq "exonEnds"){ $zexonend=$z; } if($campo_t eq "blockSizes"){ $zblocksize=$z; } if($campo_t eq "name2"){ $zname=$z; } if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){ $zcount=$z; } $z++; } if(!$zname){ $zname=$zRef; } if(!$zexonstart){ $zexonstart=$ztxStart; } if(!$zexonend){ $zexonend=$ztxEnd; } if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){ print "Annotation file is not in the accepted format\n"; exit; }else{print "Expression chip annotation";} next; } chomp $tmp_down[$zchrom]; $tab_ann{$tmp_down[$zchrom]}.="$line_down\n"; } while (defined (my $line_down = <INFILE>)) { my @tmp_down = split("\t", $line_down); chomp $tmp_down[0]; $tab_probe{$tmp_down[0]}.=$line_down; } @chrom_probes= keys(%tab_probe); &expression; exit 0; ########### #subrutine# ########### sub expression { foreach $chromosoma (@chrom_probes){ %gene_cen=""; @file2=split("\n", $tab_ann{$chromosoma}); foreach $linea(@file2) { chomp $linea; $linea=~ s/#//g; my @kEle=split(/\s+/, $linea); $ref=$kEle[$zRef]; $chrom=$kEle[$zchrom]; $strand=$kEle[$zstrand]; $transcriptStart=$kEle[$ztxStart]; $transcriptStop=$kEle[$ztxEnd]; if($zcount){ $exoncount=$kEle[$zcount]; } else { $exoncount=1; } $geneName=$kEle[$zname]; $exonStartref=$kEle[$zexonstart]; my @exonStart=split(",", $exonStartref); if (!$zblocksize){ $exonEndref=$kEle[$zexonend]; } else { @blockStop=split(",", $kEle[$zblocksize]); $exonEndref=""; for ($jj=0; $jj<=$#exonStart; $jj++){ $end_block=$exonStart[$jj]+$blockStop[$jj]; $exonEndref.="$end_block,"; } } my @exonStop=split(",", $exonEndref); #print @exonStart; @file1=split("\n",$tab_probe{$chromosoma}); foreach $line(@file1) { chomp $line; #chop $line; if ($line=~/track/g){next;} if ($line=~/#/g){next;} if ($line=~/^\s+$/g){next;} my @Line=split(/\t/, $line); my $Chrom=$Line[0]; my $Start=$Line[3]; my $Stop=$Line[4]; my $ProbeName=$Line[5]; my $feature="ciccio"; if ($Chrom eq $chrom) { if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){ #print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n"; for($i=0;$i<=$#exonStart;$i++) { if ($strand eq "+"){ $exoncount1=$i+1; $exoncount2=$exoncount1; if($i==$#exonStart) {$exoncount2="last";} } if ($strand eq "-"){ $exoncount1=($#exonStart+1)-$i; $exoncount2=$exoncount1; if($i==0) {$exoncount2="last";} } if(($Start<=$exonStart[$i]) && ($i==0) && ($strand eq "+")){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } if(($Start<=$exonStop[$i]) && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){ $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1"; $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; last; } } } } } } foreach $nome (keys %gene_cen){ foreach $description (keys %{$gene_cen {$nome}}) { print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n"; } } } close INFILE; close INFILE2; }