Mercurial > repos > matces > carpet_toolsuite
diff carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/carpet-src-1/tools/CARPET/annotation_expr_intron.pl Tue Jun 07 16:50:41 2011 -0400 @@ -0,0 +1,232 @@ +#!/usr/bin/perl + +# Copyright 2009 Matteo Cesaroni, Lucilla Luzi +# + +# This program is free software; ; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + + +$|=1; +my $infile = $ARGV[0]; +my $infile2=$ARGV[1]; +my $file_output=$ARGV[2]; + +open (INFILE, "<$infile"); +open (INFILE2, "<$infile2"); +open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n"; + +$campi_t=0; +while (defined (my $line_down = <INFILE2>)) { + $line_down=~ s/\#//g; + chomp($line_down); + $campi_t++; + my @tmp_down=split(/\s+/, $line_down); + if($campi_t==1){ + $z=0; + foreach $campo_t(@tmp_down){ + if(($campo_t eq "name") || ($campo_t eq "qName")){ + $zRef=$z; + } + if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){ + $ztxStart=$z; + } + if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){ + $ztxEnd=$z; + } + if($campo_t eq "strand"){ + $zstrand=$z; + } + if(($campo_t eq "chrom") || ($campo_t eq "tName")){ + $zchrom=$z; + } + if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){ + $zexonstart=$z; + } + if($campo_t eq "exonEnds"){ + $zexonend=$z; + } + if($campo_t eq "blockSizes"){ + $zblocksize=$z; + } + if($campo_t eq "name2"){ + $zname=$z; + } + if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){ + $zcount=$z; + } + $z++; + } + if(!$zname){ + $zname=$zRef; + } + if(!$zexonstart){ + $zexonstart=$ztxStart; + } + if(!$zexonend){ + $zexonend=$ztxEnd; + } + if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){ + print "Annotation file is not in the accepted format\n"; + exit; + }else{print "Expression chip annotation";} + next; + } + chomp $tmp_down[$zchrom]; + $tab_ann{$tmp_down[$zchrom]}.="$line_down\n"; +} + +while (defined (my $line_down = <INFILE>)) { + my @tmp_down = split("\t", $line_down); + chomp $tmp_down[0]; + $tab_probe{$tmp_down[0]}.=$line_down; +} + +@chrom_probes= keys(%tab_probe); + +&expression; + + +exit 0; + + +########### +#subrutine# +########### + +sub expression +{ +foreach $chromosoma (@chrom_probes){ + %gene_cen=""; + @file2=split("\n", $tab_ann{$chromosoma}); + foreach $linea(@file2) { + chomp $linea; + $linea=~ s/#//g; + my @kEle=split(/\s+/, $linea); + $ref=$kEle[$zRef]; + $chrom=$kEle[$zchrom]; + $strand=$kEle[$zstrand]; + $transcriptStart=$kEle[$ztxStart]; + $transcriptStop=$kEle[$ztxEnd]; + if($zcount){ + $exoncount=$kEle[$zcount]; + } + else + { + $exoncount=1; + } + $geneName=$kEle[$zname]; + $exonStartref=$kEle[$zexonstart]; + + my @exonStart=split(",", $exonStartref); + + if (!$zblocksize){ + $exonEndref=$kEle[$zexonend]; + } + else { + @blockStop=split(",", $kEle[$zblocksize]); + $exonEndref=""; + for ($jj=0; $jj<=$#exonStart; $jj++){ + $end_block=$exonStart[$jj]+$blockStop[$jj]; + $exonEndref.="$end_block,"; + + } + } + + my @exonStop=split(",", $exonEndref); + + #print @exonStart; + + @file1=split("\n",$tab_probe{$chromosoma}); + + foreach $line(@file1) { + chomp $line; + #chop $line; + if ($line=~/track/g){next;} + if ($line=~/#/g){next;} + if ($line=~/^\s+$/g){next;} + my @Line=split(/\t/, $line); + my $Chrom=$Line[0]; + my $Start=$Line[3]; + my $Stop=$Line[4]; + my $ProbeName=$Line[5]; + my $feature="ciccio"; + if ($Chrom eq $chrom) { + if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){ + #print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n"; + + for($i=0;$i<=$#exonStart;$i++) { + if ($strand eq "+"){ + $exoncount1=$i+1; + $exoncount2=$exoncount1; + if($i==$#exonStart) {$exoncount2="last";} + } + if ($strand eq "-"){ + $exoncount1=($#exonStart+1)-$i; + $exoncount2=$exoncount1; + if($i==0) {$exoncount2="last";} + } + + if(($Start<=$exonStart[$i]) && ($i==0) && ($strand eq "+")){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + if(($Start<=$exonStop[$i]) && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){ + $feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1"; + $gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,"; + last; + } + + } + + + } + + } + + } + } + +foreach $nome (keys %gene_cen){ + foreach $description (keys %{$gene_cen {$nome}}) { + print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n"; + + } + +} +} +close INFILE; +close INFILE2; +} + + +