view carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 1:78770028dcf1 default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:59:33 -0400
parents cdd489d98766
children
line wrap: on
line source

#!/usr/bin/perl

# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
#

# This program is free software; ; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.



$|=1;
my $infile = $ARGV[0];
my $infile2=$ARGV[1];
my $file_output=$ARGV[2];

open (INFILE, "<$infile");
open (INFILE2, "<$infile2");
open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n";   

$campi_t=0;
while (defined (my $line_down = <INFILE2>)) {
		$line_down=~ s/\#//g;
		chomp($line_down);
		$campi_t++;
		my @tmp_down=split(/\s+/, $line_down);
		if($campi_t==1){
			$z=0;
			foreach $campo_t(@tmp_down){
				if(($campo_t eq "name") || ($campo_t eq "qName")){
					$zRef=$z;
				}
				if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){
					$ztxStart=$z;
				}
				if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){
					$ztxEnd=$z;
				}
				if($campo_t eq "strand"){
					$zstrand=$z;
				}
				if(($campo_t eq "chrom") || ($campo_t eq "tName")){
					$zchrom=$z;
				}
				if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){
					$zexonstart=$z;
				}
				if($campo_t eq "exonEnds"){
					$zexonend=$z;
				}
				if($campo_t eq "blockSizes"){
					$zblocksize=$z;
				}
				if($campo_t eq "name2"){
					$zname=$z;
				}
				if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){
					$zcount=$z;
				}
				$z++;
			}
			if(!$zname){
				$zname=$zRef;
			}
			if(!$zexonstart){
				$zexonstart=$ztxStart;
			}
			if(!$zexonend){
				$zexonend=$ztxEnd;
			}
			if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){
				print "Annotation file is not in the accepted format\n";
				exit;
			}else{print "Expression chip annotation";}
		next;
		} 
   		chomp $tmp_down[$zchrom];
   		$tab_ann{$tmp_down[$zchrom]}.="$line_down\n";
}

while (defined (my $line_down = <INFILE>)) {
   		my @tmp_down = split("\t", $line_down);  
   		chomp $tmp_down[0];
   		$tab_probe{$tmp_down[0]}.=$line_down; 
}

@chrom_probes= keys(%tab_probe); 

&expression;


exit 0;


###########
#subrutine#
###########

sub expression
{
foreach $chromosoma (@chrom_probes){
	%gene_cen="";
	@file2=split("\n", $tab_ann{$chromosoma});
	foreach $linea(@file2) {
		chomp $linea;
		$linea=~ s/#//g;
		my @kEle=split(/\s+/, $linea);
		$ref=$kEle[$zRef];
		$chrom=$kEle[$zchrom];
		$strand=$kEle[$zstrand];
		$transcriptStart=$kEle[$ztxStart];
		$transcriptStop=$kEle[$ztxEnd];
		if($zcount){
			$exoncount=$kEle[$zcount];
		}
		else
		{
			$exoncount=1;
		}
		$geneName=$kEle[$zname];
		$exonStartref=$kEle[$zexonstart];
		
		my @exonStart=split(",", $exonStartref);
		
		if (!$zblocksize){
			$exonEndref=$kEle[$zexonend];
		}
		else {
			@blockStop=split(",", $kEle[$zblocksize]);
			$exonEndref="";
			for ($jj=0; $jj<=$#exonStart; $jj++){
				$end_block=$exonStart[$jj]+$blockStop[$jj];
				$exonEndref.="$end_block,";
			
			}	
		}
		
		my @exonStop=split(",", $exonEndref);	
		
		#print @exonStart;
		
		@file1=split("\n",$tab_probe{$chromosoma});
		
		foreach $line(@file1) {
			chomp $line;
			#chop $line;
			if ($line=~/track/g){next;}
    		if ($line=~/#/g){next;}
    		if ($line=~/^\s+$/g){next;}
			my @Line=split(/\t/, $line);
			my $Chrom=$Line[0];
			my $Start=$Line[3];
			my $Stop=$Line[4];
			my $ProbeName=$Line[5];
			my $feature="ciccio";
			if ($Chrom eq $chrom)  {
				if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){
				#print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n";
				
					for($i=0;$i<=$#exonStart;$i++) {
						if ($strand eq "+"){
							$exoncount1=$i+1;
							$exoncount2=$exoncount1;
							if($i==$#exonStart) {$exoncount2="last";}
						}
						if ($strand eq "-"){
							$exoncount1=($#exonStart+1)-$i;
							$exoncount2=$exoncount1;
							if($i==0) {$exoncount2="last";}
						}
						
						if(($Start<=$exonStart[$i])  && ($i==0) && ($strand eq "+")){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						if(($Start<=$exonStop[$i])  && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){
							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1";
							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
							last;
						}
						
					}
			
			
				}
			
			}
			
		}	
	}			
	
foreach $nome (keys %gene_cen){
	foreach $description (keys %{$gene_cen {$nome}}) {
		print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n";
	
	}

}
}
close INFILE;
close INFILE2;						
}