diff carpet-src-1/tools/CARPET/annotation_expr_intron.pl @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/carpet-src-1/tools/CARPET/annotation_expr_intron.pl	Tue Jun 07 16:50:41 2011 -0400
@@ -0,0 +1,232 @@
+#!/usr/bin/perl
+
+# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
+#
+
+# This program is free software; ; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+
+
+$|=1;
+my $infile = $ARGV[0];
+my $infile2=$ARGV[1];
+my $file_output=$ARGV[2];
+
+open (INFILE, "<$infile");
+open (INFILE2, "<$infile2");
+open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n";   
+
+$campi_t=0;
+while (defined (my $line_down = <INFILE2>)) {
+		$line_down=~ s/\#//g;
+		chomp($line_down);
+		$campi_t++;
+		my @tmp_down=split(/\s+/, $line_down);
+		if($campi_t==1){
+			$z=0;
+			foreach $campo_t(@tmp_down){
+				if(($campo_t eq "name") || ($campo_t eq "qName")){
+					$zRef=$z;
+				}
+				if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")){
+					$ztxStart=$z;
+				}
+				if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")){
+					$ztxEnd=$z;
+				}
+				if($campo_t eq "strand"){
+					$zstrand=$z;
+				}
+				if(($campo_t eq "chrom") || ($campo_t eq "tName")){
+					$zchrom=$z;
+				}
+				if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){
+					$zexonstart=$z;
+				}
+				if($campo_t eq "exonEnds"){
+					$zexonend=$z;
+				}
+				if($campo_t eq "blockSizes"){
+					$zblocksize=$z;
+				}
+				if($campo_t eq "name2"){
+					$zname=$z;
+				}
+				if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){
+					$zcount=$z;
+				}
+				$z++;
+			}
+			if(!$zname){
+				$zname=$zRef;
+			}
+			if(!$zexonstart){
+				$zexonstart=$ztxStart;
+			}
+			if(!$zexonend){
+				$zexonend=$ztxEnd;
+			}
+			if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){
+				print "Annotation file is not in the accepted format\n";
+				exit;
+			}else{print "Expression chip annotation";}
+		next;
+		} 
+   		chomp $tmp_down[$zchrom];
+   		$tab_ann{$tmp_down[$zchrom]}.="$line_down\n";
+}
+
+while (defined (my $line_down = <INFILE>)) {
+   		my @tmp_down = split("\t", $line_down);  
+   		chomp $tmp_down[0];
+   		$tab_probe{$tmp_down[0]}.=$line_down; 
+}
+
+@chrom_probes= keys(%tab_probe); 
+
+&expression;
+
+
+exit 0;
+
+
+###########
+#subrutine#
+###########
+
+sub expression
+{
+foreach $chromosoma (@chrom_probes){
+	%gene_cen="";
+	@file2=split("\n", $tab_ann{$chromosoma});
+	foreach $linea(@file2) {
+		chomp $linea;
+		$linea=~ s/#//g;
+		my @kEle=split(/\s+/, $linea);
+		$ref=$kEle[$zRef];
+		$chrom=$kEle[$zchrom];
+		$strand=$kEle[$zstrand];
+		$transcriptStart=$kEle[$ztxStart];
+		$transcriptStop=$kEle[$ztxEnd];
+		if($zcount){
+			$exoncount=$kEle[$zcount];
+		}
+		else
+		{
+			$exoncount=1;
+		}
+		$geneName=$kEle[$zname];
+		$exonStartref=$kEle[$zexonstart];
+		
+		my @exonStart=split(",", $exonStartref);
+		
+		if (!$zblocksize){
+			$exonEndref=$kEle[$zexonend];
+		}
+		else {
+			@blockStop=split(",", $kEle[$zblocksize]);
+			$exonEndref="";
+			for ($jj=0; $jj<=$#exonStart; $jj++){
+				$end_block=$exonStart[$jj]+$blockStop[$jj];
+				$exonEndref.="$end_block,";
+			
+			}	
+		}
+		
+		my @exonStop=split(",", $exonEndref);	
+		
+		#print @exonStart;
+		
+		@file1=split("\n",$tab_probe{$chromosoma});
+		
+		foreach $line(@file1) {
+			chomp $line;
+			#chop $line;
+			if ($line=~/track/g){next;}
+    		if ($line=~/#/g){next;}
+    		if ($line=~/^\s+$/g){next;}
+			my @Line=split(/\t/, $line);
+			my $Chrom=$Line[0];
+			my $Start=$Line[3];
+			my $Stop=$Line[4];
+			my $ProbeName=$Line[5];
+			my $feature="ciccio";
+			if ($Chrom eq $chrom)  {
+				if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){
+				#print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n";
+				
+					for($i=0;$i<=$#exonStart;$i++) {
+						if ($strand eq "+"){
+							$exoncount1=$i+1;
+							$exoncount2=$exoncount1;
+							if($i==$#exonStart) {$exoncount2="last";}
+						}
+						if ($strand eq "-"){
+							$exoncount1=($#exonStart+1)-$i;
+							$exoncount2=$exoncount1;
+							if($i==0) {$exoncount2="last";}
+						}
+						
+						if(($Start<=$exonStart[$i])  && ($i==0) && ($strand eq "+")){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						if(($Start<=$exonStop[$i])  && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*intronexon $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t*exonintron $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){
+							$feature="$chrom\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $exoncount2\t$exoncount1";
+							$gene_cen{"$ref\t$geneName"}{$feature}.="$Chrom-$Start,";
+							last;
+						}
+						
+					}
+			
+			
+				}
+			
+			}
+			
+		}	
+	}			
+	
+foreach $nome (keys %gene_cen){
+	foreach $description (keys %{$gene_cen {$nome}}) {
+		print OUTFILE1 "$nome\t$description\t$gene_cen{$nome}{$description}\n";
+	
+	}
+
+}
+}
+close INFILE;
+close INFILE2;						
+}			
+			
+			
+