Mercurial > repos > matces > carpet_toolsuite
view carpet-src-1/tools/CARPET/MapToExon_RefSeqMat_new.pl @ 1:78770028dcf1 default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:59:33 -0400 |
parents | cdd489d98766 |
children |
line wrap: on
line source
#!/usr/bin/perl # Copyright 2009 Matteo Cesaroni, Lucilla Luzi # # This program is free software; ; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. $|=1; my $infile = $ARGV[0]; my $infile2=$ARGV[1]; my $definition_pro=$ARGV[2]; my $definition_tre=$ARGV[3]; my $priority=$ARGV[4]; my $file_output=$ARGV[5]; open (INFILE, "<$infile"); open (INFILE2, "<$infile2"); open (OUTFILE1, ">$file_output") or die "Cannot find file $file_output\n"; $campi_t=0; $definition_out=$definition_pro-2000; while (defined (my $line_down = <INFILE2>)) { $line_down=~ s/\#//g; chomp($line_down); $campi_t++; my @tmp_down=split(/\s+/, $line_down); if($campi_t==1){ $z=0; foreach $campo_t(@tmp_down){ if(($campo_t eq "name") || ($campo_t eq "qName") || ($campo_t eq "repClass")){ $zRef=$z; } if(($campo_t eq "txStart") || ($campo_t eq "tStart") || ($campo_t eq "chromStart")|| ($campo_t eq "genoStart")){ $ztxStart=$z; } if(($campo_t eq "txEnd") || ($campo_t eq "tEnd") || ($campo_t eq "chromEnd")|| ($campo_t eq "genoEnd")){ $ztxEnd=$z; } if($campo_t eq "strand"){ $zstrand=$z; } if(($campo_t eq "chrom") || ($campo_t eq "tName")|| ($campo_t eq "genoName")){ $zchrom=$z; } if(($campo_t eq "exonStarts") || ($campo_t eq "tStarts")){ $zexonstart=$z; } if($campo_t eq "exonEnds"){ $zexonend=$z; } if($campo_t eq "blockSizes"){ $zblocksize=$z; } if(($campo_t eq "name2") || ($campo_t eq "repFamily")){ $zname=$z; } if(($campo_t eq "exonCount")||($campo_t eq "blockCount")){ $zcount=$z; } $z++; } if(!$zname){ $zname=$zRef; } if(!$zexonstart){ $zexonstart=$ztxStart; } if(!$zexonend){ $zexonend=$ztxEnd; } if(($zRef eq "") || ($ztxStart eq "") || ($zstrand eq "") || ($zchrom eq "")){ print "Annotation file is not in the accepted format\n"; exit; }else{print "promoter=$definition_pro, priority=$priority";} next; } chomp $tmp_down[$zchrom]; $tab_ann{$tmp_down[$zchrom]}.="$line_down\n"; } while (defined (my $line_down = <INFILE>)) { my @tmp_down = split("\t", $line_down); chomp $tmp_down[0]; $tab_probe{$tmp_down[0]}.=$line_down; } @chrom_probes= keys(%tab_probe); &chip; exit 0; ########### #subrutine# ########### sub chip { foreach $chromosoma (@chrom_probes){ @file1=split("\n", $tab_probe{$chromosoma}); foreach $line(@file1) { chomp $line; #chop $line; if ($line=~/track/g){next;} if ($line=~/#/g){next;} if ($line=~/^\s+$/g){next;} my @Line=split(/\t/, $line); my $Chrom=$Line[0]; my $Start=$Line[3]; my $Stop=$Line[4]; #my $value=$Line[5]; my $ProbeName=$Line[5]; my $feature="ciccio"; my $check=0; my $relative_dist=10000000; $double_check=0; @file2=split("\n", $tab_ann{$chromosoma}); foreach $linea(@file2) { chomp $linea; $linea=~ s/\#//g; my @kEle=split("\t", $linea); $ref=$kEle[$zRef]; $chrom=$kEle[$zchrom]; $strand=$kEle[$zstrand]; $transcriptStart=$kEle[$ztxStart]; $transcriptStop=$kEle[$ztxEnd]; if($zcount){ $exoncount=$kEle[$zcount]; } else { $exoncount=1; } $geneName=$kEle[$zname]; $exonStartref=$kEle[$zexonstart]; #my $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand"; my @exonStart=split(",", $exonStartref); if (!$zblocksize){ $exonEndref=$kEle[$zexonend]; } else { @blockStop=split(",", $kEle[$zblocksize]); $exonEndref=""; for ($jj=0; $jj<=$#exonStart; $jj++){ $end_block=$exonStart[$jj]+$blockStop[$jj]; $exonEndref.="$end_block,"; } } my @exonStop=split(",", $exonEndref); #print "$ref / $chrom / $strand / $transcriptStart/$transcriptStop/$exoncount/$geneName [$#exonStop]\n"; #print "pippo $exonStart[0] - $exonStop[0],$exonStart[1] - $exonStop[1],$exonStart[2] - $exonStop[2] \n"; if ($Chrom eq $chrom) { #print "cazzo"; if ($strand eq "+"){ $promotore=$transcriptStart+$definition_pro; $distanzaTSS=int((($Start+$Stop)/2)-$transcriptStart); $trepr=$transcriptStop+$definition_tre; $rel_start=$promotore; $rel_stop=$trepr; } if ($strand eq "-"){ $promotore=$transcriptStop-$definition_pro; $distanzaTSS=int($transcriptStop-(($Start+$Stop)/2)); $trepr=$transcriptStart-$definition_tre; $rel_start=$trepr; $rel_stop=$promotore; } #print OUTFILE1 "$ref\t$distanzaTSS\n"; #if(($Start<=$transcriptStart && $Stop>$transcriptStart) || ($Start>=$promotore && $Stop<=$transcriptStart) || ($Start>=$transcriptStop && $Stop<=$promotore) || ($Start<=$promotore && $Stop>$promotore) || ($Start<$transcriptStop && $Stop>=$transcriptStop) || ($Start>=$transcriptStart && $Stop<=$transcriptStop) ){ #print "sono entrato con start $Start stop $Stop e $transcriptStart e $transcriptStop\n"; if($Start<=$rel_stop && $rel_start<=$Stop){ for(my $i=0;$i<=$#exonStart;$i++) { if ($strand eq "+"){ $exoncount1=$i+1; $exoncount2=$exoncount1; $introncount=$exoncount1; if($i==$#exonStart) { $exoncount2="last"; $introncount="last"; } if($i==$#exonStart-1) { $introncount="last"; } } if ($strand eq "-"){ $exoncount1=($#exonStart+1)-$i; $exoncount2=$exoncount1; $introncount=$exoncount1-1; #if(($i==0) && ($#exonStart != 0)) {$exoncount2="last";} if($i==0) { $exoncount2="last"; $introncount="last"; } } #print "esone start $exonStart[$i] e $exonStop[$i] e start $Start\n"; #print OUTFILE1 "$ref\t$exonStart[$i]\t$exonStop[$i]\t$Start\t$Stop\t$i\t$#exonStart\t$priority\n"; #if($priority eq "prom" && $check==1){ # last; #} #if($priority eq "gene" && $check==2){ # next; #} if(($Start<=$exonStart[$i]) && ($Stop>$exonStart[$i])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintronexon $exoncount2\t$distanzaTSS"; if($priority eq "prom"){ $check=2; } else{ $check=1; #last; } } if(($Start>=$exonStart[$i]) && ($Stop<=$exonStop[$i])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texon $exoncount2\t$distanzaTSS"; if($priority eq "prom"){ $check=2; } else{ $check=1; #last; } } if(($Start<$exonStop[$i]) && ($Stop>$exonStop[$i])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\texonintron $introncount\t$distanzaTSS"; if($priority eq "prom"){ $check=2; } else{ $check=1; #last; } } if($priority eq "prom"){ if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1]) && ($check==0)){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $introncount\t$distanzaTSS"; #print "intron\n"; $check=2; } } else{ if(($Start>=$exonStop[$i]) && ($Stop<=$exonStart[$i+1])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tintron $introncount\t$distanzaTSS"; #print "intron\n"; $check=2; } } if (($strand eq "+") && ($Start>=$transcriptStop)) { $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t3_prime\t$distanzaTSS"; $distanzaTSS=int($transcriptStop-(($Start+$Stop)/2)); if($priority eq "prom"){ $check=2; #last; } else{$check=1;} } if (($strand eq "-") && ($Stop<=$transcriptStart)) { $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\t3_prime\t$distanzaTSS"; $distanzaTSS=int((($Start+$Stop)/2)-$transcriptStart); if($priority eq "prom"){ $check=2; #last; } else{$check=1;} } if (($strand eq "+") && (($Start<=$promotore && $Stop>$promotore) || ($Start>$promotore && $Stop<=$transcriptStart))) { $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tpromoter\t$distanzaTSS"; if($priority eq "prom"){ $check=1; #last; } else{$check=2;} } if (($strand eq "-") && (($Start<=$promotore && $Stop>=$promotore) || ($Start>=$transcriptStop && $Stop<$promotore))) { $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tpromoter\t$distanzaTSS"; if($priority eq "prom"){ $check=1; #last; } else{$check=2;} } if(($Start<=$exonStart[$i]) && ($i==0) && ($strand eq "+") && ($Stop>=$exonStart[$i])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$distanzaTSS"; #print "prom-exon e $exoncount1\n"; if($priority eq "prom"){ $check=1; #last; } else{$check=2;} } if(($Start<=$exonStop[$i]) && ($i==$#exonStart) && ($strand eq "-") && ($Stop>=$exonStop[$i])){ $feature="$ref\t$geneName\t$transcriptStart\t$transcriptStop\t$strand\t$exoncount\tprom_exon $exoncount2\t$distanzaTSS"; if($priority eq "prom"){ $check=1; #last; } else{$check=2;} } } #if ($check==1){ # print "$chrom\t$Start\t$Stop\t$geneName\t$exoncount\t$ref\t$strand\t$feature\n"; #} }else{next;} #exit; }else{next;} #print OUTFILE1 "$ref\t$feature\n"; #print "$geneName\t$transcriptStart\t$Chrom\t$Start\t$Stop\t$distanzaTSS\t$feature\n"; if (($priority eq "gene") && ($relative_dist>abs($distanzaTSS))){ $relative_dist=abs($distanzaTSS); $stampa="$Chrom\t$Start\t$Stop\t$ProbeName\t$feature\n"; $double_check=1; } if (($check==1) && ($priority eq "prom") && ($relative_dist>abs($distanzaTSS))){ $relative_dist=abs($distanzaTSS); $double_check=1; $stampa="$Chrom\t$Start\t$Stop\t$ProbeName\t$feature\n"; } } if($double_check==1){ print OUTFILE1 "$stampa"; next; } if (($check==2) && ($priority eq "prom")){ print OUTFILE1 "$Chrom\t$Start\t$Stop\t$ProbeName\t$feature\n"; next; } if($check==0){ print OUTFILE1 "$Chrom\t$Start\t$Stop\t$ProbeName\tintergenic\tintergenic\t$Start\t$Stop\t+\t0\tOUT\t$definition_out\n"; next; } } } close INFILE; close INFILE2; }