Mercurial > repos > mkhan1980 > ctcf_analysis
view check.pl @ 5:1c09a4a56459 draft
Uploaded
author | mkhan1980 |
---|---|
date | Thu, 25 Apr 2013 11:20:08 -0400 |
parents | ebad609b8a6d |
children |
line wrap: on
line source
#!/usr/bin/perl -w
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Scan a nucleotide sequence for CTCF sites on the "+" strand.
#
# Usage: check.pl <sequence_file> <coordinates_file> <output_file>
#
#   sequence_file     Every line is chomped and the lines are concatenated
#                     into a single sequence string.
#   coordinates_file  Whitespace-separated "<chromosome> <start>".  If the
#                     file holds several non-blank lines, the last one wins.
#   output_file       Tab-separated report: a header row, then one row per
#                     19-base window whose score is >= 18.
#
# Each 19-character window made only of A, T, G, C or N (case-insensitive)
# is scored against a position frequency matrix.  The score is the sum, over
# the 19 positions, of
#
#     log2( (count + sqrt(914) * bg) / (914 + sqrt(914)) / bg )
#
# where count is the matrix entry for the base at that position and bg is
# the background frequency of the base (0.3 for A/T, 0.2 for C/G).
#
# Scoring is case-sensitive: an N, or a lowercase base, passes the window
# test but contributes nothing to the score.
#
# Reported Start is <start> + (0-based offset of the window) + 1 and End is
# Start + 18.

my $MOTIF_LENGTH    = 19;
my $SCORE_THRESHOLD = 18;

# Total used by the pseudocount formula.
# NOTE(review): presumably the number of aligned sites behind the matrix;
# confirm against the source of the matrix.
my $SITE_TOTAL = 914;

# Order of the four counts stored for each motif position.
my @BASES = qw(A C G T);

# Background frequency assumed for each base.
my %BACKGROUND = ( A => 0.3, C => 0.2, G => 0.2, T => 0.3 );

# Position frequency matrix, flattened: one row of four counts (A C G T)
# per motif position, 19 rows in total.
my @FREQUENCY = qw(
     87.25 291.25  76.25 459.25
    167.25 145.25 414.25 187.25
    281.25  49.25 449.25 134.25
     56.25 800.25  21.25  36.25
      8.25 903.25   0.25   2.25
    744.25  13.25  65.25  91.25
     40.25 528.25 334.25  11.25
    107.25 433.25  48.25 324.25
    851.25  11.25  32.25  18.25
      5.25   0.25 903.25   3.25
    333.25   3.25 566.25   9.25
     54.25  12.25 504.25 341.25
     12.25   0.25 890.25   8.25
     56.25   8.25 775.25  71.25
    104.25 733.25   5.25  67.25
    372.25  13.25 507.25  17.25
     82.25 482.25 307.25  37.25
    117.25 322.25  73.25 396.25
    402.25 181.25 266.25  59.25
);

# A candidate window: exactly $MOTIF_LENGTH characters, each A/T/G/C/N.
my $WINDOW_RE = qr/\A[ATGCN]{$MOTIF_LENGTH}\z/i;

# Read a whole file and return its lines with the line endings chomped.
# Dies with the system error if the file cannot be opened or closed.
sub read_lines {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open '$path' for reading: $!\n";
    chomp( my @lines = <$fh> );
    close $fh
        or die "Cannot close '$path': $!\n";

    return @lines;
}

# Build the log-odds table once: $table->[$position]{$base} is the weight of
# $base at $position.  The arithmetic is done in the same order as the
# original per-base formula so the resulting floats are unchanged.
sub build_log_odds {
    die "Frequency matrix must hold " . ( 4 * $MOTIF_LENGTH ) . " values\n"
        unless @FREQUENCY == 4 * $MOTIF_LENGTH;

    my $pseudo      = sqrt($SITE_TOTAL);
    my $denominator = $pseudo + $SITE_TOTAL;
    my $log2        = log(2);

    my @table;
    for my $position ( 0 .. $MOTIF_LENGTH - 1 ) {
        for my $column ( 0 .. $#BASES ) {
            my $base       = $BASES[$column];
            my $background = $BACKGROUND{$base};
            my $count      = $FREQUENCY[ $position * 4 + $column ];

            $table[$position]{$base}
                = log( ( $pseudo * $background + $count )
                       / $denominator / $background ) / $log2;
        }
    }

    return \@table;
}

# Sum the per-position weights of one window.  Characters with no entry in
# the table (N, lowercase bases) add nothing.
sub score_window {
    my ( $window, $log_odds ) = @_;

    my $score    = 0;
    my $position = 0;
    for my $base ( split //, $window ) {
        my $weight = $log_odds->[$position]{$base};
        $score += $weight if defined $weight;
        $position++;
    }

    return $score;
}

die "Usage: $0 <sequence_file> <coordinates_file> <output_file>\n"
    unless @ARGV >= 3;
my ( $sequence_file, $coordinates_file, $output_file ) = @ARGV;

my $sequence = join '', read_lines($sequence_file);

# Take chromosome and start from the last non-blank coordinates line.
# split ' ' also ignores leading whitespace, which split /\s+/ does not.
my ( $chromosome, $region_start );
for my $line ( read_lines($coordinates_file) ) {
    my @fields = split ' ', $line;
    next unless @fields;
    ( $chromosome, $region_start ) = @fields[ 0, 1 ];
}
die "No '<chromosome> <start>' entry with a numeric start found in "
    . "'$coordinates_file'\n"
    unless defined $chromosome
        && defined $region_start
        && looks_like_number($region_start);

open my $out, '>', $output_file
    or die "Cannot open '$output_file' for writing: $!\n";

print {$out} join( "\t",
    'CTCF Site', 'Chromosome no.', 'Start', 'End', 'Score', 'Strand' ), "\n";

my $log_odds = build_log_odds();

# Slide a window over the sequence one base at a time.  Positions come from
# the real offset, so skipping an ineligible window (one holding a character
# other than A/T/G/C/N) cannot shift the coordinates of later hits.
for my $offset ( 0 .. length($sequence) - $MOTIF_LENGTH ) {
    my $window = substr $sequence, $offset, $MOTIF_LENGTH;
    next unless $window =~ $WINDOW_RE;

    my $score = score_window( $window, $log_odds );
    next unless $score >= $SCORE_THRESHOLD;

    my $start = $region_start + $offset + 1;
    my $end   = $start + $MOTIF_LENGTH - 1;
    print {$out} join( "\t",
        $window, $chromosome, $start, $end, $score, '+' ), "\n";
}

# Buffered write errors only surface at close, so check it.
close $out
    or die "Cannot close '$output_file': $!\n";