#!/usr/bin/perl
#
# check.pl -- report candidate CTCF sites found in a DNA sequence.
#
# Provenance: this file was added as check.pl by Mercurial changeset
# ebad609b8a6d7c0032a0b0798f758f42eef309fe (user mkhan1980).
#
# Usage:
#     check.pl SEQUENCE_FILE COORDINATES_FILE OUTPUT_FILE
#
#   SEQUENCE_FILE     text file whose lines are concatenated into one sequence
#   COORDINATES_FILE  whitespace-separated "chromosome start" line; when the
#                     file has several such lines the last one is used
#   OUTPUT_FILE       tab-separated report, one row per site scoring at least
#                     MIN_SCORE, preceded by a header row
#
# Every 19-base window made only of A/C/G/T (either case) is scored against a
# position frequency matrix; the score is the sum of per-position log2 odds.
# Only the "+" strand is scanned.
#
# Changes relative to the original script:
#   * input is read through checked, three-argument opens on lexical handles;
#   * Start/End are derived from the window's real offset in the sequence.
#     The original counted matched windows instead, so every window skipped
#     because of a non-ACGT character (e.g. N) shifted all later coordinates;
#   * lowercase bases are scored like uppercase ones.  The original accepted
#     them when matching but silently scored them as 0;
#   * blank lines in the coordinates file no longer wipe out the region;
#   * reverse-strand variables that were declared but never used are gone.

use strict;
use warnings;

use constant SITE_LENGTH => 19;     # width of the motif, in bases
use constant TOTAL_COUNT => 914;    # column total used by the score formula
use constant MIN_SCORE   => 18;     # minimum summed log2 odds to report a site

# Column order of the frequency matrix and the background probability the
# score formula assumes for each base.
my @BASES      = qw(A C G T);
my %BACKGROUND = (A => 0.3, C => 0.2, G => 0.2, T => 0.3);

# Position frequency matrix, flattened: four values (A, C, G, T) for each of
# the 19 motif positions.
# NOTE(review): the integer parts look like the JASPAR CTCF profile MA0139.1
# with 0.25 added to every cell -- confirm against the original data source.
my @FREQUENCY_MATRIX = qw/
     87.25 291.25  76.25 459.25
    167.25 145.25 414.25 187.25
    281.25  49.25 449.25 134.25
     56.25 800.25  21.25  36.25
      8.25 903.25   0.25   2.25
    744.25  13.25  65.25  91.25
     40.25 528.25 334.25  11.25
    107.25 433.25  48.25 324.25
    851.25  11.25  32.25  18.25
      5.25   0.25 903.25   3.25
    333.25   3.25 566.25   9.25
     54.25  12.25 504.25 341.25
     12.25   0.25 890.25   8.25
     56.25   8.25 775.25  71.25
    104.25 733.25   5.25  67.25
    372.25  13.25 507.25  17.25
     82.25 482.25 307.25  37.25
    117.25 322.25  73.25 396.25
    402.25 181.25 266.25  59.25
/;

# $LOG_ODDS[$position]{$base}: log2 odds of $base at $position, computed once
# instead of once per scanned base.
my @LOG_ODDS = _build_log_odds();

# Run only when executed as a script, so the subs can be loaded on their own.
exit main(@ARGV) unless caller;

# Build the per-position log2-odds table from the frequency matrix.
#
# For a cell with count c and background probability b the weight is
#     log2( (sqrt(N) * b + c) / (sqrt(N) + N) / b ),   N = TOTAL_COUNT
# which is the exact arithmetic the original script performed per base.
sub _build_log_odds {
    my $pseudocount = sqrt(TOTAL_COUNT);
    my $denominator = $pseudocount + TOTAL_COUNT;
    my $base_count  = scalar @BASES;

    my @table;
    for my $position (0 .. SITE_LENGTH - 1) {
        for my $column (0 .. $#BASES) {
            my $base       = $BASES[$column];
            my $background = $BACKGROUND{$base};
            my $count      = $FREQUENCY_MATRIX[ $position * $base_count + $column ];

            $table[$position]{$base}
                = log( ($pseudocount * $background + $count)
                       / $denominator / $background ) / log(2);
        }
    }
    return @table;
}

# Score one candidate site.
#
#   $site   string of SITE_LENGTH bases, upper or lower case
#
# Returns the sum of the log2 odds of each base at its position.  Characters
# other than A/C/G/T contribute nothing.  Dies if $site has the wrong length.
sub score_site {
    my ($site) = @_;

    defined $site && length($site) == SITE_LENGTH
        or die 'score_site: expected a string of ' . SITE_LENGTH . " bases\n";

    my $score    = 0;
    my $position = 0;
    for my $base (split //, uc $site) {
        my $weight = $LOG_ODDS[ $position++ ]{$base};
        $score += $weight if defined $weight;
    }
    return $score;
}

# Scan a sequence for sites scoring at least MIN_SCORE.
#
#   $sequence      the sequence to scan
#   $region_start  number added to each window's 1-based offset to obtain the
#                  reported Start (so the first window starts at
#                  $region_start + 1, as in the original script)
#
# Returns a list of hash refs with keys site, start, end and score, in the
# order the windows occur.  Overlapping windows are all considered.
sub find_ctcf_sites {
    my ($sequence, $region_start) = @_;

    my $length = SITE_LENGTH;
    # The lookahead keeps each match zero-width, so windows may overlap.
    my $window = qr/(?=([ACGT]{$length}))/i;

    my @hits;
    while ($sequence =~ /$window/g) {
        # Copy the match data before any other regex operation runs.
        my ($site, $offset) = ($1, $-[0]);

        my $score = score_site($site);
        if ($score >= MIN_SCORE) {
            my $start = $region_start + $offset + 1;
            push @hits, {
                site  => $site,
                start => $start,
                end   => $start + $length - 1,
                score => $score,
            };
        }
    }
    return @hits;
}

# Read a sequence file and return its lines joined into a single string,
# with line endings (LF or CRLF) removed.
sub read_sequence {
    my ($path) = @_;

    open my $in, '<', $path or die "Cannot read sequence file '$path': $!";
    my $sequence = '';
    while (my $line = <$in>) {
        $line =~ s/[\r\n]+\z//;
        $sequence .= $line;
    }
    close $in or die "Cannot close sequence file '$path': $!";
    return $sequence;
}

# Read the coordinates file and return (chromosome, start).
#
# The first two whitespace-separated fields of the last line that has at
# least two fields are used.  Dies when no such line exists or when the
# start field is not a non-negative integer.
sub read_region {
    my ($path) = @_;

    open my $in, '<', $path or die "Cannot read coordinates file '$path': $!";
    my ($chromosome, $start);
    while (my $line = <$in>) {
        my @fields = split ' ', $line;
        next if @fields < 2;
        ($chromosome, $start) = @fields[0, 1];
    }
    close $in or die "Cannot close coordinates file '$path': $!";

    defined $start
        or die "No 'chromosome start' line found in '$path'\n";
    $start =~ /\A\d+\z/
        or die "Start position '$start' in '$path' is not a non-negative integer\n";

    return ($chromosome, $start);
}

# Entry point: read both inputs, scan the sequence, write the report.
# Takes the command-line arguments and returns the process exit status.
sub main {
    my @args = @_;

    @args >= 3
        or die "Usage: $0 SEQUENCE_FILE COORDINATES_FILE OUTPUT_FILE\n";
    my ($sequence_file, $coordinates_file, $output_file) = @args;

    my $sequence = read_sequence($sequence_file);
    my ($chromosome, $region_start) = read_region($coordinates_file);

    open my $out, '>', $output_file
        or die "Cannot write output file '$output_file': $!";

    print {$out} join("\t", 'CTCF Site', 'Chromosome no.', 'Start', 'End',
                            'Score', 'Strand'), "\n";

    for my $hit (find_ctcf_sites($sequence, $region_start)) {
        print {$out} join("\t", $hit->{site}, $chromosome, $hit->{start},
                                $hit->{end}, $hit->{score}, '+'), "\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot close output file '$output_file': $!";
    return 0;
}