comparison check.pl @ 0:ebad609b8a6d draft

Uploaded
author mkhan1980
date Mon, 04 Mar 2013 06:36:06 -0500
parents
children 1c09a4a56459
comparison
equal deleted inserted replaced
-1:000000000000 0:ebad609b8a6d
1 #!/usr/bin/perl -w
2 use strict;
3
4 # Define variables
5 my @temp=();
6 my $result1;
7 my $result2;
8 my $result3;
9 my $result4;
10 my $result5;
11 my $result6;
12 my $resultfinal;
13 my $count;
14 my $coun;
15 my $cou;
16 my @digit=();
17 my $digit;
18 my $marks;
19 my $log;
20 my $coll;
21 my @scorearray=();
22 my $scorearray;
23 my $percent;
24 my $kount;
25 my @result=();
26 my $result;
27 my %final=();
28 my $final;
29 my @c=();
30 my @matrix1;
31 my @matrix2;
32 my $matrix1;
33 my $matrix2;
34 $coll=0;
35 my $count2;
36 my $var;
37 my $entry1;
38 my $entry2;
39 my $reventry1;
40 my $reventry2;
41 my $revvar;
42 my @revmatrix1;
43 my $revkount;
44 my $revcoun;
45 my $revcount2;
46 my @revtemp;
47 my $revcoll;
48 my @revdigit;
49 my $revdigit;
50 my $revmarks;
51 my $revresult1;
52 my $revresult2;
53 my $revresult3;
54 my $revresult4;
55 my $revresult5;
56 my $revresult6;
57 my $revresultfinal;
58 my @revscorearray;
59 my $revscorearray;
60
61
62
63 #define variables from configuration file
64 open (IN, "<$ARGV[0]");
65 open (IN2, "<$ARGV[1]");
66 open (OUT, ">$ARGV[2]");
67
68 #assign arrays to variables from configuration file
69 my @array5=<IN>;
70
71 my @coordinates=<IN2>;
72
73
74 #split the chromosome number and starting position from coordinates file into 2 separate strings
75
76 foreach my $coordinates(@coordinates) {
77
78 chomp($coordinates);
79
80 my @coordinates2=split(/\s+/, $coordinates);
81
82 my $coordinates2;
83
84 $entry1=$coordinates2[0];
85 $entry2=$coordinates2[1];
86
87 }
88
89
90 print OUT "CTCF Site", "\t", "Chromosome no.", "\t", "Start", "\t", "End", "\t", "Score", "\t", "Strand", "\n";
91
92 chomp (@array5);
93
94 my $digits=join("", @array5);
95
96 my @yeslap = $digits =~ /(?=(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w))/g;
97
98
99 $var = "@yeslap\n";
100
101
102 @matrix1=qw/87.25 291.25 76.25 459.25 167.25 145.25 414.25 187.25 281.25 49.25 449.25 134.25 56.25 800.25 21.25 36.25 8.25 903.25 0.25 2.25 744.25 13.25 65.25 91.25 40.25 528.25 334.25 11.25 107.25 433.25 48.25 324.25 851.25 11.25 32.25 18.25 5.25 0.25 903.25 3.25 333.25 3.25 566.25 9.25 54.25 12.25 504.25 341.25 12.25 0.25 890.25 8.25 56.25 8.25 775.25 71.25 104.25 733.25 5.25 67.25 372.25 13.25 507.25 17.25 82.25 482.25 307.25 37.25 117.25 322.25 73.25 396.25 402.25 181.25 266.25 59.25/;
103
104 $kount=0;
105 $coun=0;
106
107 # Define the pattern for CTCF. Because of pseudocount, a wildcard is allowed at
108 #each position.
109 my $pattern = "[ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC][ATGC]";
110
111 # Compare the pattern with the 19 bp nucleotide segments.
112
113 while($var =~ m/$pattern/gi)
114 {
115 $coun++;
116 $count2++;
117 my $endpos = pos $var;
118
119 # Get the starting and ending positions of the matched pattern.
120
121 my $startpos=($endpos+1)-19;
122 my $lastpos=$endpos;
123
124
125 my $consensus = substr($var, ($startpos-1), 19);
126
127 push(@temp, $consensus, $startpos, $lastpos);
128
129 $coll=0;
130 $kount++;
131
132
133 # Split the matched pattern into 19 single bases.
134
135 @digit = split(//, $consensus);
136
137
138 # For each base, if the base is A, calculate the weight score of A according to
139 #its frequency in the CTCF Position Frequency Matrix.
140 foreach $digit (@digit)
141 {
142
143
144 if($digit =~ m/A/)
145 {
146
147
148 my $ref = \@matrix1;
149 $marks = @{$ref}[$coll];
150
151
152 $result1 = sqrt(914);
153
154 $result2 = $result1*0.3;
155
156 $result3 = $result2+$marks;
157 $result4 = sqrt(914);
158 $result5 = $result4+914;
159 $result6 = 0.3;
160
161 $resultfinal = log($result3/$result5/$result6)/log(2);
162
163
164
165
166
167 push(@scorearray, $resultfinal);
168
169
170
171 }
172
173 if($digit =~ m/C/)
174
175 {
176
177 my $ref = \@matrix1;
178
179 $marks = @{$ref}[$coll + 1];
180
181 $result1 = sqrt(914);
182 $result2 = $result1*0.2;
183 $result3 = $result2+$marks;
184 $result4 = sqrt(914);
185 $result5 = $result4+914;
186 $result6 = 0.2;
187
188 $resultfinal = log($result3/$result5/$result6)/log(2);
189
190 push(@scorearray, $resultfinal);
191
192
193 }
194
195
196 if($digit =~ m/G/)
197
198 {
199
200 my $ref = \@matrix1;
201
202 $marks = @{$ref}[$coll+2];
203
204 $result1 = sqrt(914);
205 $result2 = $result1*0.2;
206 $result3 = $result2+$marks;
207 $result4 = sqrt(914);
208 $result5 = $result4+914;
209 $result6 = 0.2;
210
211 $resultfinal = log($result3/$result5/$result6)/log(2);
212
213
214 push(@scorearray, $resultfinal);
215
216 }
217
218 if($digit =~ m/T/)
219
220 {
221
222 my $ref = \@matrix1;
223
224 $marks = @{$ref}[$coll+3];
225
226 $result1 = sqrt(914);
227 $result2 = $result1*0.3;
228 $result3 = $result2+$marks;
229 $result4 = sqrt(914);
230 $result5 = $result4+914;
231 $result6 = 0.3;
232
233 $resultfinal = log($result3/$result5/$result6)/log(2);
234
235
236 push(@scorearray, $resultfinal);
237
238 }
239
240 $coll=$coll + 4;
241
242 }
243
244 @digit=();
245 my $tem=0;
246
247
248 foreach $scorearray(@scorearray)
249 {
250
251 $tem = $tem + $scorearray;
252
253
254 }
255
256 @scorearray = ();
257
258
259 my $fpercent = $tem;
260
261
262 if ($fpercent >= 18) {
263
264 print OUT $consensus, "\t", $entry1, "\t", $count2 + $entry2, "\t", $count2 + $entry2 + 18, "\t", "$fpercent", "\t", "+", "\n";
265
266
267 }
268
269 }
270
271
272 close ( OUT );
273 close ( IN );
274 close ( IN2 );