annotate Tools/Second/remove_motifs_galaxy.pl @ 3:b30ba2b06326 draft

Uploaded
author amadeo
date Mon, 05 Sep 2016 06:01:48 -0400
parents 229d36377838
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
229d36377838 Uploaded
amadeo
parents:
diff changeset
#!/usr/bin/perl -w

use strict;
use warnings;

# Overlap filter for motif hits.
#
# The input is the GFF file written by step2.pl. Consecutive motifs listed for
# the same sequence are compared; when two of them overlap by more than the
# requested threshold, the one with the higher p-value is discarded.

$| = 1;    # autoflush the currently selected output handle (STDOUT by default)

# Scratch variables describing the record currently being examined.
my ($line, @cols);
my ($gene, @sequences, $seq_len);

# Last accepted [start, end] interval per gene, one table per strand.
my (%hash, %hash_negative);

# Overlap threshold as a fraction; filled in from the fourth argument.
my $OL;

# Lines retained for the plus-strand and the minus-strand output files.
my (@output_pos, @output_neg);

# P-value of the current record and of the last accepted record, per strand.
my ($actual_pvalue, $actual_pvalue_neg);
my ($pvalue, $pvalue_neg);

# The script needs four arguments: the input GFF, the plus-strand output, the
# minus-strand output and the overlap percentage. A wrong invocation is an
# error, so the usage text goes to STDERR and the exit status is non-zero;
# that way a calling wrapper does not mistake it for a successful run.
if (@ARGV < 4) {
    print STDERR "\nUsage: rm_overlap_motifs_posneg.pl fimo-test-sue.gff fimo-nol-pos.gff fimo-nol-neg.gff overlap_percentage\n\n";
    exit(1);
}

# Open the input GFF for reading and the two per-strand outputs for writing.
# The three-argument form of open keeps the mode separate from the file name,
# so a name that begins with '<', '>' or '|' or carries surrounding blanks is
# taken literally. The handle names are unchanged because the rest of the
# script refers to them. $! is reported so the real cause (missing file,
# permissions, ...) is visible.
open(FIMO, '<', $ARGV[0])
    or die "Cannot open input file '$ARGV[0]' for reading: $!\n";
open(POSITIVE, '>', $ARGV[1])
    or die "Cannot open output file '$ARGV[1]' for writing: $!\n";
open(NEGATIVE, '>', $ARGV[2])
    or die "Cannot open output file '$ARGV[2]' for writing: $!\n";

# Take the overlap threshold from the user as a percentage and store it in $OL
# on a 0-1 scale. Accepted values are greater than 0 and at most 100; anything
# else aborts the run with a message on STDERR and a non-zero exit status.
my $overlap_percent = $ARGV[3];
if ($overlap_percent > 0.0 && $overlap_percent <= 100) {
    $OL = $overlap_percent / 100;
}
else {
    die " ERROR: overlap must be greater than 0 and at most 100 (got '$overlap_percent')\n";
}

# Main pass over the input GFF.
#
# Lines starting with '#' are copied to both output files immediately. Every
# other line is split on whitespace and routed by its strand column (7th
# field): '+' records are filtered against the plus-strand state, '-' records
# against the minus-strand state. Lines with any other strand value, or with
# fewer than seven fields, are ignored.
#
# Both strands share one decision routine. Each strand has its own interval
# table, reference p-value and output list, so a replacement on the minus
# strand updates the minus-strand reference p-value and never the plus-strand
# one.
my %track_for = (
    '+' => { last_span => \%hash,          last_p => \$pvalue,     kept => \@output_pos },
    '-' => { last_span => \%hash_negative, last_p => \$pvalue_neg, kept => \@output_neg },
);

while (my $record = <FIMO>) {
    chomp $record;    # avoid a trailing newline on the stored line

    if ($record =~ /^#/) {
        printf POSITIVE "%s\n", $record;
        printf NEGATIVE "%s\n", $record;
        next;
    }

    my @fields = split ' ', $record;
    next unless defined $fields[6];    # blank or truncated line: no strand column

    my $track = $track_for{ $fields[6] };
    next unless $track;                # strand is neither '+' nor '-'

    _apply_motif(
        $track,
        substr($fields[0], 0, 21),     # gene key: first 21 characters of column 1
        $fields[3],                    # start position of the motif
        $fields[4],                    # end position of the motif
        _sequence_length($fields[9]),
        _parse_pvalue($fields[8]),
        $record,
        $OL,
    );
}

# Decide what happens to one motif record and update the strand state.
#
# Arguments:
#   $track       - hash ref with keys last_span (hash ref: gene => [start, end]
#                  of the last accepted motif), last_p (scalar ref: p-value of
#                  the last accepted motif) and kept (array ref of retained lines)
#   $gene        - key identifying the sequence the motif belongs to
#   $pos1, $pos2 - start and end of the current motif
#   $seq_len     - length used to normalise the overlap
#   $p_value     - p-value of the current motif
#   $record      - the line to store if the motif is accepted
#   $max_overlap - threshold as a fraction
#
# Rules (the "previous" motif is the last accepted one for the same gene):
#   * no previous motif, or neither end of the current motif lies inside the
#     previous interval: accept.
#   * exactly one end lies inside: the overlap fraction is the distance from
#     that end to the far edge of the previous interval, divided by $seq_len.
#     Below the threshold: accept. Above the threshold and a lower p-value
#     than the previous motif: replace the previous motif. Otherwise
#     (including a fraction exactly equal to the threshold): drop.
#   * both ends lie inside: replace the previous motif if the p-value is
#     lower, otherwise drop.
#
# NOTE(review): a motif that completely contains the previous interval has
# neither end inside it and is therefore accepted as non-overlapping; this
# matches the historical behaviour - confirm it is intended.
#
# A replacement removes the last line of the kept list, which is assumed to be
# the previous motif of the same gene (records grouped by gene).
#
# Returns 1 when the record was stored, 0 when it was dropped.
sub _apply_motif {
    my ($track, $gene, $pos1, $pos2, $seq_len, $p_value, $record, $max_overlap) = @_;

    my $replace_previous = 0;

    if (exists $track->{last_span}{$gene}) {
        my ($prev_start, $prev_end) = @{ $track->{last_span}{$gene} };
        my $start_inside = ($pos1 >= $prev_start && $pos1 <= $prev_end);
        my $end_inside   = ($pos2 >= $prev_start && $pos2 <= $prev_end);

        if ($start_inside && $end_inside) {
            return 0 unless $p_value < ${ $track->{last_p} };
            $replace_previous = 1;
        }
        elsif ($start_inside || $end_inside) {
            my $shared   = $end_inside ? $pos2 - $prev_start : $prev_end - $pos1;
            my $fraction = int($shared) / $seq_len;

            if ($fraction < $max_overlap) {
                # Overlap below the threshold: keep both motifs.
            }
            elsif ($fraction > $max_overlap && $p_value < ${ $track->{last_p} }) {
                $replace_previous = 1;
            }
            else {
                return 0;
            }
        }
    }

    pop @{ $track->{kept} } if $replace_previous;
    $track->{last_span}{$gene} = [ $pos1, $pos2 ];
    ${ $track->{last_p} } = $p_value;
    push @{ $track->{kept} }, $record;
    return 1;
}

# Extract the p-value from the 9th whitespace-separated field.
#
# The value is read positionally: four characters starting 16 from the end
# form the mantissa (anything but digits and '.' is stripped), three
# characters starting 11 from the end form the exponent.
# NOTE(review): this presumably relies on the field ending in
# "pvalue=<mantissa>e<exp>;qvalue=" - confirm against the producer of the file.
sub _parse_pvalue {
    my ($attributes) = @_;

    my $mantissa = substr $attributes, -16, 4;
    my $exponent = substr $attributes, -11, 3;
    $mantissa =~ s/[^.\d]//g;

    return $mantissa * (10 ** $exponent);
}

# Length of the matched sequence taken from the 10th whitespace-separated
# field: the text after the first '=' with its last character removed.
# NOTE(review): presumably that last character is the trailing ';' of the
# attribute list - confirm.
sub _sequence_length {
    my ($field) = @_;

    my @parts = split "=", $field;
    return int(length(substr $parts[1], 0, -1));
}

# Write the retained motif lines, plus strand first, then minus strand.
printf POSITIVE "%s\n", $_ for @output_pos;
printf NEGATIVE "%s\n", $_ for @output_neg;

# Close the handles explicitly. For the two output files the result is
# checked, because errors from buffered writes (for example a full disk) are
# only reported when the handle is closed.
close(FIMO);
close(POSITIVE) or die "Error while writing '$ARGV[1]': $!\n";
close(NEGATIVE) or die "Error while writing '$ARGV[2]': $!\n";