annotate bin/ppp.pm @ 1:1df6aaac800e draft

Deleted selected files
author brasset_jensen
date Wed, 13 Dec 2017 10:40:50 -0500
parents
children 4bc00caa60b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
1 package ppp;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
2
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
3 use strict;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
4 use warnings;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
5 use FindBin;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
6 use lib $FindBin::Bin;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
7 use Rcall qw ( histogram );
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
8 use Math::CDF;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
9
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
10 use Exporter;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
11 our @ISA = qw( Exporter );
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
12 our @EXPORT_OK = qw( &ping_pong_partners );
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
13
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# ping_pong_partners( $TE_fai, $sam, $dir, $max )
#
# Looks for "ping-pong" partners: for every reference sequence returned by
# count_mapped(), each antisense read is compared with each sense read, and a
# pair is kept when the sense read starts at or before the last base of the
# antisense read and ends after it.  The overlap length of every kept pair is
# tallied, weighted by the smaller of the two reads' duplicate counts.
#
# Arguments:
#   $TE_fai - reference index file, handed to count_mapped()
#   $sam    - alignment file, handed to count_mapped()
#   $dir    - output prefix; it is concatenated as-is with file names, so it
#             has to end with a path separator
#   $max    - largest overlap size that is tallied; when it is not positive
#             no per-reference row is written to ppp.txt
#
# Output:
#   ${dir}ppp.txt - one tab-separated row per reference: name, sum of the
#       overlap counts, count for overlap size 10, mean, standard deviation,
#       z-score of the size-10 count and 1 - pnorm(z) ('NA' when the standard
#       deviation is 0).
#   ${dir}<reference>/ - only for references with at least one overlapping
#       pair: antisensPPP.txt / antisens.txt / sensPPP.txt / sens.txt (reads
#       with / without a partner at overlap 10), overlap_size.txt and
#       histogram.png (produced by Rcall::histogram).
#
# Dies when an output file or directory cannot be created or closed.
sub ping_pong_partners
{
    my ( $TE_fai, $sam, $dir, $max ) = @_;
    my ( $hashRef, $dupRef, $hasPpp ) = count_mapped( $TE_fai, $sam );

    # One [ reference name, \@counts_per_overlap_size ] pair per reference.
    my @distri_overlap = ();

    # 'or', not '||': with '||' the die is attached to the (always true)
    # file name and a failed open goes unnoticed.
    open my $ppp_f, '>', $dir."ppp.txt" or die "cannot create ppp.txt $!\n";

    foreach my $k ( sort keys %{$hashRef} )
    {
        my $v      = $hashRef->{$k};
        my $TE_dir = $dir.$k.'/';

        # Each read is [ position, sequence, flag, reference name ].
        # A reference that is absent from the index but has reads on a
        # single strand only has one of the two slots filled.
        my $sense_reads     = $v->[0] || [];
        my $antisense_reads = $v->[1] || [];

        my %num_per_overlap_size = ();
        my $overlap_number       = 0;
        my $flag                 = 0;    # set once any overlapping pair is seen

        foreach my $reverseR ( @{$antisense_reads} )
        {
            my $endRev = $reverseR->[0] + length( $reverseR->[1] ) - 1;
            my $rkey   = join '', @{$reverseR}[ 0 .. 3 ];
            my $rnum   = $dupRef->{$rkey};

            foreach my $sensR ( @{$sense_reads} )
            {
                my $begSens = $sensR->[0];
                my $endSens = $begSens + length( $sensR->[1] ) - 1;
                next unless $begSens <= $endRev && $endSens > $endRev;

                $flag = 1;
                my $overlap = $endRev - $begSens + 1;
                my $skey    = join '', @{$sensR}[ 0 .. 3 ];

                # Both reads of a 10-nt overlap are marked as having a partner,
                # even when $max is smaller than 10.
                if ( $overlap == 10 )
                {
                    $hasPpp->{$skey} = 1;
                    $hasPpp->{$rkey} = 1;
                }
                next if $overlap > $max;

                # A pair counts as many times as its rarer read was seen.
                my $snum  = $dupRef->{$skey};
                my $pairs = $snum < $rnum ? $snum : $rnum;
                $num_per_overlap_size{$overlap} += $pairs;
                $overlap_number                 += $pairs;
            }
        }

        # '> 0' rather than '!= 0': a negative $max would yield an empty
        # distribution, whose mean cannot be computed.
        if ( $max > 0 )
        {
            my @overlaps = ();
            for my $size ( 1 .. $max )
            {
                $num_per_overlap_size{$size} = 0 unless exists $num_per_overlap_size{$size};
                push @overlaps, $num_per_overlap_size{$size};
            }
            push @distri_overlap, [ $k, \@overlaps ];
        }

        next unless $flag == 1;

        # The directory and overlap_size.txt are created once per reference,
        # not once per overlapping pair.
        -d $TE_dir or mkdir $TE_dir or die "cannot create $TE_dir $!\n";

        open my $AP, '>', $TE_dir."antisensPPP.txt" or die "cannot create antisensPPP\n";
        open my $AN, '>', $TE_dir."antisens.txt" or die "cannot create antisens\n";
        foreach my $reverseR ( @{$antisense_reads} )
        {
            # Antisense reads are written as their reverse complement.
            my $revR = scalar reverse( $reverseR->[1] );
            $revR =~ tr/atgcuATGCU/tacgaTACGA/;
            my $rkey = join '', @{$reverseR}[ 0 .. 3 ];
            my $rnum = $dupRef->{$rkey};
            my $out  = $hasPpp->{$rkey} == 1 ? $AP : $AN;
            print {$out} ">$reverseR->[0]|$reverseR->[2]|$reverseR->[3]|$rnum\n$revR\n";
        }
        close $AP or die "cannot close antisensPPP $!\n";
        close $AN or die "cannot close antisens $!\n";

        open my $SP, '>', $TE_dir."sensPPP.txt" or die "cannot create sensPPP\n";
        open my $SN, '>', $TE_dir."sens.txt" or die "cannot create sens\n";
        foreach my $sensR ( @{$sense_reads} )
        {
            my $skey = join '', @{$sensR}[ 0 .. 3 ];
            my $snum = $dupRef->{$skey};
            my $out  = $hasPpp->{$skey} == 1 ? $SP : $SN;
            print {$out} ">$sensR->[0]|$sensR->[2]|$sensR->[3]|$snum\n$sensR->[1]\n";
        }
        close $SP or die "cannot close sensPPP $!\n";
        close $SN or die "cannot close sens $!\n";

        my $histo_png = $TE_dir.'histogram.png';
        histogram( \%num_per_overlap_size, $histo_png, $overlap_number );

        open my $txt, '>', $TE_dir.'overlap_size.txt' or die "cannot open repartition\n";
        print {$txt} "size\tnumber\tpercentage of the total overlap number\n";
        foreach my $size ( sort { $a <=> $b } keys %num_per_overlap_size )
        {
            my $percentage = 0;
            $percentage = $num_per_overlap_size{$size} * 100 / $overlap_number unless $overlap_number == 0;
            printf {$txt} "%s\t%s\t%.2f\n", $size, $num_per_overlap_size{$size}, $percentage;
        }
        close $txt or die "cannot close overlap_size.txt $!\n";
    }

    foreach my $entry ( @distri_overlap )
    {
        my ( $name, $tabP ) = @{$entry};
        my $sum  = sum($tabP);
        my $ten  = $tabP->[9];    # count for overlap size 10; undefined when $max < 10
        my $mean = mean($tabP);
        my $std  = standard_deviation( $tabP, $mean );
        my $zsc  = z_significance( $ten, $mean, $std );
        my $prob = 'NA';
        $prob = 1 - Math::CDF::pnorm($zsc) if $zsc ne 'NA';
        print {$ppp_f} join( "\t", $name, $sum, $ten, $mean, $std, $zsc, $prob ), "\n";
    }
    close $ppp_f or die "cannot close ppp.txt $!\n";
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
152
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# count_mapped( $fai, $in_file )
#
# Collects the distinct reads of an alignment file, grouped by reference
# sequence and strand.
#
# Arguments:
#   $fai     - tab-separated index file whose first column is a reference
#              name and whose second column is a number
#   $in_file - alignment file, read through "samtools view"
#
# Returns three hash references:
#   \%mapped  - reference name => [ \@sense, \@antisense ]; each read is
#               [ position, sequence, flag, reference name ] (SAM fields 4,
#               10, 2 and 3) and is stored once however often it occurs
#   \%dup     - read key => number of occurrences of that read
#   \%has_ppp - read key => 0 (filled in later by the caller)
# A read key is position, sequence, flag and reference name concatenated
# without a separator.
#
# Only records whose flag is 0 (sense) or 16 (antisense) are kept.
# Dies when the index cannot be opened or when samtools cannot be started or
# exits with a failure status.
sub count_mapped
{
    my ( $fai, $in_file ) = @_;
    my ( %mapped, %dup, %has_ppp );

    # 'or', not '||': with '||' the die is attached to the file name and a
    # failed open goes unnoticed.
    open my $f, '<', $fai or die "cannot open $fai $! \n";
    while ( my $entry = <$f> )
    {
        # Only the first column is the name.  A greedy '(.*)\t(\d+)\n' would
        # swallow the first four columns of a standard 5-column .fai line and
        # would miss a last line that has no trailing newline.
        if ( $entry =~ /^([^\t]+)\t\d+/ )
        {
            $mapped{$1} = [ [], [] ];
        }
    }
    close $f;

    # List-form pipe open: the file name is passed to samtools as a single
    # argument and never goes through a shell.
    open my $infile, '-|', 'samtools', 'view', $in_file or die "cannot open input file $! \n";
    while ( my $record = <$infile> )
    {
        # Skip SAM header lines.
        next if $record =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $record =~ /^\@CO\t.*/;

        my @line = split /\t/, $record;
        next if @line < 10;    # truncated record: no sequence field

        my $strand;
        if    ( $line[1] == 0 )  { $strand = 0; }
        elsif ( $line[1] == 16 ) { $strand = 1; }
        else                     { next; }

        my @read = @line[ 3, 9, 1, 2 ];
        my $key  = join '', @read;
        unless ( exists $dup{$key} )
        {
            push @{ $mapped{ $line[2] }->[$strand] }, [@read];
            $has_ppp{$key} = 0;
        }
        $dup{$key} += 1;
    }
    # close() on a pipe also reports a non-zero exit status of the command.
    close $infile or die "samtools view failed on $in_file (status $?) $! \n";

    return ( \%mapped, \%dup, \%has_ppp );
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
198
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# sum( \@numbers )
#
# Returns the total of the numbers in the referenced array; 0 for an empty
# array.
sub sum
{
    my ($values) = @_;
    my $total = 0;
    $total += $_ for @{$values};
    return $total;
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
206
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# mean( \@numbers )
#
# Returns the arithmetic mean of the numbers in the referenced array.
# An empty (or missing) array yields 0 instead of dying with
# "Illegal division by zero".
sub mean
{
    my $arrayref = shift;
    return 0 unless $arrayref && @{$arrayref};

    my $result = 0;
    $result += $_ for @{$arrayref};
    return $result / scalar( @{$arrayref} );
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
214
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# standard_deviation( \@numbers, $mean )
#
# Returns the population standard deviation of the referenced numbers,
# computed as sqrt( mean of squares - $mean**2 ), where $mean is the
# caller-supplied mean of the same numbers.
#
# The difference is clamped at 0: rounding can make it slightly negative
# when all values are (nearly) equal, and sqrt() dies on a negative
# argument.  An empty array yields 0.
sub standard_deviation
{
    my ( $arrayref, $mean ) = @_;

    my $count = scalar @{$arrayref};
    return 0 if $count == 0;

    my $sum_of_squares = 0;
    $sum_of_squares += $_**2 for @{$arrayref};

    my $variance = $sum_of_squares / $count - $mean**2;
    $variance = 0 if $variance < 0;
    return sqrt($variance);
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
220
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
# z_significance( $ten, $mean, $std )
#
# Returns the z-score ( $ten - $mean ) / $std, or the string 'NA' when
# $std is 0 and the score is therefore undefined.
sub z_significance
{
    my ( $observed, $average, $deviation ) = @_;
    return 'NA' if $deviation == 0;
    return ( $observed - $average ) / $deviation;
}
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
228
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
229 1;
1df6aaac800e Deleted selected files
brasset_jensen
parents:
diff changeset
230