0
|
1 package CPT::Analysis::PAUSE;
|
|
2
|
|
3 # ABSTRACT: Library for use in PAUSE analysis
|
|
4 use strict;
|
|
5 use warnings;
|
|
6 use Moose;
|
|
7 use List::Util qw(sum);
|
|
8 use Statistics::Descriptive;
|
|
9
|
|
10 sub max ($$) { shift; $_[ $_[0] < $_[1] ] }
|
|
11 sub min ($$) { shift; $_[ $_[0] > $_[1] ] }
|
|
12
|
|
13 sub derivative {
|
|
14 my ( $self, $data_ref ) = @_;
|
|
15 my @data = @{$data_ref};
|
|
16 my @new_data;
|
|
17 foreach ( my $i = 0 ; $i < scalar(@data) - 1 ; $i++ ) {
|
|
18 $new_data[ $i + 1 ] = $data[ $i + 1 ] - $data[$i];
|
|
19 }
|
|
20 return \@new_data;
|
|
21 }
|
|
22
|
|
23 sub find_peaks {
|
|
24 my ( $self, %data ) = @_;
|
|
25 use IPC::Run3;
|
|
26 use File::Temp qw/tempfile/;
|
|
27
|
|
28 # Store to CSV File
|
|
29 my @starts = @{ $data{data} };
|
|
30 my ( $fh0, $filename0 ) = tempfile('galaxy.pause.XXXXXXX');
|
|
31 printf $fh0 ( "%s,%s\n", 'position', 'count' );
|
|
32 for ( my $i = 0 ; $i < scalar(@starts) ; $i++ ) {
|
|
33 printf $fh0 "%d,%d\n", $i,
|
|
34 ( defined $starts[$i] ? $starts[$i] : 0 );
|
|
35 }
|
|
36 close($fh0);
|
|
37
|
|
38 my ( $fh, $filename ) = tempfile('galaxy.pause.XXXXXXX');
|
|
39 my @cmd = (
|
|
40 'Rscript', $data{location_of_rscript_file},
|
|
41 $filename0, $filename, $data{snr}
|
|
42 );
|
|
43 my ( $in, $out, $err );
|
|
44 run3 \@cmd, \$in, \$out, \$err;
|
|
45
|
|
46 # Read in R data
|
|
47 my @values;
|
|
48 while (<$fh>) {
|
|
49 chomp;
|
|
50 push( @values, $_ );
|
|
51 }
|
|
52 close($fh);
|
|
53
|
|
54 unlink($filename0);
|
|
55 unlink($filename);
|
|
56
|
|
57 return @values;
|
|
58 }
|
|
59
|
|
60 sub smooth {
|
|
61 my ( $self, $data_ref ) = @_;
|
|
62 my @data = @{$data_ref};
|
|
63 my @new_data;
|
|
64 my $length = scalar @data;
|
|
65 foreach ( my $i = 0 ; $i < $length ; $i++ ) {
|
|
66 my $avg =
|
|
67 sum( @data[ $i - 20 .. $i - 1, $i + 1 .. $i + 20 ] ) / 40;
|
|
68 $new_data[$i] = $avg;
|
|
69 }
|
|
70 return \@new_data;
|
|
71 }
|
|
72
|
|
73 sub histogram {
|
|
74 my ( $self, %data ) = @_;
|
|
75
|
|
76 my @coverage = @{ $data{data} };
|
|
77 my @return_coverage;
|
|
78 for ( my $i = 0 ; $i < scalar(@coverage) ; $i++ ) {
|
|
79 my $size = $coverage[$i];
|
|
80 unless ($size) { $size = 0 }
|
|
81 $return_coverage[$i] = [ $i, $size, "*" x $size ];
|
|
82 }
|
|
83 my %results = (
|
|
84 'Sheet1' => {
|
|
85 headers => [qw(Base Count Plot)],
|
|
86 data => \@return_coverage,
|
|
87 }
|
|
88 );
|
|
89 return %results;
|
|
90 }
|
|
91
|
|
92 sub getCoverageDensity {
|
|
93 my ( $self, %data ) = @_;
|
|
94
|
|
95 # Load the sam file
|
|
96 my $sam = Bio::DB::Sam->new(
|
|
97 -bam => $data{bam},
|
|
98 -fasta => $data{genome},
|
|
99 -autoindex => 1,
|
|
100 );
|
|
101
|
|
102 # Get all alignments to our indicated FASTA file
|
|
103 my @alignments = $sam->get_features_by_location(
|
|
104 -seq_id => $data{fasta_id},
|
|
105 -start => 1,
|
|
106 -end => $data{fasta_length}
|
|
107 );
|
|
108
|
|
109 # Set up some variables
|
|
110 my $coverage_density_max_value = 0;
|
|
111 my ( @coverage_density, @read_starts, @read_ends );
|
|
112
|
|
113 # including some for statistics
|
|
114 my $stat_start = Statistics::Descriptive::Sparse->new();
|
|
115 my $stat_end = Statistics::Descriptive::Sparse->new();
|
|
116
|
|
117 # Looping over alignments
|
|
118 for my $a (@alignments) {
|
|
119 my $start = $a->start;
|
|
120 my $end = $a->end;
|
|
121
|
|
122 # Increment the number of reads starting there
|
|
123 $read_starts[$start]++;
|
|
124 $read_ends[$end]++;
|
|
125
|
|
126 # And increment the coverage density
|
|
127 foreach ( $start .. $end ) {
|
|
128 $coverage_density[$_]++;
|
|
129 if ( $coverage_density[$_] >
|
|
130 $coverage_density_max_value )
|
|
131 {
|
|
132 $coverage_density_max_value =
|
|
133 $coverage_density[$_];
|
|
134 }
|
|
135 }
|
|
136 }
|
|
137 my @start_data_for_stats;
|
|
138 my @end_data_for_stats;
|
|
139 for ( my $i = 0 ; $i < $data{fasta_length} ; $i++ ) {
|
|
140 if ( $read_starts[$i] ) {
|
|
141 push( @start_data_for_stats, $read_starts[$i] );
|
|
142 }
|
|
143 if ( $read_ends[$i] ) {
|
|
144 push( @end_data_for_stats, $read_ends[$i] );
|
|
145 }
|
|
146 }
|
|
147 $stat_start->add_data(@start_data_for_stats);
|
|
148 $stat_end->add_data(@end_data_for_stats);
|
|
149
|
|
150 # Lots of data to return
|
|
151 use CPT::Analysis::PAUSE::ParsedSam;
|
|
152 my $psam = CPT::Analysis::PAUSE::ParsedSam->new(
|
|
153 coverage_density => \@coverage_density,
|
|
154 read_starts => \@read_starts,
|
|
155 read_ends => \@read_ends,
|
|
156 max => $coverage_density_max_value,
|
|
157 stats_start_max => $stat_start->max(),
|
|
158 stats_end_max => $stat_end->max(),
|
|
159 stats_start_mean => $stat_start->mean(),
|
|
160 stats_end_mean => $stat_end->mean(),
|
|
161 stats_start_standard_deviation =>
|
|
162 $stat_start->standard_deviation(),
|
|
163 stats_end_standard_deviation => $stat_end->standard_deviation(),
|
|
164 );
|
|
165 return $psam;
|
|
166 }
|
|
167
|
|
168 no Moose;
|
|
169 1;
|
|
170
|
|
171 __END__
|
|
172
|
|
173 =pod
|
|
174
|
|
175 =encoding UTF-8
|
|
176
|
|
177 =head1 NAME
|
|
178
|
|
179 CPT::Analysis::PAUSE - Library for use in PAUSE analysis
|
|
180
|
|
181 =head1 VERSION
|
|
182
|
|
183 version 1.96
|
|
184
|
|
185 =head1 AUTHOR
|
|
186
|
|
187 Eric Rasche <rasche.eric@yandex.ru>
|
|
188
|
|
189 =head1 COPYRIGHT AND LICENSE
|
|
190
|
|
191 This software is Copyright (c) 2014 by Eric Rasche.
|
|
192
|
|
193 This is free software, licensed under:
|
|
194
|
|
195 The GNU General Public License, Version 3, June 2007
|
|
196
|
|
197 =cut
|