annotate cpt_psm_plotter/lib/CPT/Analysis/PAUSE.pm @ 0:54c7a3ea81e2 draft

Uploaded
author cpt
date Tue, 05 Jul 2022 05:40:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
1 package CPT::Analysis::PAUSE;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
2
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
3 # ABSTRACT: Library for use in PAUSE analysis
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
4 use strict;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
5 use warnings;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
6 use Moose;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
7 use List::Util qw(sum);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
8 use Statistics::Descriptive;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
9
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
10 sub max ($$) { shift; $_[ $_[0] < $_[1] ] }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
11 sub min ($$) { shift; $_[ $_[0] > $_[1] ] }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
12
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
13 sub derivative {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
14 my ( $self, $data_ref ) = @_;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
15 my @data = @{$data_ref};
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
16 my @new_data;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
17 foreach ( my $i = 0 ; $i < scalar(@data) - 1 ; $i++ ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
18 $new_data[ $i + 1 ] = $data[ $i + 1 ] - $data[$i];
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
19 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
20 return \@new_data;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
21 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
22
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
23 sub find_peaks {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
24 my ( $self, %data ) = @_;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
25 use IPC::Run3;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
26 use File::Temp qw/tempfile/;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
27
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
28 # Store to CSV File
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
29 my @starts = @{ $data{data} };
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
30 my ( $fh0, $filename0 ) = tempfile('galaxy.pause.XXXXXXX');
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
31 printf $fh0 ( "%s,%s\n", 'position', 'count' );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
32 for ( my $i = 0 ; $i < scalar(@starts) ; $i++ ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
33 printf $fh0 "%d,%d\n", $i,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
34 ( defined $starts[$i] ? $starts[$i] : 0 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
35 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
36 close($fh0);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
37
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
38 my ( $fh, $filename ) = tempfile('galaxy.pause.XXXXXXX');
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
39 my @cmd = (
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
40 'Rscript', $data{location_of_rscript_file},
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
41 $filename0, $filename, $data{snr}
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
42 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
43 my ( $in, $out, $err );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
44 run3 \@cmd, \$in, \$out, \$err;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
45
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
46 # Read in R data
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
47 my @values;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
48 while (<$fh>) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
49 chomp;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
50 push( @values, $_ );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
51 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
52 close($fh);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
53
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
54 unlink($filename0);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
55 unlink($filename);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
56
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
57 return @values;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
58 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
59
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
60 sub smooth {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
61 my ( $self, $data_ref ) = @_;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
62 my @data = @{$data_ref};
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
63 my @new_data;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
64 my $length = scalar @data;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
65 foreach ( my $i = 0 ; $i < $length ; $i++ ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
66 my $avg =
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
67 sum( @data[ $i - 20 .. $i - 1, $i + 1 .. $i + 20 ] ) / 40;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
68 $new_data[$i] = $avg;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
69 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
70 return \@new_data;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
71 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
72
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
73 sub histogram {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
74 my ( $self, %data ) = @_;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
75
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
76 my @coverage = @{ $data{data} };
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
77 my @return_coverage;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
78 for ( my $i = 0 ; $i < scalar(@coverage) ; $i++ ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
79 my $size = $coverage[$i];
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
80 unless ($size) { $size = 0 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
81 $return_coverage[$i] = [ $i, $size, "*" x $size ];
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
82 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
83 my %results = (
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
84 'Sheet1' => {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
85 headers => [qw(Base Count Plot)],
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
86 data => \@return_coverage,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
87 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
88 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
89 return %results;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
90 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
91
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
92 sub getCoverageDensity {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
93 my ( $self, %data ) = @_;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
94
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
95 # Load the sam file
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
96 my $sam = Bio::DB::Sam->new(
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
97 -bam => $data{bam},
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
98 -fasta => $data{genome},
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
99 -autoindex => 1,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
100 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
101
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
102 # Get all alignments to our indicated FASTA file
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
103 my @alignments = $sam->get_features_by_location(
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
104 -seq_id => $data{fasta_id},
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
105 -start => 1,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
106 -end => $data{fasta_length}
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
107 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
108
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
109 # Set up some variables
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
110 my $coverage_density_max_value = 0;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
111 my ( @coverage_density, @read_starts, @read_ends );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
112
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
113 # including some for statistics
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
114 my $stat_start = Statistics::Descriptive::Sparse->new();
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
115 my $stat_end = Statistics::Descriptive::Sparse->new();
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
116
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
117 # Looping over alignments
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
118 for my $a (@alignments) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
119 my $start = $a->start;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
120 my $end = $a->end;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
121
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
122 # Increment the number of reads starting there
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
123 $read_starts[$start]++;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
124 $read_ends[$end]++;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
125
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
126 # And increment the coverage density
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
127 foreach ( $start .. $end ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
128 $coverage_density[$_]++;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
129 if ( $coverage_density[$_] >
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
130 $coverage_density_max_value )
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
131 {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
132 $coverage_density_max_value =
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
133 $coverage_density[$_];
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
134 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
135 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
136 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
137 my @start_data_for_stats;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
138 my @end_data_for_stats;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
139 for ( my $i = 0 ; $i < $data{fasta_length} ; $i++ ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
140 if ( $read_starts[$i] ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
141 push( @start_data_for_stats, $read_starts[$i] );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
142 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
143 if ( $read_ends[$i] ) {
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
144 push( @end_data_for_stats, $read_ends[$i] );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
145 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
146 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
147 $stat_start->add_data(@start_data_for_stats);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
148 $stat_end->add_data(@end_data_for_stats);
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
149
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
150 # Lots of data to return
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
151 use CPT::Analysis::PAUSE::ParsedSam;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
152 my $psam = CPT::Analysis::PAUSE::ParsedSam->new(
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
153 coverage_density => \@coverage_density,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
154 read_starts => \@read_starts,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
155 read_ends => \@read_ends,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
156 max => $coverage_density_max_value,
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
157 stats_start_max => $stat_start->max(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
158 stats_end_max => $stat_end->max(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
159 stats_start_mean => $stat_start->mean(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
160 stats_end_mean => $stat_end->mean(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
161 stats_start_standard_deviation =>
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
162 $stat_start->standard_deviation(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
163 stats_end_standard_deviation => $stat_end->standard_deviation(),
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
164 );
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
165 return $psam;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
166 }
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
167
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
168 no Moose;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
169 1;
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
170
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
171 __END__
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
172
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
173 =pod
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
174
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
175 =encoding UTF-8
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
176
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
177 =head1 NAME
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
178
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
179 CPT::Analysis::PAUSE - Library for use in PAUSE analysis
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
180
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
181 =head1 VERSION
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
182
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
183 version 1.96
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
184
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
185 =head1 AUTHOR
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
186
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
187 Eric Rasche <rasche.eric@yandex.ru>
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
188
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
189 =head1 COPYRIGHT AND LICENSE
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
190
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
191 This software is Copyright (c) 2014 by Eric Rasche.
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
192
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
193 This is free software, licensed under:
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
194
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
195 The GNU General Public License, Version 3, June 2007
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
196
54c7a3ea81e2 Uploaded
cpt
parents:
diff changeset
197 =cut