comparison cpt_psm_plotter/lib/CPT/Analysis/PAUSE.pm @ 0:54c7a3ea81e2 draft

Uploaded
author cpt
date Tue, 05 Jul 2022 05:40:36 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:54c7a3ea81e2
1 package CPT::Analysis::PAUSE;
2
3 # ABSTRACT: Library for use in PAUSE analysis
4 use strict;
5 use warnings;
6 use Moose;
7 use List::Util qw(sum);
8 use Statistics::Descriptive;
9
10 sub max ($$) { shift; $_[ $_[0] < $_[1] ] }
11 sub min ($$) { shift; $_[ $_[0] > $_[1] ] }
12
13 sub derivative {
14 my ( $self, $data_ref ) = @_;
15 my @data = @{$data_ref};
16 my @new_data;
17 foreach ( my $i = 0 ; $i < scalar(@data) - 1 ; $i++ ) {
18 $new_data[ $i + 1 ] = $data[ $i + 1 ] - $data[$i];
19 }
20 return \@new_data;
21 }
22
23 sub find_peaks {
24 my ( $self, %data ) = @_;
25 use IPC::Run3;
26 use File::Temp qw/tempfile/;
27
28 # Store to CSV File
29 my @starts = @{ $data{data} };
30 my ( $fh0, $filename0 ) = tempfile('galaxy.pause.XXXXXXX');
31 printf $fh0 ( "%s,%s\n", 'position', 'count' );
32 for ( my $i = 0 ; $i < scalar(@starts) ; $i++ ) {
33 printf $fh0 "%d,%d\n", $i,
34 ( defined $starts[$i] ? $starts[$i] : 0 );
35 }
36 close($fh0);
37
38 my ( $fh, $filename ) = tempfile('galaxy.pause.XXXXXXX');
39 my @cmd = (
40 'Rscript', $data{location_of_rscript_file},
41 $filename0, $filename, $data{snr}
42 );
43 my ( $in, $out, $err );
44 run3 \@cmd, \$in, \$out, \$err;
45
46 # Read in R data
47 my @values;
48 while (<$fh>) {
49 chomp;
50 push( @values, $_ );
51 }
52 close($fh);
53
54 unlink($filename0);
55 unlink($filename);
56
57 return @values;
58 }
59
60 sub smooth {
61 my ( $self, $data_ref ) = @_;
62 my @data = @{$data_ref};
63 my @new_data;
64 my $length = scalar @data;
65 foreach ( my $i = 0 ; $i < $length ; $i++ ) {
66 my $avg =
67 sum( @data[ $i - 20 .. $i - 1, $i + 1 .. $i + 20 ] ) / 40;
68 $new_data[$i] = $avg;
69 }
70 return \@new_data;
71 }
72
73 sub histogram {
74 my ( $self, %data ) = @_;
75
76 my @coverage = @{ $data{data} };
77 my @return_coverage;
78 for ( my $i = 0 ; $i < scalar(@coverage) ; $i++ ) {
79 my $size = $coverage[$i];
80 unless ($size) { $size = 0 }
81 $return_coverage[$i] = [ $i, $size, "*" x $size ];
82 }
83 my %results = (
84 'Sheet1' => {
85 headers => [qw(Base Count Plot)],
86 data => \@return_coverage,
87 }
88 );
89 return %results;
90 }
91
92 sub getCoverageDensity {
93 my ( $self, %data ) = @_;
94
95 # Load the sam file
96 my $sam = Bio::DB::Sam->new(
97 -bam => $data{bam},
98 -fasta => $data{genome},
99 -autoindex => 1,
100 );
101
102 # Get all alignments to our indicated FASTA file
103 my @alignments = $sam->get_features_by_location(
104 -seq_id => $data{fasta_id},
105 -start => 1,
106 -end => $data{fasta_length}
107 );
108
109 # Set up some variables
110 my $coverage_density_max_value = 0;
111 my ( @coverage_density, @read_starts, @read_ends );
112
113 # including some for statistics
114 my $stat_start = Statistics::Descriptive::Sparse->new();
115 my $stat_end = Statistics::Descriptive::Sparse->new();
116
117 # Looping over alignments
118 for my $a (@alignments) {
119 my $start = $a->start;
120 my $end = $a->end;
121
122 # Increment the number of reads starting there
123 $read_starts[$start]++;
124 $read_ends[$end]++;
125
126 # And increment the coverage density
127 foreach ( $start .. $end ) {
128 $coverage_density[$_]++;
129 if ( $coverage_density[$_] >
130 $coverage_density_max_value )
131 {
132 $coverage_density_max_value =
133 $coverage_density[$_];
134 }
135 }
136 }
137 my @start_data_for_stats;
138 my @end_data_for_stats;
139 for ( my $i = 0 ; $i < $data{fasta_length} ; $i++ ) {
140 if ( $read_starts[$i] ) {
141 push( @start_data_for_stats, $read_starts[$i] );
142 }
143 if ( $read_ends[$i] ) {
144 push( @end_data_for_stats, $read_ends[$i] );
145 }
146 }
147 $stat_start->add_data(@start_data_for_stats);
148 $stat_end->add_data(@end_data_for_stats);
149
150 # Lots of data to return
151 use CPT::Analysis::PAUSE::ParsedSam;
152 my $psam = CPT::Analysis::PAUSE::ParsedSam->new(
153 coverage_density => \@coverage_density,
154 read_starts => \@read_starts,
155 read_ends => \@read_ends,
156 max => $coverage_density_max_value,
157 stats_start_max => $stat_start->max(),
158 stats_end_max => $stat_end->max(),
159 stats_start_mean => $stat_start->mean(),
160 stats_end_mean => $stat_end->mean(),
161 stats_start_standard_deviation =>
162 $stat_start->standard_deviation(),
163 stats_end_standard_deviation => $stat_end->standard_deviation(),
164 );
165 return $psam;
166 }
167
168 no Moose;
169 1;
170
171 __END__
172
173 =pod
174
175 =encoding UTF-8
176
177 =head1 NAME
178
179 CPT::Analysis::PAUSE - Library for use in PAUSE analysis
180
181 =head1 VERSION
182
183 version 1.96
184
185 =head1 AUTHOR
186
187 Eric Rasche <rasche.eric@yandex.ru>
188
189 =head1 COPYRIGHT AND LICENSE
190
191 This software is Copyright (c) 2014 by Eric Rasche.
192
193 This is free software, licensed under:
194
195 The GNU General Public License, Version 3, June 2007
196
197 =cut