comparison Roary/lib/Bio/Roary/QC/Report.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c47a5f61bc9f
1 package Bio::Roary::QC::Report;
2
3 # ABSTRACT: generate a report based on kraken output
4
5 =head1 SYNOPSIS
6
7 =cut
8
9 use Moose;
10 use File::Temp;
11 use File::Path 'rmtree';
12 use Cwd;
13 use File::Basename;
14 with 'Bio::Roary::JobRunner::Role';
15
16 has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
17 has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' );
18 has 'kraken_report_exec' => ( is => 'ro', isa => 'Str', default => 'kraken-report' );
19 has 'kraken_db' => ( is => 'ro', isa => 'Str', required => 1 );
20 has 'outfile' => ( is => 'rw', isa => 'Str', default => 'qc_report.csv' );
21 has '_kraken_data' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
22 has '_header' => ( is => 'rw', isa => 'Str', lazy_build => 1 );
23 has 'kraken_memory' => ( is => 'rw', isa => 'Int', default => 2000 );
24
25 has '_tmp_directory_obj' => ( is => 'rw', lazy_build => 1 );
26 has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', );
27
28
29 sub _nuc_fasta_filename
30 {
31 my ($self, $gff) = @_;
32
33 my $prefix = basename( $gff, ".gff" );
34 my $outfile = $self->_tmp_directory . "/$prefix.fna";
35 return $outfile;
36 }
37
38 sub _extract_nuc_fasta_cmd {
39 my ($self, $gff) = @_;
40 my $outfile = $self->_nuc_fasta_filename($gff);
41 my $cmd = "sed -n '/##FASTA/,//p' $gff | grep -v \'##FASTA\' > $outfile";
42
43 return $cmd;
44 }
45
46 sub _extract_nuc_files_from_all_gffs
47 {
48 my ($self) = @_;
49 my @nuc_files;
50 my @commands_to_run;
51 for my $input_file(@{$self->input_files})
52 {
53 push(@nuc_files,$self->_nuc_fasta_filename($input_file));
54 push(@commands_to_run,$self->_extract_nuc_fasta_cmd($input_file));
55 }
56 my $kraken_runner_obj = $self->_job_runner_class->new(
57 commands_to_run => \@commands_to_run,
58 memory_in_mb => $self->kraken_memory,
59 verbose => $self->verbose,
60 cpus => $self->cpus
61 );
62 $kraken_runner_obj->run();
63 return \@nuc_files;
64 }
65
66 sub _kraken_cmd {
67 my ( $self, $a, $kraken_output ) = @_;
68
69 my $kcmd = $self->kraken_exec .
70 " --fasta-input ".
71 " --preload ".
72 " --db " . $self->kraken_db .
73 " --output $kraken_output $a > /dev/null 2>&1";
74 return $kcmd;
75 }
76
77 sub _kraken_report_cmd {
78 my ( $self, $k, $report_output ) = @_;
79
80 my $krcmd = $self->kraken_report_exec .
81 " --db " . $self->kraken_db .
82 " $k > $report_output";
83 return $krcmd;
84 }
85
86 sub _kraken_output_filename
87 {
88 my ( $self, $assembly ) = @_;
89 my $kraken_output = $assembly;
90 $kraken_output =~ s/fna$/kraken/;
91 return $kraken_output;
92 }
93
94 sub _run_kraken_on_nuc_files
95 {
96 my ( $self, $nuc_files ) = @_;
97 my @kraken_output_files;
98 my @commands_to_run;
99 for my $nuc_file(@{$nuc_files})
100 {
101 my $kraken_output = $self->_kraken_output_filename($nuc_file);
102 push(@kraken_output_files, $kraken_output );
103 push(@commands_to_run, $self->_kraken_cmd( $nuc_file, $kraken_output ));
104 }
105
106 my $kraken_runner_obj = $self->_job_runner_class->new(
107 commands_to_run => \@commands_to_run,
108 memory_in_mb => $self->kraken_memory,
109 verbose => $self->verbose,
110 cpus => $self->cpus
111 );
112 $kraken_runner_obj->run();
113
114 for my $filename(@{$nuc_files})
115 {
116 unlink($filename);
117 }
118
119 return \@kraken_output_files;
120 }
121
122 sub _kraken_report_output_filename
123 {
124 my ( $self, $assembly ) = @_;
125 return $assembly.".report";
126 }
127
128 sub _run_kraken_report_on_kraken_files
129 {
130 my ( $self, $kraken_files ) = @_;
131
132 my @kraken_report_output_files;
133 my @commands_to_run;
134 for my $nuc_file(@{$kraken_files})
135 {
136 my $kraken_output = $self->_kraken_report_output_filename($nuc_file);
137 push(@kraken_report_output_files, $kraken_output );
138 push(@commands_to_run, $self->_kraken_report_cmd( $nuc_file, $kraken_output ));
139 }
140
141 my $kraken_runner_obj = $self->_job_runner_class->new(
142 commands_to_run => \@commands_to_run,
143 memory_in_mb => $self->kraken_memory,
144 verbose => $self->verbose,
145 cpus => $self->cpus
146 );
147 $kraken_runner_obj->run();
148 for my $filename(@{$kraken_files})
149 {
150 unlink($filename);
151 }
152 return \@kraken_report_output_files;
153 }
154
155 sub _build__kraken_data {
156 my $self = shift;
157 my $nuc_files = $self->_extract_nuc_files_from_all_gffs();
158 my $kraken_files = $self->_run_kraken_on_nuc_files($nuc_files);
159 my $kraken_report_files = $self->_run_kraken_report_on_kraken_files( $kraken_files );
160
161 return $self->_parse_kraken_reports($kraken_report_files);
162 }
163
164 sub _parse_kraken_reports
165 {
166 my ( $self, $kraken_report_files ) = @_;
167
168 my @report_rows;
169 for my $kraken_report(@{$kraken_report_files})
170 {
171 push(@report_rows, $self->_parse_kraken_report($kraken_report));
172 }
173
174 for my $kraken_report(@{$kraken_report_files})
175 {
176 unlink($kraken_report);
177 }
178
179 return \@report_rows;
180 }
181
182 sub _parse_kraken_report {
183 my ( $self, $kraken_report ) = @_;
184
185 # parse report
186 open( my $report_fh, '<', $kraken_report );
187
188 my $sample_name = $kraken_report;
189 $sample_name =~ s/.report$//;
190 $sample_name =~ s/.kraken$//;
191 my($sample_base_name, $dirs, $suffix) = fileparse($sample_name);
192
193 my ( $top_genus, $top_species );
194 while ( <$report_fh> ){
195 my @parts = split( "\t" );
196 chomp @parts;
197
198 $top_genus = $parts[5] if ( (! defined $top_genus) && $parts[3] eq 'G' );
199 $top_species = $parts[5] if ( (! defined $top_species) && $parts[3] eq 'S' );
200
201 last if (defined $top_genus && defined $top_species);
202 }
203 close($report_fh);
204
205 $top_genus ||= "not_found";
206 $top_genus =~ s/^\s+//g;
207 $top_species ||= "not_found";
208 $top_species =~ s/^\s+//g;
209
210 return [ $sample_base_name, $top_genus, $top_species ];
211 }
212
213
214 sub _build__header {
215 return join( ',', ( 'Sample', 'Genus', 'Species' ) );
216 }
217
218 sub _build__tmp_directory_obj {
219 return File::Temp->newdir(DIR => getcwd, CLEANUP => 1 );
220 }
221
222 sub _build__tmp_directory {
223 my $self = shift;
224 return $self->_tmp_directory_obj->dirname();
225 }
226
227 sub report {
228 my $self = shift;
229
230 open( OUTFILE, '>', $self->outfile );
231 print OUTFILE $self->_header . "\n";
232 for my $line ( @{ $self->_kraken_data } ){
233 print OUTFILE join( ',', @{ $line } ) . "\n";
234 }
235 close OUTFILE;
236 }
237
238
239 __PACKAGE__->meta->make_immutable;
240 no Moose;
241 1;