annotate Roary/lib/Bio/Roary/External/IterativeCdhit.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::External::IterativeCdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Iteratively run CDhit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 Iteratively run CDhit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9 use Bio::Roary::External::IterativeCdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11 my $seg= Bio::Roary::External::IterativeCdhit->new(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 output_cd_hit_filename => '',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 output_combined_filename => '',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 number_of_input_files => 10,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 output_filtered_clustered_fasta => '',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 $seg->run();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
23 with 'Bio::Roary::JobRunner::Role';
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
24
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Files and counts handed to the external iterative cd-hit command.
has 'output_cd_hit_filename' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'output_combined_filename' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);
has 'number_of_input_files' => (
    is       => 'ro',
    isa      => 'Int',
    required => 1,
);
has 'output_filtered_clustered_fasta' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Name of the executable placed at the front of the generated command line.
has 'exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'iterative_cdhit',
);

# Upper bound on the value passed to the executable's --cpus option.
has '_max_cpus' => (
    is      => 'ro',
    isa     => 'Int',
    default => 40,
);

# Overrides the attribute of the same name that presumably comes from
# Bio::Roary::JobRunner::Role (not visible in this file); the value is
# computed lazily by _build_memory_in_mb.
has 'memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build_memory_in_mb',
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
33
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the lazy 'memory_in_mb' attribute.
#
# Estimates the memory (in MB) to request from the size of the combined
# output file: five times the file size in whole megabytes, but never less
# than 2000. When the file does not exist the 2000 MB minimum is returned.
sub _build_memory_in_mb {
    my ($self) = @_;

    my $minimum_mb    = 2000;
    my $combined_file = $self->output_combined_filename;

    # Nothing on disk to measure yet, so fall back to the minimum.
    return $minimum_mb unless -e $combined_file;

    # Size in bytes converted to whole (decimal) megabytes.
    my $size_mb = int( ( -s $combined_file ) / 1000000 );

    # Quintuple the size to allow for the worst case scenario.
    my $estimate_mb = $size_mb * 5;

    return $estimate_mb < $minimum_mb ? $minimum_mb : $estimate_mb;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
51
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns 90% of memory_in_mb, truncated to a whole number of megabytes.
# NOTE(review): presumably the builder for a '_max_available_memory_in_mb'
# attribute declared outside this file - confirm against the role.
sub _build__max_available_memory_in_mb {
    my ($self) = @_;
    return int( 0.9 * $self->memory_in_mb );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
57
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Assembles the command line for the external executable as a single
# space-separated string.
#
# The --cpus value is the object's cpus, capped at _max_cpus. Arguments are
# joined verbatim; no shell quoting is applied to the filenames.
sub _command_to_run {
    my ($self) = @_;

    # Never pass more CPUs than _max_cpus allows.
    my $cpus = $self->cpus;
    $cpus = $self->_max_cpus if $cpus > $self->_max_cpus;

    my @command_parts = (
        $self->exec,
        '-c',     $self->output_cd_hit_filename,
        '-m',     $self->output_combined_filename,
        '-n',     $self->number_of_input_files,
        '--cpus', $cpus,
        '-f',     $self->output_filtered_clustered_fasta,
    );

    return join( ' ', @command_parts );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
71
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Logs the command and hands it to a job runner for execution.
#
# A runner object is created from _job_runner_class with the single command,
# the memory estimate, the queue and the CPU count, and its run() method is
# invoked. The runner's return value is ignored; this method always
# returns 1.
sub run {
    my ($self) = @_;

    # Build the command once and reuse it for both the log and the job list.
    my $command = $self->_command_to_run;
    $self->logger->info( "Running command: " . $command );

    my $job_runner = $self->_job_runner_class->new(
        commands_to_run => [$command],
        memory_in_mb    => $self->memory_in_mb,
        queue           => $self->_queue,
        cpus            => $self->cpus
    );
    $job_runner->run();

    return 1;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
88
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
89 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
90 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
91
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
92 1;