annotate Roary/lib/Bio/Roary/External/Cdhit.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::External::Cdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Wrapper to run cd-hit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 Wrapper to run cd-hit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 use Bio::Roary::External::Cdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 my $obj = Bio::Roary::External::Cdhit->new(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11 input_file => 'abc.fa',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 exec => 'cd-hit',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 output_base => 'efg',
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 $obj->run;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21 with 'Bio::Roary::JobRunner::Role';
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# File passed to cd-hit with -i; must be supplied by the caller.
has 'input_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Value passed to cd-hit with -o; clusters_filename() appends '.clstr' to it.
has 'output_base' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'output',
);

# Preferred executable name, tried first by _command_to_run().
has 'exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cd-hit',
);

# Alternative executable name, offered to _find_exe alongside 'exec'.
has 'alt_exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cdhit',
);

# Value passed to cd-hit with -M; built lazily as 90% of memory_in_mb.
has '_max_available_memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build__max_available_memory_in_mb',
);

# Value passed to cd-hit with -g.
has '_use_most_similar_clustering' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# Value passed to cd-hit with -s.
has '_length_difference_cutoff' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# Value passed to cd-hit with -c.
has '_sequence_identity_threshold' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# Value passed to cd-hit with -d.
has '_description_length' => (
    is      => 'ro',
    isa     => 'Int',
    default => 256,
);

# Text appended verbatim to the end of the command line; the default is a
# shell redirection that discards both stdout and stderr.
has '_logging' => (
    is      => 'ro',
    isa     => 'Str',
    default => '> /dev/null 2>&1',
);

# Upper bound applied to 'cpus' when computing the -T value.
has '_max_cpus' => (
    is      => 'ro',
    isa     => 'Int',
    default => 40,
);

# Overload Role: redeclares memory_in_mb (presumably also provided by
# Bio::Roary::JobRunner::Role - confirm) so that it is derived lazily from
# the size of the input file by _build_memory_in_mb().
has 'memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build_memory_in_mb',
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
38
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the 'memory_in_mb' attribute.
#
# Estimates how much memory (in MB) to request for the job from the size of
# the input file: the size is truncated to whole megabytes (1 MB = 1,000,000
# bytes), multiplied by five to allow for the worst case, and never allowed
# to drop below 2000. When the input file does not exist the 2000 MB floor is
# returned as-is.
sub _build_memory_in_mb {
    my ($self) = @_;

    my $minimum_mb = 2000;
    my $input_path = $self->input_file;

    # Nothing to measure: fall back to the floor value.
    return $minimum_mb unless -e $input_path;

    # Truncate to whole megabytes first, then apply the safety factor.
    my $size_in_mb = int( ( -s $input_path ) / 1000000 );
    my $padded_mb  = $size_in_mb * 5;

    return $padded_mb < $minimum_mb ? $minimum_mb : $padded_mb;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
56
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the '_max_available_memory_in_mb' attribute.
#
# Returns 90% of memory_in_mb, truncated to an integer. _command_to_run()
# passes this value to cd-hit with -M, so the limit given to cd-hit stays
# below the memory requested for the job itself.
sub _build__max_available_memory_in_mb {
    my $self = shift;
    return int( $self->memory_in_mb() * 0.9 );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
63
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the name of the clusters file for this run: output_base with a
# '.clstr' suffix appended.
sub clusters_filename {
    my $self = shift;
    return sprintf( '%s.%s', $self->output_base, 'clstr' );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
69
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Assembles the complete cd-hit command line as a single space-separated
# string.
#
# The executable is resolved by _find_exe from 'exec' and 'alt_exec' (the
# helper is not defined in this file; presumably it comes from the consumed
# role). The thread count is 'cpus' capped at '_max_cpus'. The '_logging'
# text is appended last, unmodified. Values are joined as-is, without any
# shell quoting.
sub _command_to_run {
    my ($self) = @_;

    my $cdhit_binary = $self->_find_exe( [ $self->exec, $self->alt_exec ] );

    # Never ask cd-hit for more threads than the configured ceiling.
    my $thread_count = $self->cpus;
    $thread_count = $self->_max_cpus if $thread_count > $self->_max_cpus;

    my @arguments = (
        '-i' => $self->input_file,
        '-o' => $self->output_base,
        '-T' => $thread_count,
        '-M' => $self->_max_available_memory_in_mb,
        '-g' => $self->_use_most_similar_clustering,
        '-s' => $self->_length_difference_cutoff,
        '-d' => $self->_description_length,
        '-c' => $self->_sequence_identity_threshold,
    );

    return join( ' ', $cdhit_binary, @arguments, $self->_logging );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
87
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Runs cd-hit through the configured job runner.
#
# Builds the command line, logs it at info level, then hands it to a new
# instance of _job_runner_class together with the memory, queue and cpu
# settings, and runs that job runner. (logger, _job_runner_class, _queue and
# cpus are not defined in this file; presumably they come from the consumed
# role.)
#
# Returns 1.
sub run {
    my ($self) = @_;

    # Build the command exactly once so that the executable lookup is not
    # repeated and the logged text is guaranteed to be what gets executed.
    my $command = $self->_command_to_run();

    $self->logger->info( "Running command: " . $command );

    my $job_runner_obj = $self->_job_runner_class->new(
        commands_to_run => [$command],
        memory_in_mb    => $self->memory_in_mb,
        queue           => $self->_queue,
        cpus            => $self->cpus,
    );
    $job_runner_obj->run();

    return 1;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
99
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Remove the Moose keywords (has, with, ...) from this package's namespace.
no Moose;

# Finalise the class definition.
__PACKAGE__->meta->make_immutable;

# A module file must end with a true value.
1;