comparison Roary/lib/Bio/Roary/External/Cdhit.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c47a5f61bc9f
1 package Bio::Roary::External::Cdhit;
2
3 # ABSTRACT: Wrapper to run cd-hit
4
5 =head1 SYNOPSIS
6
7 Wrapper to run cd-hit
8 use Bio::Roary::External::Cdhit;
9
10 my $obj = Bio::Roary::External::Cdhit->new(
11 input_file => 'abc.fa',
12 exec => 'cd-hit',
13 output_base => 'efg',
14 );
15 $obj->run;
16
17 =cut
18
19 use Moose;
20
21 with 'Bio::Roary::JobRunner::Role';
22
# FASTA file handed to cd-hit via -i (mandatory).
has 'input_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Output prefix handed to cd-hit via -o; clusters_filename appends '.clstr' to it.
has 'output_base' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'output',
);

# Primary and alternative executable names; both are offered to _find_exe.
has 'exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cd-hit',
);
has 'alt_exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cdhit',
);

# Value for cd-hit's -M option; derived lazily from memory_in_mb.
has '_max_available_memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build__max_available_memory_in_mb',
);

# Values for cd-hit's -g, -s and -c options respectively.
has '_use_most_similar_clustering' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);
has '_length_difference_cutoff' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);
has '_sequence_identity_threshold' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# Value for cd-hit's -d option.
has '_description_length' => (
    is      => 'ro',
    isa     => 'Int',
    default => 256,
);

# Shell redirection appended to the end of the command; the default discards
# both stdout and stderr.
has '_logging' => (
    is      => 'ro',
    isa     => 'Str',
    default => '> /dev/null 2>&1',
);

# Upper bound on the thread count passed to cd-hit via -T.
has '_max_cpus' => (
    is      => 'ro',
    isa     => 'Int',
    default => 40,
);


# Overload Role: memory_in_mb is built lazily by _build_memory_in_mb.
has 'memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build_memory_in_mb',
);
38
# Builder for memory_in_mb.
#
# Estimates the memory (in MB) to request for the cd-hit job from the size of
# the input file: five times the file size in whole megabytes (1 MB taken as
# 1,000,000 bytes), never less than 2000. When the input file does not exist
# the 2000 MB floor is returned as-is.
sub _build_memory_in_mb {
    my ($self) = @_;

    my $floor_in_mb = 2000;
    my $input_path  = $self->input_file;

    # Nothing to measure yet, so fall back to the floor.
    return $floor_in_mb unless -e $input_path;

    # Bytes -> whole megabytes.
    my $size_in_mb = int( ( -s $input_path ) / 1000000 );

    # Deliberately generous multiplier to cover the worst case.
    my $estimate_in_mb = $size_in_mb * 5;

    return $estimate_in_mb < $floor_in_mb ? $floor_in_mb : $estimate_in_mb;
}
56
# Builder for _max_available_memory_in_mb.
#
# Returns 90% of memory_in_mb, truncated to a whole number; this is the value
# later passed to cd-hit through its -M option.
sub _build__max_available_memory_in_mb {
    my ($self) = @_;

    return int( 0.9 * $self->memory_in_mb );
}
63
# Name of the clusters file: the output base with a '.clstr' extension added.
sub clusters_filename {
    my ($self) = @_;

    return $self->output_base . '.clstr';
}
69
# Private helper: make a value safe to splice into a POSIX shell command line.
#
# Values made up solely of characters the shell never treats specially are
# returned untouched, so commands built from ordinary paths stay exactly as
# they were. Anything else (spaces, quotes, globs, '$', ';', the empty string,
# ...) is wrapped in single quotes, with embedded single quotes written as
# '\''.
sub _shell_quote_if_needed {
    my ($value) = @_;

    return $value if $value =~ m{\A[A-Za-z0-9_./:=+\@%,-]+\z};

    ( my $escaped = $value ) =~ s/'/'\\''/g;
    return "'" . $escaped . "'";
}

# Builds the cd-hit command line as a single string.
#
# The executable is resolved through _find_exe from the exec/alt_exec names,
# and the thread count (-T) is the object's cpus capped at _max_cpus. The
# string ends with the _logging redirection, so it is meant to be run through
# a shell; for that reason the input and output paths are shell-quoted
# whenever they contain characters the shell would otherwise interpret.
#
# NOTE(review): a path that relied on shell expansion (e.g. a leading '~') is
# now passed literally, matching how input_file is treated by the -e/-s checks
# elsewhere in this class.
#
# Returns: the command string.
sub _command_to_run {
    my ($self) = @_;

    my $executable = $self->_find_exe( [ $self->exec, $self->alt_exec ] );

    my $cpus = ( $self->cpus > $self->_max_cpus ) ? $self->_max_cpus : $self->cpus;

    return join(
        ' ',
        (
            $executable,
            '-i', _shell_quote_if_needed( $self->input_file ),
            '-o', _shell_quote_if_needed( $self->output_base ),
            '-T', $cpus,
            '-M', $self->_max_available_memory_in_mb,
            '-g', $self->_use_most_similar_clustering,
            '-s', $self->_length_difference_cutoff,
            '-d', $self->_description_length,
            '-c', $self->_sequence_identity_threshold,
            $self->_logging
        )
    );
}
87
# Runs cd-hit through the configured job runner.
#
# The command line is built exactly once and that same string is both logged
# and queued, so the log always shows what was actually submitted and the
# executable lookup done while building the command is not repeated.
#
# A job runner object is created from _job_runner_class with the command,
# this object's memory_in_mb, _queue and cpus, and its run method is invoked.
#
# Returns: 1.
sub run {
    my ($self) = @_;

    my $command = $self->_command_to_run();
    $self->logger->info( "Running command: " . $command );

    my $job_runner_obj = $self->_job_runner_class->new(
        commands_to_run => [$command],
        memory_in_mb    => $self->memory_in_mb,
        queue           => $self->_queue,
        cpus            => $self->cpus,
    );
    $job_runner_obj->run();

    return 1;
}
99
100 no Moose;
101 __PACKAGE__->meta->make_immutable;
102
103 1;