Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary/External/Cdhit.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 package Bio::Roary::External::Cdhit; | |
2 | |
3 # ABSTRACT: Wrapper to run cd-hit | |
4 | |
=head1 SYNOPSIS

Wrapper to run cd-hit.

    use Bio::Roary::External::Cdhit;

    my $obj = Bio::Roary::External::Cdhit->new(
        input_file  => 'abc.fa',
        exec        => 'cd-hit',
        output_base => 'efg',
    );
    $obj->run;

=cut
18 | |
19 use Moose; | |
20 | |
21 with 'Bio::Roary::JobRunner::Role'; | |
22 | |
# --- Caller-facing options ---------------------------------------------------

# Sequence file passed to the executable with -i (mandatory).
has 'input_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Value passed with -o; clusters_filename() appends '.clstr' to it.
has 'output_base' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'output',
);

# Preferred and fallback executable names; both are offered to _find_exe.
has 'exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cd-hit',
);
has 'alt_exec' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'cdhit',
);

# --- Internal tuning knobs, each emitted as a command-line flag --------------

# -M : derived lazily from memory_in_mb.
has '_max_available_memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build__max_available_memory_in_mb',
);

# -g
has '_use_most_similar_clustering' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 1,
);

# -s
has '_length_difference_cutoff' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# -c
has '_sequence_identity_threshold' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
);

# -d
has '_description_length' => (
    is      => 'ro',
    isa     => 'Int',
    default => 256,
);

# Shell redirection appended verbatim to the end of the command line.
has '_logging' => (
    is      => 'ro',
    isa     => 'Str',
    default => '> /dev/null 2>&1',
);

# Upper bound applied to the thread count given with -T.
has '_max_cpus' => (
    is      => 'ro',
    isa     => 'Int',
    default => 40,
);

# Overload Role: memory_in_mb is declared here with its own lazy builder so
# the request is sized from the input file.
has 'memory_in_mb' => (
    is      => 'ro',
    isa     => 'Int',
    lazy    => 1,
    builder => '_build_memory_in_mb',
);
38 | |
# Builder for memory_in_mb.
#
# Returns the amount of memory (in MB) to request for the job: five times
# the size of input_file in decimal megabytes, but never less than 2000.
# When input_file does not exist the 2000 MB floor is returned as-is.
sub _build_memory_in_mb {
    my ($self) = @_;

    my $floor_mb = 2000;
    my $fasta    = $self->input_file;

    # Nothing on disk to measure: fall back to the floor.
    return $floor_mb unless -e $fasta;

    # Bytes -> whole megabytes (truncated), then a generous 5x margin for
    # the worst case scenario.
    my $size_mb  = int( ( -s $fasta ) / 1000000 );
    my $estimate = $size_mb * 5;

    return $estimate < $floor_mb ? $floor_mb : $estimate;
}
56 | |
# Builder for _max_available_memory_in_mb.
#
# Returns 90% of memory_in_mb, truncated to an integer. This is the value
# handed to the executable with -M, leaving a margin below the amount
# requested for the job.
sub _build__max_available_memory_in_mb {
    my ($self) = @_;
    return int( 0.9 * $self->memory_in_mb );
}
63 | |
# Returns the name of the clusters file: output_base with a '.clstr'
# suffix appended.
sub clusters_filename {
    my ($self) = @_;
    return $self->output_base . '.clstr';
}
69 | |
# Assembles the full shell command line as a single space-separated string.
#
# The executable is resolved through _find_exe from the preferred and
# alternative names, the thread count is cpus capped at _max_cpus, and the
# _logging redirection is appended last.
sub _command_to_run {
    my ($self) = @_;

    my $binary = $self->_find_exe( [ $self->exec, $self->alt_exec ] );

    # Never ask for more threads than the configured ceiling.
    my $threads = $self->cpus;
    $threads = $self->_max_cpus if $threads > $self->_max_cpus;

    my @argv = (
        $binary,
        '-i' => $self->input_file,
        '-o' => $self->output_base,
        '-T' => $threads,
        '-M' => $self->_max_available_memory_in_mb,
        '-g' => $self->_use_most_similar_clustering,
        '-s' => $self->_length_difference_cutoff,
        '-d' => $self->_description_length,
        '-c' => $self->_sequence_identity_threshold,
        $self->_logging,
    );

    return join ' ', @argv;
}
87 | |
# Builds the command line, logs it, and hands it to a job runner.
#
# The command is built exactly once so that the string written to the log
# is guaranteed to be the one that is executed, and the executable lookup
# done by _command_to_run is not repeated.
#
# The job runner is created from _job_runner_class with the single command,
# memory_in_mb, _queue and cpus, and its run() method is invoked.
#
# Returns 1.
sub run {
    my ($self) = @_;

    my $command = $self->_command_to_run();
    $self->logger->info( "Running command: " . $command );

    my $job_runner_obj = $self->_job_runner_class->new(
        commands_to_run => [$command],
        memory_in_mb    => $self->memory_in_mb,
        queue           => $self->_queue,
        cpus            => $self->cpus,
    );
    $job_runner_obj->run();

    return 1;
}
99 | |
100 no Moose; | |
101 __PACKAGE__->meta->make_immutable; | |
102 | |
103 1; |