Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary/CommandLine/IterativeCdhit.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 undef $VERSION; | |
2 package Bio::Roary::CommandLine::IterativeCdhit; | |
3 | |
4 # ABSTRACT: Iteratively run cdhit | |
5 | |
6 =head1 SYNOPSIS | |
7 | |
8 Iteratively run cdhit | |
9 | |
10 =cut | |
11 | |
12 use Moose; | |
13 use Getopt::Long qw(GetOptionsFromArray); | |
14 use Bio::Roary::IterativeCdhit; | |
15 extends 'Bio::Roary::CommandLine::Common'; | |
16 | |
# --- Constructor inputs (both required) -------------------------------------

# Command-line arguments; BUILD parses them with GetOptionsFromArray.
has 'args' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

has 'script_name' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# When true, run() dies with the usage text instead of doing any work.
has 'help' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# When defined, run() prints this message and then dies with the usage text.
has '_error_message' => (
    is  => 'rw',
    isa => 'Str',
);

# --- File names and input count handed to Bio::Roary::IterativeCdhit --------

has 'output_cd_hit_filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_clustered',
);

has 'output_combined_filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_combined_files',
);

has 'number_of_input_files' => (
    is      => 'rw',
    isa     => 'Int',
    default => 1,
);

has 'output_filtered_clustered_fasta' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_clustered_filtered.fa',
);

# --- Identity thresholds -----------------------------------------------------
# Stored as fractions: BUILD divides the values given on the command line by
# 100 before storing them here.

has 'lower_bound_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.98,
);

has 'upper_bound_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.99,
);

has 'step_size_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.005,
);

# --- Runtime settings --------------------------------------------------------

has 'cpus' => (
    is      => 'rw',
    isa     => 'Int',
    default => 1,
);

has 'verbose' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
32 | |
33 | |
# Moose construction hook: parses the command-line arguments held in
# $self->args and copies every option that was supplied onto the matching
# attribute. Options that were not supplied leave the attribute untouched, so
# its declared default stays in effect.
#
# The three percentage options are given on the command line as percentages
# (e.g. 98) and stored as fractions (0.98). They are declared as real numbers
# (=f) so that a non-numeric value is rejected by the option parser instead of
# being silently turned into 0 by the division.
#
# If option parsing fails, _error_message is set. run() checks that attribute
# and, when it is defined, prints it and dies with the usage text.
sub BUILD {
    my ($self) = @_;

    my (
        $output_cd_hit_filename,
        $output_combined_filename,
        $number_of_input_files,
        $output_filtered_clustered_fasta,
        $lower_bound_percentage,
        $upper_bound_percentage,
        $step_size_percentage,
        $cpus,
        $verbose,
        $help,
    );

    # GetOptionsFromArray returns false when it meets an unknown option or an
    # invalid value; it reports the details itself via warn.
    my $parsed_ok = GetOptionsFromArray(
        $self->args,
        'c|output_cd_hit_filename=s'          => \$output_cd_hit_filename,
        'm|output_combined_filename=s'        => \$output_combined_filename,
        'n|number_of_input_files=i'           => \$number_of_input_files,
        'f|output_filtered_clustered_fasta=s' => \$output_filtered_clustered_fasta,
        'l|lower_bound_percentage=f'          => \$lower_bound_percentage,
        'u|upper_bound_percentage=f'          => \$upper_bound_percentage,
        's|step_size_percentage=f'            => \$step_size_percentage,
        'p|cpus=i'                            => \$cpus,
        'v|verbose'                           => \$verbose,
        'h|help'                              => \$help,
    );

    $self->_error_message('Error: Invalid command-line options were supplied')
      unless $parsed_ok;

    if ( defined($verbose) ) {
        $self->verbose($verbose);

        # NOTE(review): 10000 is presumably the logging framework's DEBUG
        # level - confirm against the logger provided by the parent class.
        $self->logger->level(10000);
    }
    $self->help($help) if ( defined($help) );

    # Command-line percentages are converted to fractions before storing.
    $self->lower_bound_percentage( $lower_bound_percentage / 100 ) if ( defined($lower_bound_percentage) );
    $self->upper_bound_percentage( $upper_bound_percentage / 100 ) if ( defined($upper_bound_percentage) );
    $self->step_size_percentage( $step_size_percentage / 100 )     if ( defined($step_size_percentage) );

    $self->output_cd_hit_filename($output_cd_hit_filename)     if ( defined($output_cd_hit_filename) );
    $self->output_combined_filename($output_combined_filename) if ( defined($output_combined_filename) );
    $self->number_of_input_files($number_of_input_files)       if ( defined($number_of_input_files) );
    $self->cpus($cpus)                                         if ( defined($cpus) );
    $self->output_filtered_clustered_fasta($output_filtered_clustered_fasta)
      if ( defined($output_filtered_clustered_fasta) );

    return;
}
70 | |
# Runs the iterative cd-hit step with the settings held on this object.
#
# Dies with the usage text when help was requested. When an error message has
# been recorded, prints it and then dies with the usage text. Otherwise builds
# a Bio::Roary::IterativeCdhit from the attributes and returns the result of
# its run method.
sub run {
    my ($self) = @_;

    die $self->usage_text if $self->help;

    my $error_message = $self->_error_message;
    if ( defined $error_message ) {
        print $error_message . "\n";
        die $self->usage_text;
    }

    # Each of these attributes is handed to the worker under its own name.
    my @forwarded_attributes = qw(
      output_cd_hit_filename
      output_combined_filename
      number_of_input_files
      output_filtered_clustered_fasta
      lower_bound_percentage
      upper_bound_percentage
      step_size_percentage
      cpus
      logger
    );
    my %worker_args = map { $_ => $self->$_ } @forwarded_attributes;

    return Bio::Roary::IterativeCdhit->new(%worker_args)->run;
}
94 | |
# Returns the help text for the iterative_cdhit command as a single string.
# The text is static; nothing is interpolated into it.
sub usage_text {
    my ($self) = @_;

    my $usage = <<'USAGE';
Usage: iterative_cdhit [options]
Iteratively cluster a FASTA file of proteins with CD-hit, lower the threshold each time and extracting core genes (1 per isolate) to another file, and remove them from the input proteins file.

Required arguments:
-m STR input FASTA file of protein sequences [_combined_files]

Options: -p INT number of threads [1]
-n INT number of isolates [1]
-c STR cd-hit output filename [_clustered]
-f STR output filename for filtered sequences [_clustered_filtered.fa]
-l FLOAT lower bound percentage identity [98.0]
-u FLOAT upper bound percentage identity [99.0]
-s FLOAT step size for percentage identity [0.5]
-v verbose output to STDOUT
-h this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE

    return $usage;
}
118 | |
119 __PACKAGE__->meta->make_immutable; | |
120 no Moose; | |
121 1; |