annotate Roary/lib/Bio/Roary/CommandLine/IterativeCdhit.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 undef $VERSION;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2 package Bio::Roary::CommandLine::IterativeCdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4 # ABSTRACT: Iteratively run cdhit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 Iteratively run cdhit
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 use Getopt::Long qw(GetOptionsFromArray);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 use Bio::Roary::IterativeCdhit;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 extends 'Bio::Roary::CommandLine::Common';
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Raw command-line arguments (parsed in BUILD) and the name of the invoking
# script. Both must be passed to the constructor.
has 'args' => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);
has 'script_name' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# True when the help flag was given; run() then dies with the usage text.
has 'help' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# When defined, run() prints this message and dies with the usage text.
has '_error_message' => (
    is  => 'rw',
    isa => 'Str',
);

# File names and input count handed on to Bio::Roary::IterativeCdhit.
has 'output_cd_hit_filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_clustered',
);
has 'output_combined_filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_combined_files',
);
has 'number_of_input_files' => (
    is      => 'rw',
    isa     => 'Int',
    default => 1,
);
has 'output_filtered_clustered_fasta' => (
    is      => 'rw',
    isa     => 'Str',
    default => '_clustered_filtered.fa',
);

# Identity thresholds are held as fractions (0.98 means 98%); BUILD divides
# the values given on the command line by 100 before storing them here.
has 'lower_bound_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.98,
);
has 'upper_bound_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.99,
);
has 'step_size_percentage' => (
    is      => 'rw',
    isa     => 'Num',
    default => 0.005,
);

# Thread count passed to the clustering step, and the verbosity switch.
has 'cpus' => (
    is      => 'rw',
    isa     => 'Int',
    default => 1,
);
has 'verbose' => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
32
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
33
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Moose construction hook: parse the command-line arguments held in
# $self->args and copy every option that was actually supplied onto the
# matching attribute. Options that were not supplied keep their defaults.
#
# The three percentage options are given on the command line as percentages
# (e.g. 98) and stored on the object as fractions (e.g. 0.98). They are
# declared as real numbers ("=f") so that Getopt::Long rejects non-numeric
# values instead of letting the division below coerce them to 0.
#
# When Getopt::Long reports a parse failure (unknown option, missing value,
# non-numeric value) its own diagnostics still go to STDERR, and
# _error_message is set so that run() prints it and dies with the usage text
# rather than carrying on with partially parsed options.
sub BUILD {
    my ($self) = @_;

    my (
        $output_cd_hit_filename,
        $output_combined_filename,
        $number_of_input_files,
        $output_filtered_clustered_fasta,
        $lower_bound_percentage,
        $upper_bound_percentage,
        $step_size_percentage,
        $cpus,
        $verbose,
        $help,
    );

    # GetOptionsFromArray returns false if any option could not be processed.
    my $parsed_ok = GetOptionsFromArray(
        $self->args,
        'c|output_cd_hit_filename=s'          => \$output_cd_hit_filename,
        'm|output_combined_filename=s'        => \$output_combined_filename,
        'n|number_of_input_files=i'           => \$number_of_input_files,
        'f|output_filtered_clustered_fasta=s' => \$output_filtered_clustered_fasta,
        'l|lower_bound_percentage=f'          => \$lower_bound_percentage,
        'u|upper_bound_percentage=f'          => \$upper_bound_percentage,
        's|step_size_percentage=f'            => \$step_size_percentage,
        'p|cpus=i'                            => \$cpus,
        'v|verbose'                           => \$verbose,
        'h|help'                              => \$help,
    );
    $self->_error_message('Error: Invalid command line options')
      unless $parsed_ok;

    if ( defined($verbose) ) {
        $self->verbose($verbose);

        # NOTE(review): 10000 is presumably the logger's DEBUG level (as in
        # Log::Log4perl) -- confirm against the logger provided by the
        # parent class.
        $self->logger->level(10000);
    }
    $self->help($help) if ( defined($help) );

    # Command line takes percentages; the attributes hold fractions.
    $self->lower_bound_percentage( $lower_bound_percentage / 100 )
      if ( defined($lower_bound_percentage) );
    $self->upper_bound_percentage( $upper_bound_percentage / 100 )
      if ( defined($upper_bound_percentage) );
    $self->step_size_percentage( $step_size_percentage / 100 )
      if ( defined($step_size_percentage) );

    $self->output_cd_hit_filename($output_cd_hit_filename)
      if ( defined($output_cd_hit_filename) );
    $self->output_combined_filename($output_combined_filename)
      if ( defined($output_combined_filename) );
    $self->number_of_input_files($number_of_input_files)
      if ( defined($number_of_input_files) );
    $self->cpus($cpus) if ( defined($cpus) );
    $self->output_filtered_clustered_fasta($output_filtered_clustered_fasta)
      if ( defined($output_filtered_clustered_fasta) );

}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
70
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Entry point for the command: show the usage text when help was requested
# or an error message is pending, otherwise hand the collected settings to
# Bio::Roary::IterativeCdhit and run it. Returns whatever that run returns.
sub run {
    my ($self) = @_;

    # A help request ends the program with the usage text.
    die $self->usage_text if $self->help;

    # A pending error is printed to STDOUT before dying with the usage text.
    my $pending_error = $self->_error_message;
    if ( defined $pending_error ) {
        print $pending_error . "\n";
        die $self->usage_text;
    }

    # Every setting is forwarded under the same name as the attribute here.
    my @forwarded_attributes = qw(
      output_cd_hit_filename
      output_combined_filename
      number_of_input_files
      output_filtered_clustered_fasta
      lower_bound_percentage
      upper_bound_percentage
      step_size_percentage
      cpus
      logger
    );
    my %settings = map { $_ => $self->$_ } @forwarded_attributes;

    return Bio::Roary::IterativeCdhit->new(%settings)->run;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
94
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Return the command-line help text as a single string. The text is static;
# the invocant is accepted only so this can be called as a method.
#
# NOTE(review): the option columns appear to have had their spacing
# collapsed in this copy of the file -- confirm the intended alignment
# against the upstream source before changing the text.
sub usage_text {
    my ($self) = @_;

    # Non-interpolating heredoc: the text contains nothing to interpolate.
    my $usage = <<'USAGE';
Usage: iterative_cdhit [options]
Iteratively cluster a FASTA file of proteins with CD-hit, lower the threshold each time and extracting core genes (1 per isolate) to another file, and remove them from the input proteins file.

Required arguments:
-m STR input FASTA file of protein sequences [_combined_files]

Options: -p INT number of threads [1]
-n INT number of isolates [1]
-c STR cd-hit output filename [_clustered]
-f STR output filename for filtered sequences [_clustered_filtered.fa]
-l FLOAT lower bound percentage identity [98.0]
-u FLOAT upper bound percentage identity [99.0]
-s FLOAT step size for percentage identity [0.5]
-v verbose output to STDOUT
-h this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
    return $usage;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
118
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
119 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
120 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
121 1;