Mercurial > repos > dereeper > roary_plots
diff Roary/lib/Bio/Roary/CommandLine/IterativeCdhit.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
line wrap: on
line diff
# NOTE(review): this statement runs before the package declaration below, so
# it clears $main::VERSION rather than this package's $VERSION. Presumably a
# leftover of build-time version substitution -- confirm before removing.
undef $VERSION;
package Bio::Roary::CommandLine::IterativeCdhit;

# ABSTRACT: Iteratively run cdhit

=head1 SYNOPSIS

Iteratively run cdhit

=cut

use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Bio::Roary::IterativeCdhit;
extends 'Bio::Roary::CommandLine::Common';

# Raw command line arguments and the name of the invoking script.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Set by BUILD when option parsing fails; run() prints it and dies with the
# usage text when it is defined.
has '_error_message' => ( is => 'rw', isa => 'Str' );

# File names and isolate count (options -c, -m, -n and -f).
has 'output_cd_hit_filename'          => ( is => 'rw', isa => 'Str', default => '_clustered' );
has 'output_combined_filename'        => ( is => 'rw', isa => 'Str', default => '_combined_files' );
has 'number_of_input_files'           => ( is => 'rw', isa => 'Int', default => 1 );
has 'output_filtered_clustered_fasta' => ( is => 'rw', isa => 'Str', default => '_clustered_filtered.fa' );

# Identity thresholds are stored as fractions (0.98), whereas the command
# line takes percentages (98.0); BUILD divides the supplied values by 100.
has 'lower_bound_percentage' => ( is => 'rw', isa => 'Num',  default => 0.98 );
has 'upper_bound_percentage' => ( is => 'rw', isa => 'Num',  default => 0.99 );
has 'step_size_percentage'   => ( is => 'rw', isa => 'Num',  default => 0.005 );
has 'cpus'                   => ( is => 'rw', isa => 'Int',  default => 1 );
has 'verbose'                => ( is => 'rw', isa => 'Bool', default => 0 );

# Moose construction hook: parse $self->args and copy every option that was
# actually supplied onto the matching attribute. Options that were not given
# keep their attribute defaults. A parsing failure is recorded in
# _error_message so that run() aborts with the usage text instead of
# continuing with a partially understood command line.
sub BUILD {
    my ($self) = @_;

    my (
        $output_cd_hit_filename,   $cpus,
        $lower_bound_percentage,   $upper_bound_percentage,
        $step_size_percentage,     $output_combined_filename,
        $number_of_input_files,    $output_filtered_clustered_fasta,
        $verbose,                  $help
    );

    # The percentage options use the '=f' (real number) type so that
    # Getopt::Long rejects non-numeric values; with '=s' a value such as
    # "abc" would silently become 0 after the division below.
    my $options_ok = GetOptionsFromArray(
        $self->args,
        'c|output_cd_hit_filename=s'          => \$output_cd_hit_filename,
        'm|output_combined_filename=s'        => \$output_combined_filename,
        'n|number_of_input_files=i'           => \$number_of_input_files,
        'f|output_filtered_clustered_fasta=s' => \$output_filtered_clustered_fasta,
        'l|lower_bound_percentage=f'          => \$lower_bound_percentage,
        'u|upper_bound_percentage=f'          => \$upper_bound_percentage,
        's|step_size_percentage=f'            => \$step_size_percentage,
        'p|cpus=i'                            => \$cpus,
        'v|verbose'                           => \$verbose,
        'h|help'                              => \$help,
    );

    # Getopt::Long has already warned about the specific problem on STDERR;
    # record the failure so run() stops and shows the usage text.
    $self->_error_message('Error: Invalid command line options') unless $options_ok;

    if ( defined($verbose) ) {
        $self->verbose($verbose);

        # logger is not declared in this file; presumably it is provided by
        # Bio::Roary::CommandLine::Common.
        # NOTE(review): 10000 looks like Log::Log4perl's DEBUG level -- confirm.
        $self->logger->level(10000);
    }
    $self->help($help) if ( defined($help) );

    # Command line values are percentages; the attributes hold fractions.
    $self->lower_bound_percentage( $lower_bound_percentage / 100 ) if ( defined($lower_bound_percentage) );
    $self->upper_bound_percentage( $upper_bound_percentage / 100 ) if ( defined($upper_bound_percentage) );
    $self->step_size_percentage( $step_size_percentage / 100 )     if ( defined($step_size_percentage) );

    $self->output_cd_hit_filename($output_cd_hit_filename)     if ( defined($output_cd_hit_filename) );
    $self->output_combined_filename($output_combined_filename) if ( defined($output_combined_filename) );
    $self->number_of_input_files($number_of_input_files)       if ( defined($number_of_input_files) );
    $self->cpus($cpus)                                         if ( defined($cpus) );
    $self->output_filtered_clustered_fasta($output_filtered_clustered_fasta)
      if ( defined($output_filtered_clustered_fasta) );

    return;
}

# Entry point: dies with the usage text when help was requested or when an
# error message was recorded (the message itself is printed to STDOUT first).
# Otherwise builds a Bio::Roary::IterativeCdhit from the parsed settings and
# returns the result of its run method.
sub run {
    my ($self) = @_;

    ( !$self->help ) or die $self->usage_text;
    if ( defined( $self->_error_message ) ) {
        print $self->_error_message . "\n";
        die $self->usage_text;
    }

    my $obj = Bio::Roary::IterativeCdhit->new(
        output_cd_hit_filename          => $self->output_cd_hit_filename,
        output_combined_filename        => $self->output_combined_filename,
        number_of_input_files           => $self->number_of_input_files,
        output_filtered_clustered_fasta => $self->output_filtered_clustered_fasta,
        lower_bound_percentage          => $self->lower_bound_percentage,
        upper_bound_percentage          => $self->upper_bound_percentage,
        step_size_percentage            => $self->step_size_percentage,
        cpus                            => $self->cpus,
        logger                          => $self->logger

    );
    return $obj->run;
}

# Returns the help text shown for -h and after a command line error.
sub usage_text {
    my ($self) = @_;

    return <<USAGE;
Usage: iterative_cdhit [options]
Iteratively cluster a FASTA file of proteins with CD-hit, lower the threshold each time and extracting core genes (1 per isolate) to another file, and remove them from the input proteins file.

Required arguments:
         -m STR    input FASTA file of protein sequences [_combined_files]

Options: -p INT    number of threads [1]
         -n INT    number of isolates [1]
         -c STR    cd-hit output filename [_clustered]
         -f STR    output filename for filtered sequences [_clustered_filtered.fa]
         -l FLOAT  lower bound percentage identity [98.0]
         -u FLOAT  upper bound percentage identity [99.0]
         -s FLOAT  step size for percentage identity [0.5]
         -v        verbose output to STDOUT
         -h        this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
}

__PACKAGE__->meta->make_immutable;
no Moose;
1;