Mercurial > repos > dereeper > roary_plots
view Roary/lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
line wrap: on
line source
undef $VERSION;
package Bio::Roary::CommandLine::RoaryCoreAlignment;

# ABSTRACT: Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.

=head1 SYNOPSIS

Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.

=cut

use Moose;
use Getopt::Long qw(GetOptionsFromArray);
use Cwd 'abs_path';
use File::Path qw(remove_tree);
use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
use Bio::Roary::LookupGeneFiles;
use Bio::Roary::MergeMultifastaAlignments;
extends 'Bio::Roary::CommandLine::Common';

# Raw command line arguments and the name of the invoking script.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# Settings that BUILD overrides from the command line options.
has 'multifasta_base_directory' => ( is => 'rw', isa => 'Str',  default => 'pan_genome_sequences' );
has 'spreadsheet_filename'      => ( is => 'rw', isa => 'Str',  default => 'gene_presence_absence.csv' );
has 'output_filename'           => ( is => 'rw', isa => 'Str',  default => 'core_gene_alignment.aln' );
has 'core_definition'           => ( is => 'rw', isa => 'Num',  default => 0.99 );
has 'dont_delete_files'         => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs'            => ( is => 'rw', isa => 'Bool', default => 0 );

# Validation problems found in BUILD; run() prints them and dies with the usage text.
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'verbose'        => ( is => 'rw', isa => 'Bool', default => 0 );

# Parse the options held in 'args' and copy them onto the attributes.
# Nothing dies here: problems are stored in _error_message so that run()
# can report them together with the usage text.
sub BUILD {
    my ($self) = @_;

    my (
        $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,
        $verbose,                   $help,                 $allow_paralogs,  $dont_delete_files
    );

    # NOTE(review): the return value of GetOptionsFromArray is not checked, so
    # option parsing failures do not stop the run - confirm this is intended.
    GetOptionsFromArray(
        $self->args,
        'm|multifasta_base_directory=s' => \$multifasta_base_directory,
        's|spreadsheet_filename=s'      => \$spreadsheet_filename,
        'o|output_filename=s'           => \$output_filename,
        'cd|core_definition=f'          => \$core_definition,
        'z|dont_delete_files'           => \$dont_delete_files,
        'p|allow_paralogs'              => \$allow_paralogs,
        'v|verbose'                     => \$verbose,
        'h|help'                        => \$help,
    );

    if ( defined($verbose) ) {
        $self->verbose($verbose);

        # presumably the numeric DEBUG level of the logger supplied by the
        # parent class - TODO confirm
        $self->logger->level(10000);
    }

    $self->help($help)                     if ( defined($help) );
    $self->allow_paralogs($allow_paralogs) if ( defined($allow_paralogs) );

    # Always record the directory the user asked for. Previously a path that
    # did not exist was dropped and the default directory used in its place,
    # so a typo could silently run against the wrong input.
    if ( defined($multifasta_base_directory) ) {
        $self->multifasta_base_directory(
            ( -d $multifasta_base_directory ) ? abs_path($multifasta_base_directory) : $multifasta_base_directory );
    }
    if ( !-d $self->multifasta_base_directory ) {
        $self->_append_error_message( "Error: Cant access the multifasta base directory: " . $self->multifasta_base_directory );
    }

    # Same treatment for the spreadsheet.
    if ( defined($spreadsheet_filename) ) {
        $self->spreadsheet_filename(
            ( -e $spreadsheet_filename ) ? abs_path($spreadsheet_filename) : $spreadsheet_filename );
    }
    if ( !-e $self->spreadsheet_filename ) {
        $self->_append_error_message( "Error: Cant access the spreadsheet: " . $self->spreadsheet_filename );
    }

    $self->output_filename($output_filename) if ( defined($output_filename) );

    if ( defined($core_definition) ) {
        if ( $core_definition < 0 || $core_definition > 100 ) {
            $self->_append_error_message( "Error: The core definition must be between 0 and 100: " . $core_definition );
        }
        elsif ( $core_definition > 1 ) {

            # Values above 1 are percentages; store them as a fraction.
            $self->core_definition( $core_definition / 100 );
        }
        else {
            $self->core_definition($core_definition);
        }
    }

    $self->dont_delete_files($dont_delete_files) if ( defined($dont_delete_files) );
}

# Add a message to _error_message, keeping any message already stored so
# that every problem is reported, one per line.
sub _append_error_message {
    my ( $self, $message ) = @_;

    my $existing = $self->_error_message;
    $self->_error_message( defined($existing) ? $existing . "\n" . $message : $message );
    return;
}

# Build the core gene alignment: extract the core genes from the spreadsheet,
# look up their multifasta files and merge them into the output file.
# Dies with the usage text when help was requested or BUILD stored an error.
sub run {
    my ($self) = @_;

    ( !$self->help ) or die $self->usage_text;
    if ( defined( $self->_error_message ) ) {
        print $self->_error_message . "\n";
        die $self->usage_text;
    }

    $self->logger->info("Extract core genes from spreadsheet");
    my $core_genes_obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
        spreadsheet     => $self->spreadsheet_filename,
        core_definition => $self->core_definition,
        allow_paralogs  => $self->allow_paralogs
    );

    $self->logger->info("Looking up genes in files");
    my $gene_files = Bio::Roary::LookupGeneFiles->new(
        multifasta_directory => $self->multifasta_base_directory,
        ordered_genes        => $core_genes_obj->ordered_core_genes,
    );

    $self->logger->info("Merge multifasta alignments");
    my $merge_alignments_obj = Bio::Roary::MergeMultifastaAlignments->new(
        sample_names          => $core_genes_obj->sample_names,
        multifasta_files      => $gene_files->ordered_gene_files(),
        output_filename       => $self->output_filename,
        sample_names_to_genes => $core_genes_obj->sample_names_to_genes
    );
    $merge_alignments_obj->merge_files;

    if ( $self->dont_delete_files == 0 ) {

        # NOTE(review): this removes the literal 'pan_genome_sequences'
        # directory relative to the current working directory, not the
        # directory given with -m. Confirm which one is intended before
        # changing it.
        remove_tree('pan_genome_sequences');
    }
}

# Return the help text that is shown for -h and after any usage error.
sub usage_text {
    my ($self) = @_;

    return <<USAGE;
Usage: pan_genome_core_alignment [options]
Create an alignment of core genes from the spreadsheet and the directory of gene multi-FASTAs.

Options: -o STR    output filename [core_gene_alignment.aln]
         -cd FLOAT percentage of isolates a gene must be in to be core [99]
         -m STR    directory containing gene multi-FASTAs [pan_genome_sequences]
         -s STR    gene presence and absence spreadsheet [gene_presence_absence.csv]
         -p        allow paralogs
         -z        dont delete intermediate files
         -v        verbose output to STDOUT
         -h        this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
}

__PACKAGE__->meta->make_immutable;
no Moose;
1;