annotate Roary/lib/Bio/Roary/Output/NumberOfGroups.pm @ 3:e95344f6dfc5 draft default tip

Uploaded
author dereeper
date Fri, 12 Nov 2021 16:32:26 +0000
parents c47a5f61bc9f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::Output::NumberOfGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Create raw output files of group counts for turning into plots
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 # ABSTRACT: Create raw output files of group counts for turning into plots
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 use Bio::Roary::Output::NumberOfGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 my $obj = Bio::Roary::Output::NumberOfGroups->new(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11 group_statistics_obj => $group_stats
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 $obj->create_output_files();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 use List::Util qw(shuffle);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 use Bio::Roary::AnnotateGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 use Bio::Roary::GroupStatistics;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# --- Inputs ------------------------------------------------------------

# Supplies the genome file names and the file => groups mapping that the
# expansion iterations walk over.
has 'group_statistics_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::GroupStatistics',
    required => 1,
);

# How many shuffled expansions create_output_files performs; each one
# contributes one row to every output file.
has 'number_of_iterations' => (
    is      => 'ro',
    isa     => 'Int',
    default => 10,
);

# Optional; not read by any method visible in this file.
has 'groups_to_contigs' => (
    is  => 'ro',
    isa => 'Maybe[HashRef]',
);

# Required at construction, but not read by any method visible in this file.
has 'annotate_groups_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);

# Fraction of the genomes processed so far that must contain a group for it
# to be counted as conserved (1.0 means every genome).
has 'core_definition' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1.0,
);

# --- Output file names -------------------------------------------------

has 'output_raw_filename_conserved_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_conserved_genes.Rtab',
);

has 'output_raw_filename_unique_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_unique_genes.Rtab',
);

has 'output_raw_filename_total_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_genes_in_pan_genome.Rtab',
);

has 'output_raw_filename_new_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_new_genes.Rtab',
);

# --- Accumulators ------------------------------------------------------
# Each holds one array ref per completed iteration; the inner array has one
# count per genome, in the order the genomes were added in that iteration.

has '_conserved_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_unique_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_total_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_new_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
36
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Run the configured number of shuffled expansion iterations, then write
# the four accumulated count tables to their respective files.
#
# Returns 1.
sub create_output_files {
    my ($self) = @_;

    my $iterations_done = 0;
    while ( $iterations_done < $self->number_of_iterations ) {
        $self->_single_iteration_gene_expansion;
        $iterations_done++;
    }

    # [ accessor giving the file name, accessor giving the rows to write ]
    my @outputs = (
        [ 'output_raw_filename_conserved_genes', '_conserved_genes' ],
        [ 'output_raw_filename_unique_genes',    '_unique_genes' ],
        [ 'output_raw_filename_total_genes',     '_total_genes' ],
        [ 'output_raw_filename_new_genes',       '_new_genes' ],
    );

    for my $output (@outputs) {
        my ( $filename_method, $rows_method ) = @{$output};
        $self->_create_raw_output_file( $self->$filename_method, $self->$rows_method );
    }

    return 1;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
50
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Write a table of counts to $filename, one tab-separated line per row.
#
# Arguments:
#   $filename    - path of the file to create; an existing file is truncated.
#   $output_data - array ref of array refs; each inner array becomes one
#                  line, its elements joined with tabs.
#
# Returns 1 on success.
# Dies if the file cannot be opened for writing or cannot be closed.
sub _create_raw_output_file {
    my ( $self, $filename, $output_data ) = @_;

    # An unchecked open would let every later print fail silently and leave
    # the caller believing the file had been written.
    open( my $fh, '>', $filename )
      or die "Cannot open '$filename' for writing: $!";

    for my $row ( @{$output_data} ) {
        print {$fh} join( "\t", @{$row} ) . "\n";
    }

    # Output is buffered, so write errors (e.g. a full disk) may only be
    # reported when the handle is closed.
    close($fh)
      or die "Cannot close '$filename' after writing: $!";

    return 1;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
60
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Return a new array ref holding the statistics object's sorted file names
# in random order. The list held by the statistics object is not modified.
sub _shuffle_input_files {
    my ($self) = @_;

    my $sorted_file_names = $self->group_statistics_obj->_sorted_file_names;

    return [ shuffle( @{$sorted_file_names} ) ];
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
66
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Perform one expansion iteration: add the genomes one at a time in a
# shuffled order and, after each addition, record four counts over all
# groups seen so far:
#
#   conserved - groups present in at least (genomes added * core_definition)
#               of the genomes added so far
#   unique    - groups present in exactly one genome so far
#   total     - all distinct groups seen so far
#   new       - groups first seen in the genome just added
#
# One row (array ref with one entry per genome) is appended to each of the
# _conserved_genes, _unique_genes, _total_genes and _new_genes accumulators.
#
# Returns nothing.
sub _single_iteration_gene_expansion {
    my ($self) = @_;

    my %genomes_containing;    # group => number of genomes so far that list it
    my ( @conserved_row, @unique_row, @total_row, @new_row );

    my $genomes_added = 0;
    for my $genome_file ( @{ $self->_shuffle_input_files() } ) {
        $genomes_added++;

        my $groups_in_genome = $self->group_statistics_obj->_files_to_groups->{$genome_file};

        my $first_seen_here = 0;
        for my $group ( @{$groups_in_genome} ) {
            $first_seen_here++ unless defined $genomes_containing{$group};
            $genomes_containing{$group}++;
        }

        # The threshold depends only on how many genomes have been added,
        # so it is computed once per genome rather than once per group.
        my $core_threshold = $genomes_added * $self->core_definition;
        my @occurrences    = values %genomes_containing;

        push @conserved_row, scalar( grep { $_ >= $core_threshold } @occurrences );
        push @unique_row,    scalar( grep { $_ == 1 } @occurrences );
        push @total_row,     scalar(@occurrences);
        push @new_row,       $first_seen_here;
    }

    push @{ $self->_conserved_genes }, \@conserved_row;
    push @{ $self->_unique_genes },    \@unique_row;
    push @{ $self->_total_genes },     \@total_row;
    push @{ $self->_new_genes },       \@new_row;

    return;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
116
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
117 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
118 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
119
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
120 1;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
121