0
|
1 package Bio::Roary::Output::NumberOfGroups;
|
|
2
|
|
3 # ABSTRACT: Create raw output files of group counts for turning into plots
|
|
4
|
|
=head1 SYNOPSIS

Create raw output files of group counts for turning into plots.

   use Bio::Roary::Output::NumberOfGroups;

   my $obj = Bio::Roary::Output::NumberOfGroups->new(
     group_statistics_obj => $group_stats,
     annotate_groups_obj  => $annotate_groups
   );
   $obj->create_output_files();

=cut
|
|
16
|
|
17 use Moose;
|
|
18 use List::Util qw(shuffle);
|
|
19 use Bio::Roary::AnnotateGroups;
|
|
20 use Bio::Roary::GroupStatistics;
|
|
21
|
|
# --- Inputs -----------------------------------------------------------------

# Source of the input file names (_sorted_file_names) and of the per-file
# group lists (_files_to_groups) used by the gene expansion pass.
has 'group_statistics_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::GroupStatistics',
    required => 1,
);

# How many randomised passes create_output_files runs; each pass contributes
# one line to every output file.
has 'number_of_iterations' => (
    is      => 'ro',
    isa     => 'Int',
    default => 10,
);

# NOTE(review): not referenced by any method visible in this file.
has 'groups_to_contigs' => (
    is  => 'ro',
    isa => 'Maybe[HashRef]',
);

# Required at construction time.
# NOTE(review): not referenced by any method visible in this file.
has 'annotate_groups_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);

# Multiplied by the number of files visited so far to obtain the occurrence
# count a group needs in order to be tallied as conserved.
has 'core_definition' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1.0,
);

# --- Output file names ------------------------------------------------------

has 'output_raw_filename_conserved_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_conserved_genes.Rtab',
);

has 'output_raw_filename_unique_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_unique_genes.Rtab',
);

has 'output_raw_filename_total_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_genes_in_pan_genome.Rtab',
);

has 'output_raw_filename_new_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_new_genes.Rtab',
);

# --- Accumulators -----------------------------------------------------------
# Each holds one array reference per completed pass; every inner array has one
# value per input file, in the order the files were visited during that pass.

has '_conserved_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_unique_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_total_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);

has '_new_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
|
|
36
|
|
# Run the configured number of randomised gene expansion passes and then write
# the four accumulated tables (conserved, unique, total and new gene counts)
# to their respective output files.
#
# Returns 1.
sub create_output_files {
    my ($self) = @_;

    # Every pass appends one row to each of the four accumulators.
    for ( 1 .. $self->number_of_iterations ) {
        $self->_single_iteration_gene_expansion;
    }

    # Pair each output file name with the table that belongs in it; the files
    # are written in this order.
    my @filename_and_table = (
        [ $self->output_raw_filename_conserved_genes, $self->_conserved_genes ],
        [ $self->output_raw_filename_unique_genes,    $self->_unique_genes ],
        [ $self->output_raw_filename_total_genes,     $self->_total_genes ],
        [ $self->output_raw_filename_new_genes,       $self->_new_genes ],
    );

    for my $pair (@filename_and_table) {
        $self->_create_raw_output_file( @{$pair} );
    }

    return 1;
}
|
|
50
|
|
# Write a table to $filename as tab-separated text, one line per row.
#
# Parameters:
#   $filename    - path of the file to create; an existing file is truncated.
#   $output_data - array reference of array references. Each inner array is
#                  written as one line with its elements joined by tabs.
#
# Returns 1 on success.
# Dies if the file cannot be opened or closed, so that a failed write is
# reported instead of silently leaving a missing or truncated output file.
sub _create_raw_output_file {
    my ( $self, $filename, $output_data ) = @_;

    open( my $fh, '>', $filename )
      or die "Could not open '$filename' for writing: $!";

    for my $iteration ( @{$output_data} ) {
        print {$fh} join( "\t", @{$iteration} ) . "\n";
    }

    # Buffered write errors (for example a full disk) only surface on close.
    close($fh)
      or die "Could not close '$filename' after writing: $!";

    return 1;
}
|
|
60
|
|
# Return the input file names in a random order.
#
# The names are read from group_statistics_obj->_sorted_file_names and passed
# through List::Util::shuffle. The result is a reference to a newly built
# array containing the shuffled names.
sub _shuffle_input_files {
    my ($self) = @_;

    my $sorted_names = $self->group_statistics_obj->_sorted_file_names;
    return [ shuffle( @{$sorted_names} ) ];
}
|
|
66
|
|
# Run one randomised accumulation pass over the input files.
#
# The files are visited in the order returned by _shuffle_input_files. A
# running count is kept of how many times each group has been listed by the
# files visited so far. After each file is added, four tallies are taken:
#
#   conserved - groups whose count is at least
#               (number of files visited so far * core_definition)
#   unique    - groups whose count is exactly 1
#   total     - distinct groups seen so far
#   new       - groups first encountered in the file just added
#
# The tallies of the pass (one value per file, in visiting order) are appended
# as one row each to _conserved_genes, _unique_genes, _total_genes and
# _new_genes. Returns nothing.
sub _single_iteration_gene_expansion {
    my ($self) = @_;

    my %times_seen;
    my ( @conserved_row, @unique_row, @total_row, @new_row );

    my $file_order    = $self->_shuffle_input_files();
    my $files_visited = 0;

    for my $file ( @{$file_order} ) {
        $files_visited++;

        # Fold this file's groups into the running counts, noting how many of
        # them had not been seen before in this pass.
        my $groups_in_file  = $self->group_statistics_obj->_files_to_groups->{$file};
        my $first_sightings = 0;
        for my $group ( @{$groups_in_file} ) {
            $first_sightings++ unless defined $times_seen{$group};
            $times_seen{$group}++;
        }

        # The count a group must reach to be tallied as conserved grows with
        # every file added.
        my $core_threshold = $files_visited * $self->core_definition;

        # grep and keys in scalar context yield element counts.
        my $conserved = grep { $_ >= $core_threshold } values %times_seen;
        my $unique    = grep { $_ == 1 } values %times_seen;
        my $total     = keys %times_seen;

        push @conserved_row, $conserved;
        push @unique_row,    $unique;
        push @total_row,     $total;
        push @new_row,       $first_sightings;
    }

    push @{ $self->_conserved_genes }, \@conserved_row;
    push @{ $self->_unique_genes },    \@unique_row;
    push @{ $self->_total_genes },     \@total_row;
    push @{ $self->_new_genes },       \@new_row;

    return;
}
|
|
116
|
|
# Unimport the keywords Moose exported into this package (such as "has") and
# then make the class metaobject immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value.
1;
|
|
121
|