annotate Roary/lib/Bio/Roary/Output/DifferenceBetweenSets.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::Output::DifferenceBetweenSets;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Given two sets of isolates and a group file, output what's unique in each and what's in common
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7 Given two sets of isolates and a group file, output what's unique in each and what's in common.
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 use Bio::Roary::Output::DifferenceBetweenSets;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 my $obj = Bio::Roary::Output::DifferenceBetweenSets->new(
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11 analyse_groups => $analyse_groups,
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 input_filenames_sets =>
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 [
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 ['aaa.faa','bbb.faa'],
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 ['ccc.faa','ddd.faa']
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 ],
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 $obj->groups_set_one_unique();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 $obj->groups_set_two_unique();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 $obj->groups_in_common();
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
23
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
24 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
25 use Bio::SeqIO;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
26 use Bio::Roary::Exceptions;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
27 use Bio::Roary::AnalyseGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
28 use Bio::Roary::Output::QueryGroups;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
29
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Public attributes.
#   analyse_groups       - required Bio::Roary::AnalyseGroups object.
#   input_filenames_sets - required array ref of array refs.
#   output_filename_base - string, defaults to 'set_difference'.
has 'analyse_groups' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnalyseGroups',
    required => 1,
);
has 'input_filenames_sets' => (
    is       => 'ro',
    isa      => 'ArrayRef[ArrayRef]',
    required => 1,
);
has 'output_filename_base' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'set_difference',
);

# Private attribute: array ref that is built on first access by
# _build__query_groups_objs.
has '_query_groups_objs' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build__query_groups_objs',
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
35
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
36 # TODO: update to handle more than 2 input sets
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
37
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for the lazy '_query_groups_objs' attribute.
#
# Creates one Bio::Roary::Output::QueryGroups object per entry of
# input_filenames_sets (in the same order), followed by one extra object
# whose input_filenames is the first two sets concatenated. Only the sets at
# index 0 and 1 contribute to that final, combined object.
#
# Returns an array ref of the objects.
sub _build__query_groups_objs {
    my ($self) = @_;

    my $filename_sets = $self->input_filenames_sets;

    # Every object gets the same analyse_groups; only the file list varies.
    my $new_query_groups = sub {
        my ($filenames) = @_;
        return Bio::Roary::Output::QueryGroups->new(
            analyse_groups  => $self->analyse_groups,
            input_filenames => $filenames
        );
    };

    my @query_groups = map { $new_query_groups->($_) } @{$filename_sets};

    my ( $first_set, $second_set ) = @{$filename_sets}[ 0, 1 ];
    my @combined_filenames = ( @{$first_set}, @{$second_set} );
    push @query_groups, $new_query_groups->( \@combined_filenames );

    return \@query_groups;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
63
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns a new array ref containing the elements of $array_2 that do not
# appear in $array_1 (that is, $array_2 minus $array_1). Elements keep the
# order they have in $array_2 and are compared as hash keys, i.e. as strings.
# $self is accepted but not used.
sub _subtract_arrays {
    my ( $self, $array_1, $array_2 ) = @_;

    my %is_excluded = map { ( $_ => 1 ) } @{$array_1};
    return [ grep { !$is_excluded{$_} } @{$array_2} ];
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
70
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Works out which groups are in $query_group1 but not in $query_group2
# (via _subtract_arrays on the two objects' _groups lists) and passes them,
# together with $output_filename, to
# $query_group1->groups_with_external_inputs.
# Returns whatever that call returns.
sub _groups_unique {
    my ( $self, $output_filename, $query_group1, $query_group2 ) = @_;

    # _subtract_arrays removes its first list from its second.
    my @remove_then_keep = ( $query_group2->_groups, $query_group1->_groups );
    my $only_in_first    = $self->_subtract_arrays(@remove_then_keep);

    return $query_group1->groups_with_external_inputs( $output_filename, $only_in_first );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
76
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the filename used for the groups unique to set one:
# output_filename_base with '_unique_set_one' appended.
sub groups_set_one_unique_filename {
    my ($self) = @_;
    my $base = $self->output_filename_base;
    return "${base}_unique_set_one";
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
82
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the filename used for the groups unique to set two:
# output_filename_base with '_unique_set_two' appended.
sub groups_set_two_unique_filename {
    my ($self) = @_;
    my $base = $self->output_filename_base;
    return "${base}_unique_set_two";
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
88
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the filename used for the groups common to both sets:
# output_filename_base with '_common_set' appended.
sub groups_in_common_filename {
    my ($self) = @_;
    my $base = $self->output_filename_base;
    return "${base}_common_set";
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
94
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
95
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Processes the groups unique to set one: calls _groups_unique with the
# set-one output filename, the first query-groups object as the set of
# interest and the second as the set to subtract.
# Returns whatever _groups_unique returns.
sub groups_set_one_unique {
    my ($self) = @_;

    my @unique_args = (
        $self->groups_set_one_unique_filename,
        @{ $self->_query_groups_objs }[ 0, 1 ],
    );
    return $self->_groups_unique(@unique_args);
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
104
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Processes the groups unique to set two: calls _groups_unique with the
# set-two output filename, the second query-groups object as the set of
# interest and the first as the set to subtract.
# Returns whatever _groups_unique returns.
sub groups_set_two_unique {
    my ($self) = @_;

    my @unique_args = (
        $self->groups_set_two_unique_filename,
        @{ $self->_query_groups_objs }[ 1, 0 ],
    );
    return $self->_groups_unique(@unique_args);
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
113
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Processes the groups shared by set one and set two.
#
# Takes the groups of the third query-groups object and removes from them
# first the groups found only in set two, then the groups found only in set
# one. The remainder is passed, with the common-set filename, to the third
# object's groups_with_external_inputs.
# Returns whatever that call returns.
sub groups_in_common {
    my ($self) = @_;

    my ( $set_one, $set_two, $both_sets ) = @{ $self->_query_groups_objs }[ 0 .. 2 ];

    # _subtract_arrays( X, Y ) yields the elements of Y that are not in X.
    my $only_in_two = $self->_subtract_arrays( $set_one->_groups, $set_two->_groups );
    my $only_in_one = $self->_subtract_arrays( $set_two->_groups, $set_one->_groups );

    my $without_two_only = $self->_subtract_arrays( $only_in_two, $both_sets->_groups );
    my $in_both          = $self->_subtract_arrays( $only_in_one, $without_two_only );

    return $both_sets->groups_with_external_inputs( $self->groups_in_common_filename, $in_both );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
122
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
123 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
124 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
125
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
126 1;