Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary/AccessoryBinaryFasta.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 package Bio::Roary::AccessoryBinaryFasta; | |
2 | |
3 # ABSTRACT: Output a FASTA file which represents the binary presence and absence of genes in the accessory genome | |
4 | |
5 =head1 SYNOPSIS | |
6 | |
7 Output a FASTA file which represents the binary presence and absence of genes in the accessory genome | |
8 use Bio::Roary::AccessoryBinaryFasta; | |
9 my $obj = Bio::Roary::AccessoryBinaryFasta->new(input_files => ['abc','efg'], | |
10 groups_to_files => {'group_1' => ['abc'], group_2 => ['abc', 'efg']} | |
11 ); | |
12 $obj->create_accessory_binary_fasta(); | |
13 =cut | |
14 | |
15 use Moose; | |
16 use POSIX; | |
17 use Bio::Roary::AnnotateGroups; | |
18 use Bio::Roary::AnalyseGroups; | |
19 use Bio::Roary::Exceptions; | |
20 use Bio::SeqIO; | |
21 use File::Basename; | |
22 | |
23 has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); | |
24 has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 ); | |
25 has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 ); | |
26 has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' ); | |
27 has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 ); | |
28 has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 ); | |
29 has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int', default => 4000 ); | |
30 has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' ); | |
31 has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' ); | |
32 has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' ); | |
33 | |
34 sub _build__groups_to_files { | |
35 my ($self) = @_; | |
36 my %groups_to_files; | |
37 for my $group ( @{ $self->annotate_groups_obj->_groups } ) { | |
38 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group}; | |
39 my %filenames; | |
40 for my $gene_name ( @{$genes} ) { | |
41 my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name}; | |
42 push( @{ $filenames{$filename} }, $gene_name ); | |
43 } | |
44 $groups_to_files{$group} = \%filenames; | |
45 } | |
46 | |
47 return \%groups_to_files; | |
48 } | |
49 | |
50 sub _build__lower_bound_value { | |
51 my ($self) = @_; | |
52 my $num_files = @{ $self->input_files }; | |
53 return ceil( $num_files * ( $self->lower_bound_percentage / 100 ) ); | |
54 } | |
55 | |
56 sub _build__upper_bound_value { | |
57 my ($self) = @_; | |
58 my $num_files = @{ $self->input_files }; | |
59 return $num_files - ceil( $num_files * ( $self->upper_bound_percentage / 100 ) ); | |
60 } | |
61 | |
62 sub create_accessory_binary_fasta { | |
63 my ($self) = @_; | |
64 my $out_seq_io = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' ); | |
65 | |
66 for my $full_filename ( @{ $self->input_files } ) { | |
67 my($filename, $dirs, $suffix) = fileparse($full_filename); | |
68 | |
69 my $output_sequence = ''; | |
70 my $sample_name = $filename; | |
71 $sample_name =~ s!\.gff\.proteome\.faa!!; | |
72 | |
73 my $gene_count = 0; | |
74 for my $group ( sort keys %{ $self->groups_to_files } ) { | |
75 last if($gene_count > $self->max_accessory_to_include); | |
76 | |
77 my @files = keys %{ $self->groups_to_files->{$group} }; | |
78 | |
79 next if ( @files <= $self->_lower_bound_value || @files > $self->_upper_bound_value ); | |
80 | |
81 my $group_to_file_genes = $self->groups_to_files->{$group}->{$full_filename}; | |
82 if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) { | |
83 $output_sequence .= 'A'; | |
84 } | |
85 else { | |
86 $output_sequence .= 'C'; | |
87 } | |
88 $gene_count++; | |
89 | |
90 } | |
91 next if($output_sequence eq ''); | |
92 $out_seq_io->write_seq( Bio::Seq->new( -display_id => $sample_name, -seq => $output_sequence ) ); | |
93 } | |
94 return 1; | |
95 } | |
96 | |
97 no Moose; | |
98 __PACKAGE__->meta->make_immutable; | |
99 | |
100 1; |