Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary/ChunkFastaFile.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
1 package Bio::Roary::ChunkFastaFile; | |
2 | |
3 # ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces. | |
4 | |
5 =head1 SYNOPSIS | |
6 | |
7 Take in a FASTA file and chunk it up into smaller pieces. | |
8 use Bio::Roary::ChunkFastaFile; | |
9 | |
10 my $obj = Bio::Roary::ChunkFastaFile->new( | |
11 fasta_file => 'abc.fa', | |
12 ); | |
13 $obj->sequence_file_names; | |
14 | |
15 =cut | |
16 | |
17 use Moose; | |
18 use Bio::SeqIO; | |
19 use Bio::Roary::Exceptions; | |
20 use Cwd; | |
21 use File::Temp; | |
22 | |
# Path of the FASTA file to split; the only required constructor argument.
has 'fasta_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Threshold on the summed sequence length of a chunk: once the running total
# exceeds this value, the next sequence read starts a new chunk file.
has 'target_chunk_size' => (
    is      => 'ro',
    isa     => 'Int',
    default => 200000,
);

# Array reference of chunk file paths; built lazily, and building it is what
# actually performs the split.
has 'sequence_file_names' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_sequence_file_names',
);

# File::Temp directory object created inside the current working directory
# with CLEANUP => 1.
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub { return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 ); },
);

# Path string of the working directory, derived lazily from _working_directory.
has '_working_directory_name' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__working_directory_name',
);

# Bio::SeqIO reader over fasta_file, opened lazily on first use.
has '_input_seqio' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO',
    lazy    => 1,
    builder => '_build__input_seqio',
);
30 | |
# Builder for the _working_directory_name attribute.
# Returns whatever dirname() reports for the object held in _working_directory.
sub _build__working_directory_name {
    my $self = shift;

    my $temp_dir = $self->_working_directory;
    return $temp_dir->dirname;
}
35 | |
# Builder for the _input_seqio attribute.
# Constructs a Bio::SeqIO object over fasta_file, requesting the 'Fasta' format.
sub _build__input_seqio {
    my $self = shift;

    my @reader_args = (
        -file   => $self->fasta_file,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@reader_args);
}
40 | |
# Returns the path of the chunk file with the given number:
# "<working directory name>/<chunk_number>.seq".
sub _create_next_chunk_file_name {
    my ( $self, $chunk_number ) = @_;

    my $directory = $self->_working_directory_name;
    return sprintf( '%s/%s.seq', $directory, $chunk_number );
}
45 | |
# Constructs a Bio::SeqIO object for the chunk with the given number.
# The chunk path is prefixed with '>' before being passed as -file, and the
# 'Fasta' format is requested.
sub _create_next_chunk_seqio {
    my ( $self, $chunk_number ) = @_;

    my $chunk_path = $self->_create_next_chunk_file_name($chunk_number);
    return Bio::SeqIO->new( -file => ">$chunk_path", -format => 'Fasta' );
}
50 | |
# Builder for the sequence_file_names attribute.
# Reads every sequence from _input_seqio and writes it to numbered chunk
# files, returning an array reference of the chunk file names in creation
# order. Chunk 0 is always opened, even when the input yields no sequences.
sub _build_sequence_file_names {
    my $self = shift;

    my @chunk_paths;
    my $writer;
    my $bases_in_chunk;
    my $next_index = 0;

    # Opens the writer for the next chunk, records its file name and resets
    # the running length for that chunk.
    my $start_chunk = sub {
        $writer = $self->_create_next_chunk_seqio($next_index);
        push @chunk_paths, $self->_create_next_chunk_file_name($next_index);
        $bases_in_chunk = 0;
        $next_index++;
        return;
    };

    $start_chunk->();

    while ( my $record = $self->_input_seqio->next_seq ) {

        # The size check happens before the write, so a chunk is only closed
        # once its total has already gone past target_chunk_size.
        $start_chunk->() if $bases_in_chunk > $self->target_chunk_size;

        $writer->write_seq($record);
        $bases_in_chunk += $record->length;
    }

    return \@chunk_paths;
}
73 | |
74 no Moose; | |
75 __PACKAGE__->meta->make_immutable; | |
76 | |
77 1; |