0
|
1 package Bio::Roary::ChunkFastaFile;
|
|
2
|
|
3 # ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces.
|
|
4
|
|
=head1 SYNOPSIS

Take in a FASTA file and split it into smaller chunk files.
C<sequence_file_names> returns an array reference holding the paths of the
chunk files.

   use Bio::Roary::ChunkFastaFile;

   my $obj = Bio::Roary::ChunkFastaFile->new(
     fasta_file => 'abc.fa',
   );
   $obj->sequence_file_names;

=cut
|
|
16
|
|
17 use Moose;
|
|
18 use Bio::SeqIO;
|
|
19 use Bio::Roary::Exceptions;
|
|
20 use Cwd;
|
|
21 use File::Temp;
|
|
22
|
|
# --- Public attributes -------------------------------------------------------

# Path of the FASTA file to split (mandatory constructor argument).
has 'fasta_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Accumulated sequence length after which a new chunk file is started.
has 'target_chunk_size' => (
    is      => 'ro',
    isa     => 'Int',
    default => 200000,
);

# Array reference of chunk file paths; the chunk files are written the first
# time this attribute is read (lazy builder).
has 'sequence_file_names' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_sequence_file_names',
);

# --- Private attributes ------------------------------------------------------

# Temporary directory object created under the current working directory when
# the object is constructed. CLEANUP => 1 asks File::Temp to remove the
# directory once this object is destroyed.
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub {
        return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
    },
);

# Path of the temporary directory as a plain string.
has '_working_directory_name' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__working_directory_name',
);

# Bio::SeqIO reader over fasta_file, opened on first use.
has '_input_seqio' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO',
    lazy    => 1,
    builder => '_build__input_seqio',
);
|
|
30
|
|
# Builder for the _working_directory_name attribute.
# Returns the path reported by the File::Temp::Dir object held in
# _working_directory.
sub _build__working_directory_name {
    my $self = shift;

    my $temp_dir = $self->_working_directory;
    return $temp_dir->dirname();
}
|
|
35
|
|
# Builder for the _input_seqio attribute.
# Returns a Bio::SeqIO object that reads fasta_file in FASTA format.
sub _build__input_seqio {
    my $self = shift;

    my @reader_options = (
        -file   => $self->fasta_file,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@reader_options);
}
|
|
40
|
|
# Returns the path of the chunk file for the given chunk number:
# "<working directory>/<chunk number>.seq".
sub _create_next_chunk_file_name {
    my ( $self, $chunk_number ) = @_;

    my $directory = $self->_working_directory_name;
    my $base_name = $chunk_number . '.seq';
    return $directory . '/' . $base_name;
}
|
|
45
|
|
# Returns a Bio::SeqIO object that writes FASTA to the chunk file for the
# given chunk number. The leading ">" in the -file value selects write mode.
sub _create_next_chunk_seqio {
    my ( $self, $chunk_number ) = @_;

    my $chunk_path = $self->_create_next_chunk_file_name($chunk_number);
    return Bio::SeqIO->new(
        -file   => ">" . $chunk_path,
        -format => 'Fasta',
    );
}
|
|
50
|
|
# Builder for the sequence_file_names attribute.
#
# Reads every sequence from _input_seqio and writes it to numbered chunk
# files (0.seq, 1.seq, ...) in the working directory. Before each sequence is
# written, a new chunk is started if the length accumulated in the current
# chunk already exceeds target_chunk_size. Sequences are therefore never
# split, and a chunk may end up longer than the target.
#
# Returns an array reference of the chunk file paths in creation order. The
# path of chunk 0 is always included, even if the input yields no sequences.
sub _build_sequence_file_names {
    my ($self) = @_;

    my @chunk_paths;
    my $chunk_index  = 0;
    my $chunk_length = 0;
    my $chunk_writer;

    # Open the writer for the current chunk index and record its path.
    my $start_chunk = sub {
        $chunk_writer = $self->_create_next_chunk_seqio($chunk_index);
        push @chunk_paths, $self->_create_next_chunk_file_name($chunk_index);
        return;
    };

    $start_chunk->();

    my $reader = $self->_input_seqio;
    while ( my $record = $reader->next_seq() ) {

        # Roll over to the next chunk once the current one is past the target.
        if ( $self->target_chunk_size < $chunk_length ) {
            $chunk_index += 1;
            $chunk_length = 0;
            $start_chunk->();
        }

        $chunk_writer->write_seq($record);
        $chunk_length += $record->length();
    }

    return \@chunk_paths;
}
|
|
73
|
|
# Unimport the Moose keywords and make the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value.
1;
|