annotate Roary/lib/Bio/Roary/ChunkFastaFile.pm @ 0:c47a5f61bc9f draft

Uploaded
author dereeper
date Fri, 14 May 2021 20:27:06 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 package Bio::Roary::ChunkFastaFile;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3 # ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces.
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
=head1 SYNOPSIS

Take in a FASTA file and chunk it up into smaller pieces.

   use Bio::Roary::ChunkFastaFile;

   my $obj = Bio::Roary::ChunkFastaFile->new(
     fasta_file => 'abc.fa',
   );
   $obj->sequence_file_names;

=cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18 use Bio::SeqIO;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
19 use Bio::Roary::Exceptions;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
20 use Cwd;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
21 use File::Temp;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
22
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Path of the FASTA file to split (mandatory constructor argument).
has 'fasta_file' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Threshold compared against the summed sequence lengths of the current
# chunk in _build_sequence_file_names to decide when to start a new chunk.
has 'target_chunk_size' => (
    is      => 'ro',
    isa     => 'Int',
    default => 200_000,
);

# Array reference of the chunk file paths; the chunk files are written the
# first time this attribute is read.
has 'sequence_file_names' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_sequence_file_names',
);

# Temporary directory created inside the current working directory.
# CLEANUP => 1 asks File::Temp to remove it when the object is destroyed.
has '_working_directory' => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub {
        return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
    },
);

# Path of the temporary directory as a plain string.
has '_working_directory_name' => (
    is      => 'ro',
    isa     => 'Str',
    lazy    => 1,
    builder => '_build__working_directory_name',
);

# Bio::SeqIO reader over fasta_file, opened on first use.
has '_input_seqio' => (
    is      => 'ro',
    isa     => 'Bio::SeqIO',
    lazy    => 1,
    builder => '_build__input_seqio',
);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
30
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for '_working_directory_name'.
# Returns the path reported by the File::Temp::Dir object held in
# '_working_directory'.
sub _build__working_directory_name {
    my $self     = shift;
    my $temp_dir = $self->_working_directory;
    return $temp_dir->dirname();
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
35
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for '_input_seqio'.
# Returns a Bio::SeqIO object opened on 'fasta_file' in Fasta format.
sub _build__input_seqio {
    my $self        = shift;
    my @reader_args = (
        -file   => $self->fasta_file,
        -format => 'Fasta',
    );
    return Bio::SeqIO->new(@reader_args);
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
40
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the path of the chunk file with the given number:
# "<working directory>/<chunk_number>.seq".
sub _create_next_chunk_file_name {
    my ( $self, $chunk_number ) = @_;
    my $directory = $self->_working_directory_name;
    return sprintf( '%s/%s.seq', $directory, $chunk_number );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
45
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns a Bio::SeqIO object in Fasta format opened for writing on the
# chunk file with the given number (the leading '>' selects write mode).
sub _create_next_chunk_seqio {
    my ( $self, $chunk_number ) = @_;
    my $chunk_path = $self->_create_next_chunk_file_name($chunk_number);
    return Bio::SeqIO->new(
        -file   => ">$chunk_path",
        -format => 'Fasta',
    );
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
50
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Builder for 'sequence_file_names'.
#
# Reads every sequence from the input FASTA file and writes it to numbered
# chunk files (0.seq, 1.seq, ...) in the working directory. A new chunk is
# started when the summed length of the sequences already written to the
# current chunk exceeds 'target_chunk_size'; the check happens before each
# write, so a chunk may end up larger than the target.
#
# Returns an array reference of the chunk file paths, in creation order.
# Chunk 0 is always created, even when the input holds no sequences.
sub _build_sequence_file_names {
    my ($self) = @_;

    my @sequence_file_names;
    my $chunk_number         = 0;
    my $current_chunk_length = 0;
    my $current_chunk_seqio  = $self->_create_next_chunk_seqio($chunk_number);
    push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) );

    # Both values are constant for the duration of the loop, so look them up
    # once instead of calling the accessors for every sequence.
    my $input_seqio       = $self->_input_seqio;
    my $target_chunk_size = $self->target_chunk_size;

    while ( my $input_seq = $input_seqio->next_seq() ) {
        if ( $current_chunk_length > $target_chunk_size ) {

            # Finish the current chunk explicitly before opening the next
            # one, rather than relying on object destruction to close it.
            $current_chunk_seqio->close();

            # next chunk
            $chunk_number++;
            $current_chunk_length = 0;
            $current_chunk_seqio  = $self->_create_next_chunk_seqio($chunk_number);
            push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) );
        }
        $current_chunk_seqio->write_seq($input_seq);
        $current_chunk_length += $input_seq->length();
    }

    # Close the last chunk so its file is complete before the file names are
    # handed to the caller.
    $current_chunk_seqio->close();

    return \@sequence_file_names;
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
73
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Unimport the Moose keywords, then make the class immutable.
no Moose;
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value.
1;