Mercurial > repos > cpt > cpt_psm_prep
view lib/CPT/Bio.pm @ 1:d724f34e671d draft default tip
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:50:07 +0000 |
parents | |
children |
line wrap: on
line source
package CPT::Bio; use Moose; use strict; use warnings; use autodie; use CPT::FiletypeDetector; use CPT::BioData; my $bd = CPT::BioData->new(); my $filetype = CPT::FiletypeDetector->new(); has 'var_translate' => ( is => 'rw', isa => 'Bool'); has 'var_header' => ( is => 'rw', isa => 'Bool'); has codonTable => ( is => 'rw', isa => 'Any', default => sub { $bd->getTranslationTable(11) }, ); sub set_codon_table { my ($self, $num) = @_; $self->codonTable($bd->getTranslationTable($num)); } sub _getFeatureTag { my ( $self, $feat, $tag ) = @_; if(! defined($feat)){ warn "Undefined feature"; } return $feat->has_tag($tag) ? ( join( ',', $feat->get_tag_values($tag) ) ) : ''; } sub _getIdentifier { my ( $self, $feat ) = @_; my $line; if ( ref $feat eq 'Bio::Seq::RichSeq' || ref $feat eq 'Bio::Seq' ) { return $feat->display_id; } else { my $locus_tag = $self->_getFeatureTag( $feat, 'locus_tag' ); if ($locus_tag) { return $locus_tag; } my $gene = $self->_getFeatureTag( $feat, 'gene' ); if ($gene) { return $gene; } my $product = $self->_getFeatureTag( $feat, 'product' ); if ($product) { return $product; } } return sprintf("%s_%s_%s", $feat->start(), $feat->end(), ($feat->strand() == 1 ? 'sense':'antisense')); } sub requestCopy { my ( $self, %data ) = @_; use Bio::SeqIO; if ($data{'file'} ) { my ($guessed_type) = $filetype->detect( $data{'file'} ); my $seqio = Bio::SeqIO->new( -file => $data{'file'}, -format => $guessed_type ); my @results; while ( my $seqobj = $seqio->next_seq() ) { return \$seqobj; } } else { die "No file specified"; } } sub getSeqIO { my ( $self, $file ) = @_; use Bio::SeqIO; if ($file ) { my ($guessed_type) = $filetype->detect( $file ); my $seqio = Bio::SeqIO->new( -file => $file, -format => $guessed_type ); return $seqio; } else { die "No file specified"; } } sub parseFile { my ( $self, %data ) = @_; use Bio::SeqIO; my ($guessed_type) = $filetype->detect( $data{'file'} ); my $seqio = Bio::SeqIO->new( -file => $data{'file'}, -format => $guessed_type ); # Are we to translate this $self->var_translate(defined($data{translate}) && $data{translate}); $self->var_header(defined($data{header}) && $data{header}); my @results; if ( not defined $data{'subset'} ) { $data{'subset'} = 'all'; } while ( my $seqobj = $seqio->next_seq() ) { if ( (ref $data{'subset'} ne 'ARRAY' && $data{'subset'} eq 'whole' ) # Want the whole thing for a richseq || (ref $seqobj eq 'Bio::Seq' || ref $seqobj eq 'Bio::Seq::fasta') # or it's a fasta type sequence ) { push( @results, $self->handle_seq($seqobj)); } else #data subset eq sometag { my %wanted_tags; if ( ref $data{'subset'} eq 'ARRAY' ) { %wanted_tags = map { $_ => 1 } @{ $data{'subset'} }; } else { $wanted_tags{ $data{'subset'} }++; } foreach my $feat ( $seqobj->get_SeqFeatures ) { if ( $wanted_tags{ $feat->primary_tag } || ( $wanted_tags{'all'} && $feat->primary_tag ne "source" ) ) { push( @results, $self->handle_seq($feat)); } } } } if ( $data{'callback'} ) { $data{'callback'}->( \@results ); } else { return \@results; } } sub handle_seq { my ($self, $obj) = @_; my @line; if ( $self->var_header() ){ $line[0] = '>' . $self->_getIdentifier($obj); } # Get our sequence $line[1] = $self->intelligent_get_seq($obj); if ( $self->var_translate() ) { $line[1] = $self->translate($line[1]); } return \@line; } sub intelligent_get_seq { my ($self, $obj, %extra) = @_; # Top level, e.g., fasta/gbk file, "extra" doesn't apply to these if ( ref $obj eq 'Bio::Seq::RichSeq' || ref $obj eq 'Bio::Seq' ) { return $obj->seq; }else{ return $self->get_seq_from_feature($obj, %extra); } } sub get_seq_from_feature { my ($self, $feat, %extra) = @_; my $seq; my $l; if($extra{parent}){ $l = $extra{parent}->length(); } if($extra{upstream}){ if($feat->strand < 0){ my $y = $feat->end + 1; my $z = $feat->end + $extra{upstream}; if($y < $l){ if($z > $l){ $z = $l; } $seq .= $extra{parent}->trunc($y, $z)->revcom->seq; } }else{ my $y = $feat->start - $extra{upstream}; my $z = $feat->start - 1; if($z > 0){ if($y < 1){ $y = 1; } $seq .= $extra{parent}->trunc($y, $z)->seq; } } } if(ref($feat->location) eq 'Bio::Location::Simple'){ $seq .= $feat->seq->seq(); }else{ $seq .= $feat->spliced_seq->seq(); } if($extra{downstream}){ if($feat->strand < 0){ my $y = $feat->start - $extra{downstream}; my $z = $feat->start - 1; if($z > 0){ if($y < 1){ $y = 1; } $seq .= $extra{parent}->trunc($y, $z)->revcom->seq; } }else{ my $y = $feat->end + 1; my $z = $feat->end + $extra{downstream}; if($y < $l){ if($z > $l){ $z = $l; } $seq .= $extra{parent}->trunc($y, $z)->seq; } } } return $seq; } sub translate { my ($self, $seq) = @_; if($seq =~ /^[ACTGN]+$/){ my %ct = %{$self->codonTable}; $seq = join( '' , map { if($ct{$_}){ $ct{$_} }else{ () } } unpack("(A3)*", $seq)); } return $seq; } no Moose; 1; __END__ =pod =encoding UTF-8 =head1 NAME CPT::Bio =head1 VERSION version 1.99.4 =head2 _getFeatureTag my $tag = $libCPT->_getFeatureTag($feature,'note'); returns all values of the given tag, joined with ','. =head2 requestCopy my $seqobj = $libCPT->requestCopy('file'=>'test.gbk'); requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file. =head2 getSeqIO my $seqio = $libCPT->getSeqIO('file'=>'test.gbk'); requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file. =head2 parseFile $libCPT->parseFile( 'file' => $options{'file'}, 'callback' => \&func, 'translate' => 1, 'header' => 1, 'subset' => ['CDS', $options{'tag'}], ); Arguably the most important function in this library, wraps a lot of functionality in a clean wrapper, since most of the scripts we have are written around data munging. =over 4 =item * file - the Bio::SeqIO file to process =item * callback - the function to send our data to. Done all at once, in an array =item * translate - should we translate the sequence to amino acids if it's not already. =item * subset - either "whole", a valid tag, or an array of valid tags =item * header - Do we want a header (FASTA) with our result set =back =head1 AUTHOR Eric Rasche <rasche.eric@yandex.ru> =head1 COPYRIGHT AND LICENSE This software is Copyright (c) 2014 by Eric Rasche. This is free software, licensed under: The GNU General Public License, Version 3, June 2007 =cut