Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff phyloconversion/length_outliers.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phyloconversion/length_outliers.pl Tue Mar 11 12:19:13 2014 -0700 @@ -0,0 +1,59 @@ +#!/usr/bin/perl -w + +use strict; + +use Bio::DB::Fasta; +use Bio::SeqIO; +use Bio::Seq; + +#inputs +my $infile=shift(@ARGV); +my $outfile=shift(@ARGV); +my $deloutfile=shift(@ARGV); +my $percent=shift(@ARGV); + +my $seqid; +my $newnumbers=1; #for sequential renumbering of header + +open FILE, ">$outfile" or die $!; +open DELFILE, ">$deloutfile" or die $!; + + +# open infile fasta file to get average length +my $in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta'); + +my $seqcount; +my $seqsum = 0; +my $avelen; +while (my $seq = $in_obj->next_seq() ) { + my $sequence = $seq->seq; + my $seqlen = length($sequence); + $seqcount++; + $seqsum = $seqsum + $seqlen; +} +$avelen = $seqsum/$seqcount; +print "AVE= $avelen \n"; + + +# open infile fasta file to get average length +$in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta'); + +while (my $seq = $in_obj->next_seq() ) { + my $sequence = $seq->seq; + $seqid = $seq->id; + $sequence =~ s/\n//g; + $sequence =~ tr/a-z/A-Z/; + my $seqlen = length($sequence); + + if($seqlen > ($avelen * ($percent/100) ) ){ + print FILE ">"; + print FILE $seqid." ".$seq->desc."\n".$sequence."\n"; + }else{ +print "Writing sequence of $seqlen to DELFILE\n"; + print DELFILE ">"; + print DELFILE $seqid." ".$seq->desc."\n".$sequence."\n"; + } +} + +close FILE; +close DELFILE;