Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view phyloconversion/length_outliers.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; use Bio::DB::Fasta; use Bio::SeqIO; use Bio::Seq; #inputs my $infile=shift(@ARGV); my $outfile=shift(@ARGV); my $deloutfile=shift(@ARGV); my $percent=shift(@ARGV); my $seqid; my $newnumbers=1; #for sequential renumbering of header open FILE, ">$outfile" or die $!; open DELFILE, ">$deloutfile" or die $!; # open infile fasta file to get average length my $in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta'); my $seqcount; my $seqsum = 0; my $avelen; while (my $seq = $in_obj->next_seq() ) { my $sequence = $seq->seq; my $seqlen = length($sequence); $seqcount++; $seqsum = $seqsum + $seqlen; } $avelen = $seqsum/$seqcount; print "AVE= $avelen \n"; # open infile fasta file to get average length $in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta'); while (my $seq = $in_obj->next_seq() ) { my $sequence = $seq->seq; $seqid = $seq->id; $sequence =~ s/\n//g; $sequence =~ tr/a-z/A-Z/; my $seqlen = length($sequence); if($seqlen > ($avelen * ($percent/100) ) ){ print FILE ">"; print FILE $seqid." ".$seq->desc."\n".$sequence."\n"; }else{ print "Writing sequence of $seqlen to DELFILE\n"; print DELFILE ">"; print DELFILE $seqid." ".$seq->desc."\n".$sequence."\n"; } } close FILE; close DELFILE;