comparison phyloconversion/length_outliers.pl @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/perl -w
2
3 use strict;
4
5 use Bio::DB::Fasta;
6 use Bio::SeqIO;
7 use Bio::Seq;
8
9 #inputs
10 my $infile=shift(@ARGV);
11 my $outfile=shift(@ARGV);
12 my $deloutfile=shift(@ARGV);
13 my $percent=shift(@ARGV);
14
15 my $seqid;
16 my $newnumbers=1; #for sequential renumbering of header
17
18 open FILE, ">$outfile" or die $!;
19 open DELFILE, ">$deloutfile" or die $!;
20
21
22 # open infile fasta file to get average length
23 my $in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta');
24
25 my $seqcount;
26 my $seqsum = 0;
27 my $avelen;
28 while (my $seq = $in_obj->next_seq() ) {
29 my $sequence = $seq->seq;
30 my $seqlen = length($sequence);
31 $seqcount++;
32 $seqsum = $seqsum + $seqlen;
33 }
34 $avelen = $seqsum/$seqcount;
35 print "AVE= $avelen \n";
36
37
38 # open infile fasta file to get average length
39 $in_obj = Bio::SeqIO->new(-file => $infile, '-format' =>'fasta');
40
41 while (my $seq = $in_obj->next_seq() ) {
42 my $sequence = $seq->seq;
43 $seqid = $seq->id;
44 $sequence =~ s/\n//g;
45 $sequence =~ tr/a-z/A-Z/;
46 my $seqlen = length($sequence);
47
48 if($seqlen > ($avelen * ($percent/100) ) ){
49 print FILE ">";
50 print FILE $seqid." ".$seq->desc."\n".$sequence."\n";
51 }else{
52 print "Writing sequence of $seqlen to DELFILE\n";
53 print DELFILE ">";
54 print DELFILE $seqid." ".$seq->desc."\n".$sequence."\n";
55 }
56 }
57
58 close FILE;
59 close DELFILE;