view phyloconversion/uniprotfasta2phytab.pl @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
line wrap: on
line source

#!/usr/bin/perl -w

use strict;

use FindBin;
use lib "$FindBin::Bin/lib";
use Bio::DB::Fasta;
use Bio::SeqIO;
use Bio::Seq;

# Convert a UniProt-style FASTA file into PHYTAB rows written to STDOUT.
#
# Usage: uniprotfasta2phytab.pl <infile.fasta> <partition>
#
# One tab-separated row is printed per FASTA record:
#   species    organism name taken from the OS= field of the header
#              description, with spaces replaced by underscores
#   partition  second command-line argument, copied verbatim
#   seqid      second pipe-delimited field of the record id
#              (e.g. P12345 from sp|P12345|NAME_HUMAN); the whole id is
#              used when the id contains no such field
#   sequence   residue string of the record

#inputs
my $infile    = shift(@ARGV);
my $partition = shift(@ARGV);
#my $delpipes=shift(@ARGV);

# Both arguments are required; without them every row would be malformed.
die "Usage: $0 <infile.fasta> <partition>\n"
    unless defined $infile && defined $partition;

# open infile fasta file
my $in_obj = Bio::SeqIO->new(-file => $infile, '-format' => 'fasta');

while (my $seq = $in_obj->next_seq()) {
    # Treat a missing sequence as empty rather than printing undef.
    my $sequence = $seq->seq;
    $sequence = '' unless defined $sequence;
    # Defensive: make sure no line breaks end up inside a PHYTAB row.
    $sequence =~ s/\n//g;

    # UniProt ids have the form db|ACCESSION|ENTRY_NAME. Keep the accession,
    # but fall back to the full id for headers that are not pipe-delimited
    # so the seqid column is never left empty.
    my $raw_id = $seq->id;
    $raw_id = '' unless defined $raw_id;
    my @id_fields = split(/\|/, $raw_id);
    my $seqid = (defined $id_fields[1] && length $id_fields[1])
        ? $id_fields[1]
        : $raw_id;

    my $species = $seq->desc;
    $species = '' unless defined $species;
    # Species name is after OS=. Using .* (not .+) also handles a
    # description that starts directly with OS=.
    $species =~ s/.*OS=//;
    # Species name ends at the next header field. Older headers continue
    # with GN= or PE=; current UniProt headers put OX= (taxonomy id) right
    # after the organism name and end with SV=.
    $species =~ s/ (?:OX|GN|PE|SV)=.*//;
    $species =~ s/ /_/g;

    print join("\t", $species, $partition, $seqid, $sequence), "\n";
}