Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff getdata/get_gb_sp.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl
#
# get_gb_sp.pl - download GenBank records for a list of organism names.
#
# Usage:
#   get_gb_sp.pl <datafile> <datatype> <outtype> <outfile> <nodata>
#
#   datafile  text file with one search term per line; each term is queried
#             with the "[organism]" field qualifier appended
#   datatype  passed as -db to Bio::DB::Query::GenBank
#   outtype   passed as -format to Bio::SeqIO
#   outfile   receives every retrieved sequence; overwritten if it exists
#   nodata    receives, one per line, every term for which no sequence was
#             retrieved (the query failed or the result stream was empty)
#
# Dies if fewer than five arguments are given or if a file cannot be opened.
#
use strict;
no warnings;    #genbank produces annoying warning if no sequence is found

#use FindBin;
#use lib "$FindBin::Bin/lib";
use Bio::DB::GenBank;
use Bio::SeqIO;
use Bio::Root::Exception;
use Error qw(:try);

# NOTE(review): Bio::DB::Query::GenBank is used below without an explicit
# "use"; presumably it is loaded as a side effect of Bio::DB::GenBank - confirm.

die "Usage: $0 <datafile> <datatype> <outtype> <outfile> <nodata>\n"
    unless @ARGV >= 5;

my ($datafile, $datatype, $outtype, $outfile, $nodata) = @ARGV;

# Collect the search terms, one per line.
open(my $names_in, '<', $datafile)
    or die "Cannot open file containing accession numbers: $!\n";

my @names;
while (my $line = <$names_in>) {
    # Strip the line ending (including a DOS "\r") and surrounding blanks so
    # that whitespace-only lines are not sent to GenBank as queries.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    # Test length, not truth: a line consisting of "0" is still a term.
    next unless length $line;

    push @names, $line;
}
close $names_in;

# Truncate any previous output up front so that a run which retrieves nothing
# still leaves an empty file behind; Bio::SeqIO then appends to it.
open(my $truncate, '>', $outfile) or die "Cannot open outfile: $!\n";
close $truncate;

open(my $no_data, '>', $nodata) or die "Cannot open file: $!\n";

my $seq_out = Bio::SeqIO->newFh(-format => $outtype, -file => ">>$outfile");

# One database handle serves every query.
my $gb = Bio::DB::GenBank->new;

for my $name (@names) {
    my $query = Bio::DB::Query::GenBank->new(
        -query => $name . '[organism]' . ' ',
        -db    => $datatype,
    );

    # Request the stream exactly once: every call is a round trip to NCBI.
    # A failed request leaves $stream undefined instead of killing the run.
    my $stream = eval { $gb->get_Stream_by_query($query) };

    my $written = 0;
    if ($stream) {
        while (my $seq = $stream->next_seq) {
            print {$seq_out} $seq;    # write a sequence object
            $written++;
        }
    }

    # Nothing written for this term, whether the query failed or the stream
    # was empty: record it in the no-data report.
    print {$no_data} "$name\n" unless $written;
}

close $no_data or die "Cannot close file: $!\n";

exit;