Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view getdata/get_gb.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; #use FindBin; #use lib "$FindBin::Bin/lib"; use Bio::DB::GenBank; use Bio::SeqIO; my $datafile = $ARGV[0]; my $datatype = $ARGV[1]; my $outtype = $ARGV[2]; my $outfile = $ARGV[3]; my $manual = $ARGV[4]; my $mannames = $ARGV[5]; my $genenames = $ARGV[6]; my $accessions; my @accnums; my @newnames; my $manbin=0; my @genenames; my $genebin=0; unless($mannames eq ''){ @newnames = split(/ /,$mannames); $manbin=1; } unless($genenames eq ''){ @genenames = split(/ /,$genenames); $genebin=1; } if($datafile eq 'None'){ @accnums = split(/ /,$manual); # if(@accnums != @newnames && $manbin ==1 ){ # die "Must have the same number of Custom Names as Accession Numbers\n"; # } }else{ open (FILE,"<$datafile") or die "Cannot open file containing accession numbers\n"; while (<FILE>) { chomp; next unless ($_); push(@accnums, $_); } } my $countnames = 0; foreach (@accnums){ #Should check input for one word per line and throw error if not, which is not done $accessions = $_; chomp; if($accessions eq ""){ die "Put spaces between accession numbers\n"; } my $qry_string .= $accessions."[accession]"." "; my $GBseq; my $gb = new Bio::DB::GenBank; my $query = Bio::DB::Query::GenBank->new (-query =>$qry_string, -db =>$datatype); my $count; my $species; my $seqio; if($outtype eq "phytab"){ #print phytab format, do not use bioperl as below. open(OUTFILE, ">>$outfile"); if( defined ($seqio = $gb->get_Stream_by_query($query)) ){ # my $seqio = $gb->get_Stream_by_query($query); while( defined ($GBseq = $seqio->next_seq )) { my $sequence = $GBseq; # read a sequence object if($manbin ==1){ #replace GenBank Names with Custom Names $sequence->id($newnames[$countnames]); $sequence->desc(''); $species = $sequence->id; $countnames++; }else{ $species = $sequence->species->binomial; $species =~ s/ /_/g ; } if(@genenames > 0){ if(@genenames == 1){ print OUTFILE $species."\t".$genenames[0]."\t".$sequence->accession."\t".$sequence->seq."\n"; }else{ print OUTFILE $species."\t".$genenames[$countnames-1]."\t".$sequence->accession."\t".$sequence->seq."\n"; } }else{ print OUTFILE $species."\tNone\t".$sequence->accession."\t".$sequence->seq."\n"; } } }else{ print "Did not find $accessions\n"; } }else{ my $fh = Bio::SeqIO->newFh(-format=>$outtype, -file=>">>$outfile"); if( defined ($seqio = $gb->get_Stream_by_query($query)) ){ # my $seqio = $gb->get_Stream_by_query($query); while( defined ($GBseq = $seqio->next_seq )) { my $sequence = $GBseq; # read a sequence object if($manbin ==1){ #replace GenBank Names with Custom Names $sequence->id($newnames[$countnames]); $sequence->desc(''); $countnames++; } print $fh $sequence; # write a sequence object } }else{ print "Did not find $accessions\n"; } } } exit;