Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison getdata/get_gb.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
| author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
|---|---|
| date | Tue, 11 Mar 2014 12:19:13 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5b9a38ec4a39 |
|---|---|
#!/usr/bin/perl -w
#
# get_gb.pl -- look up GenBank records by accession number and append the
# resulting sequences to an output file, either through Bio::SeqIO or as
# tab-separated "phytab" rows (name, gene, accession, sequence).
#
# Usage:
#   get_gb.pl DATAFILE DATATYPE OUTTYPE OUTFILE [MANUAL [MANNAMES [GENENAMES]]]
#
#   DATAFILE   file with one accession per line, or the literal string
#              'None' to take the accessions from MANUAL instead
#   DATATYPE   passed as -db to Bio::DB::Query::GenBank
#   OUTTYPE    'phytab', or a format name handed to Bio::SeqIO
#   OUTFILE    output path; always opened in append mode
#   MANUAL     space-separated accessions (only read when DATAFILE is 'None')
#   MANNAMES   space-separated custom names that replace the sequence ids,
#              consumed in the order sequences are returned
#   GENENAMES  space-separated gene names for the second phytab column; a
#              single name is applied to every row
use strict;
use warnings;

#use FindBin;
#use lib "$FindBin::Bin/lib";
use Bio::DB::GenBank;
# Loaded explicitly because the query class is instantiated below; do not
# rely on another module pulling it in.
use Bio::DB::Query::GenBank;
use Bio::SeqIO;

die "Usage: $0 datafile datatype outtype outfile [manual [names [genenames]]]\n"
    if @ARGV < 4;

my ($datafile, $datatype, $outtype, $outfile) = @ARGV[0 .. 3];

# The optional arguments default to '' so the comparisons and splits below
# never operate on undef.
my $manual    = defined $ARGV[4] ? $ARGV[4] : '';
my $mannames  = defined $ARGV[5] ? $ARGV[5] : '';
my $genenames = defined $ARGV[6] ? $ARGV[6] : '';

my @newnames  = split / /, $mannames;
my @genenames = split / /, $genenames;
my $manbin    = $mannames ne '' ? 1 : 0;    # 1 when custom names were supplied

# ---------------------------------------------------------------------------
# Collect the accession numbers.
# ---------------------------------------------------------------------------
my @accnums;
if ($datafile eq 'None') {
    @accnums = split / /, $manual;
    # if(@accnums != @newnames && $manbin ==1 ){
    #     die "Must have the same number of Custom Names as Accession Numbers\n";
    # }
}
else {
    open my $in, '<', $datafile
        or die "Cannot open file containing accession numbers ($datafile): $!\n";
    while (my $line = <$in>) {
        # Strip the line ending (LF or CRLF) and any trailing blanks.
        $line =~ s/\s+\z//;
        next unless $line;
        push @accnums, $line;
    }
    close $in;
}

# ---------------------------------------------------------------------------
# Open the output once, in append mode, instead of once per accession.
# ---------------------------------------------------------------------------
my $is_phytab = $outtype eq 'phytab';
my $phytab_fh;    # plain filehandle, phytab output only
my $seqio_fh;     # Bio::SeqIO tied filehandle, every other format
if ($is_phytab) {
    open $phytab_fh, '>>', $outfile
        or die "Cannot open output file $outfile: $!\n";
}
else {
    $seqio_fh = Bio::SeqIO->newFh(-format => $outtype, -file => ">>$outfile");
}

my $gb = Bio::DB::GenBank->new;

# Position of the current sequence across ALL accessions.  It indexes both
# @newnames and @genenames and advances for every sequence, whether or not
# custom names are in use.
my $seq_index = 0;

#Should check input for one word per line and throw error if not, which is not done
for my $accession (@accnums) {
    # An empty element comes from a leading or doubled space in MANUAL.
    die "Put spaces between accession numbers\n" if $accession eq '';

    my $query = Bio::DB::Query::GenBank->new(
        -query => $accession . '[accession] ',
        -db    => $datatype,
    );

    my $stream = $gb->get_Stream_by_query($query);
    unless (defined $stream) {
        print "Did not find $accession\n";
        next;
    }

    while (defined(my $sequence = $stream->next_seq)) {
        # Replace the GenBank name with the custom name, if one is left.
        my $custom = $manbin ? $newnames[$seq_index] : undef;
        if (defined $custom) {
            $sequence->id($custom);
            $sequence->desc('');
        }

        if ($is_phytab) {
            # Print phytab rows directly; Bio::SeqIO is not used for this.
            my $label;
            if (defined $custom) {
                $label = $custom;
            }
            else {
                # Records without a species object fall back to their id.
                my $species = $sequence->species;
                $label = defined $species ? $species->binomial : $sequence->id;
                $label =~ s/ /_/g;
            }

            # No gene names -> 'None'; one name -> shared by every row;
            # otherwise one name per sequence, 'None' once the list runs out.
            my $gene = 'None';
            if (@genenames == 1) {
                $gene = $genenames[0];
            }
            elsif (@genenames > 1 && defined $genenames[$seq_index]) {
                $gene = $genenames[$seq_index];
            }

            print {$phytab_fh}
                join("\t", $label, $gene, $sequence->accession, $sequence->seq),
                "\n";
        }
        else {
            print $seqio_fh $sequence;    # write a sequence object
        }

        $seq_index++;
    }
}

if ($is_phytab) {
    # Buffered write errors only surface at close.
    close $phytab_fh or die "Cannot close output file $outfile: $!\n";
}
exit;
