Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison getdata/get_gb.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
#!/usr/bin/perl -w
use strict;
use warnings;

#use FindBin;
#use lib "$FindBin::Bin/lib";
use Bio::DB::GenBank;
use Bio::DB::Query::GenBank;
use Bio::SeqIO;

# get_gb.pl -- fetch sequences from GenBank by accession number and append
# them to an output file.
#
# Usage:
#   get_gb.pl DATAFILE DATATYPE OUTTYPE OUTFILE [MANUAL [MANNAMES [GENENAMES]]]
#
#   DATAFILE   file with one accession number per line, or the literal
#              string 'None' to take the accessions from MANUAL instead
#   DATATYPE   passed as -db to Bio::DB::Query::GenBank
#   OUTTYPE    'phytab' for tab-delimited rows
#              (species, gene name, accession, sequence); any other value
#              is passed as -format to Bio::SeqIO
#   OUTFILE    output file; always opened in append mode
#   MANUAL     space-separated accession numbers (used when DATAFILE is 'None')
#   MANNAMES   space-separated custom names that replace the GenBank ids,
#              consumed in the order sequences are returned
#   GENENAMES  space-separated gene names for phytab output; a single name
#              applies to every sequence, several names are matched to the
#              sequences by position
#
# Prints "Did not find <accession>" on STDOUT when no stream is returned for
# an accession. Dies if an input/output file cannot be opened or if an empty
# accession is encountered.

die "Usage: $0 datafile datatype outtype outfile [accessions [custom_names [gene_names]]]\n"
    unless @ARGV >= 4;

my ($datafile, $datatype, $outtype, $outfile, $manual, $mannames, $genenames) = @ARGV;

# The last three arguments are optional; treat a missing one as empty so the
# string comparisons below do not operate on undef.
$manual    = '' unless defined $manual;
$mannames  = '' unless defined $mannames;
$genenames = '' unless defined $genenames;

# Custom names are active whenever MANNAMES is non-empty (splitting an empty
# string yields an empty list, so the splits need no guard).
my $manbin    = $mannames ne '' ? 1 : 0;
my @newnames  = split(/ /, $mannames);
my @genenames = split(/ /, $genenames);

my @accnums;
if ($datafile eq 'None') {
    @accnums = split(/ /, $manual);
    # if(@accnums != @newnames && $manbin ==1 ){
    #     die "Must have the same number of Custom Names as Accession Numbers\n";
    # }
}
else {
    open(my $in, '<', $datafile)
        or die "Cannot open file containing accession numbers '$datafile': $!\n";

    while (my $line = <$in>) {
        chomp $line;
        $line =~ s/\r\z//;      # tolerate CRLF line endings
        next unless $line;      # skip blank lines (and, as before, a bare "0")
        push @accnums, $line;
    }
    close($in);
}

my $is_phytab = $outtype eq 'phytab';
my $gb        = Bio::DB::GenBank->new;

# Position of the current sequence among all sequences written so far.
# It indexes both @newnames and @genenames, so gene names stay aligned with
# sequences whether or not custom names were supplied.
my $seq_index = 0;

foreach my $accession (@accnums) {
    #Should check input for one word per line and throw error if not, which is not done

    chomp $accession;
    if ($accession eq '') {
        die "Put spaces between accession numbers\n";
    }

    my $qry_string = $accession . "[accession] ";
    my $query      = Bio::DB::Query::GenBank->new(
        -query => $qry_string,
        -db    => $datatype,
    );

    # phytab rows are printed by hand; every other format goes through a
    # Bio::SeqIO tied filehandle. Both append to OUTFILE.
    my $phytab_fh;
    my $seqio_fh;
    if ($is_phytab) {
        open($phytab_fh, '>>', $outfile)
            or die "Cannot open output file '$outfile': $!\n";
    }
    else {
        $seqio_fh = Bio::SeqIO->newFh(-format => $outtype, -file => ">>$outfile");
    }

    my $seqio = $gb->get_Stream_by_query($query);
    if (defined $seqio) {
        while (defined(my $sequence = $seqio->next_seq)) {
            if ($manbin == 1) {    #replace GenBank Names with Custom Names
                $sequence->id($newnames[$seq_index]);
                $sequence->desc('');
            }

            if ($is_phytab) {
                my $species;
                if ($manbin == 1) {
                    $species = $sequence->id;
                }
                else {
                    $species = $sequence->species->binomial;
                    $species =~ s/ /_/g;
                }

                my $gene;
                if (@genenames == 0) {
                    $gene = 'None';
                }
                elsif (@genenames == 1) {
                    $gene = $genenames[0];
                }
                else {
                    $gene = $genenames[$seq_index];
                }

                print {$phytab_fh}
                    $species . "\t" . $gene . "\t" . $sequence->accession . "\t" . $sequence->seq . "\n";
            }
            else {
                print {$seqio_fh} $sequence;    # write a sequence object
            }

            $seq_index++;
        }
    }
    else {
        print "Did not find $accession\n";
    }

    if ($is_phytab) {
        # Buffered write errors only surface at close, so check it.
        close($phytab_fh)
            or die "Error closing output file '$outfile': $!\n";
    }
}
exit;