comparison getdata/get_gb.pl @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/perl -w
2 use strict;
3
4 #use FindBin;
5 #use lib "$FindBin::Bin/lib";
6 use Bio::DB::GenBank;
7 use Bio::SeqIO;
8
9
10 my $datafile = $ARGV[0];
11 my $datatype = $ARGV[1];
12 my $outtype = $ARGV[2];
13 my $outfile = $ARGV[3];
14 my $manual = $ARGV[4];
15 my $mannames = $ARGV[5];
16 my $genenames = $ARGV[6];
17
18
19 my $accessions;
20 my @accnums;
21 my @newnames;
22 my $manbin=0;
23 my @genenames;
24 my $genebin=0;
25
26 unless($mannames eq ''){
27 @newnames = split(/ /,$mannames);
28 $manbin=1;
29 }
30
31 unless($genenames eq ''){
32 @genenames = split(/ /,$genenames);
33 $genebin=1;
34 }
35
36 if($datafile eq 'None'){
37 @accnums = split(/ /,$manual);
38 # if(@accnums != @newnames && $manbin ==1 ){
39 # die "Must have the same number of Custom Names as Accession Numbers\n";
40 # }
41 }else{
42 open (FILE,"<$datafile") or die "Cannot open file containing accession numbers\n";
43
44 while (<FILE>)
45 {
46 chomp;
47 next unless ($_);
48 push(@accnums, $_);
49 }
50 }
51 my $countnames = 0;
52 foreach (@accnums){
53 #Should check input for one word per line and throw error if not, which is not done
54
55 $accessions = $_;
56 chomp;
57 if($accessions eq ""){
58 die "Put spaces between accession numbers\n";
59 }
60 my $qry_string .= $accessions."[accession]"." ";
61
62 my $GBseq;
63 my $gb = new Bio::DB::GenBank;
64 my $query = Bio::DB::Query::GenBank->new
65 (-query =>$qry_string,
66 -db =>$datatype);
67
68 my $count;
69 my $species;
70 my $seqio;
71 if($outtype eq "phytab"){ #print phytab format, do not use bioperl as below.
72 open(OUTFILE, ">>$outfile");
73 if( defined ($seqio = $gb->get_Stream_by_query($query)) ){
74 # my $seqio = $gb->get_Stream_by_query($query);
75 while( defined ($GBseq = $seqio->next_seq )) {
76 my $sequence = $GBseq; # read a sequence object
77 if($manbin ==1){ #replace GenBank Names with Custom Names
78 $sequence->id($newnames[$countnames]);
79 $sequence->desc('');
80 $species = $sequence->id;
81 $countnames++;
82 }else{
83 $species = $sequence->species->binomial;
84 $species =~ s/ /_/g ;
85 }
86 if(@genenames > 0){
87 if(@genenames == 1){
88 print OUTFILE $species."\t".$genenames[0]."\t".$sequence->accession."\t".$sequence->seq."\n";
89 }else{
90 print OUTFILE $species."\t".$genenames[$countnames-1]."\t".$sequence->accession."\t".$sequence->seq."\n";
91 }
92 }else{
93 print OUTFILE $species."\tNone\t".$sequence->accession."\t".$sequence->seq."\n";
94 }
95 }
96 }else{
97 print "Did not find $accessions\n";
98 }
99 }else{
100 my $fh = Bio::SeqIO->newFh(-format=>$outtype, -file=>">>$outfile");
101
102 if( defined ($seqio = $gb->get_Stream_by_query($query)) ){
103 # my $seqio = $gb->get_Stream_by_query($query);
104 while( defined ($GBseq = $seqio->next_seq )) {
105 my $sequence = $GBseq; # read a sequence object
106 if($manbin ==1){ #replace GenBank Names with Custom Names
107 $sequence->id($newnames[$countnames]);
108 $sequence->desc('');
109 $countnames++;
110 }
111 print $fh $sequence; # write a sequence object
112 }
113 }else{
114 print "Did not find $accessions\n";
115 }
116 }
117 }
118 exit;