Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison getdata/phylota_with_taxid.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 #!/usr/bin/perl -w | |
2 use strict; | |
3 use LWP::Simple; | |
4 use Bio::SeqIO; | |
5 | |
# Fetch every PhyLoTA cluster for one taxon id, write the sequences to a
# FASTA file, then convert that FASTA file to a tab-delimited phytab file.
#
# Command-line arguments (all required):
#   1. taxon id, sent as the "ti" parameter of the PhyLoTA browser URLs
#   2. path of the FASTA file to write
#   3. path of the phytab file to write
#
# Each phytab row is: species <TAB> cluster <TAB> gi <TAB> sequence.
if (@ARGV < 3) {
    die "Usage: $0 <ti> <fasta_outfile> <phytab_outfile>\n";
}
my ($ti, $outfile, $phytabfile) = @ARGV;

# Fail loudly (non-zero exit status and a message) when the output file
# cannot be created, instead of exiting silently with status 0.
open(my $out_fh, '>', $outfile)
    or die "Cannot open '$outfile' for writing: $!\n";

my $content  = getclustersfromphylota($ti);
my @weblines = split(/\<\/tr\>/, $content);
my @ci;

#Parse html from phylota browser to retain just each ci
foreach my $row (@weblines) {
    next unless $row =~ m/getcluster\.cgi/;
    chomp $row;
    # Drop everything from the end of the cluster link's query string onward.
    $row =~ s/\&ntype\=1\&db\=184\".+//;
    # Drop everything up to and including the "cl=" parameter name.
    $row =~ s/(.*?)getcluster\.cgi.+cl\=//;
    $row =~ s/\<\/font\>\<\/td\>//;
    chomp $row;
    # Each chunk produced by the split may begin with the newline that
    # followed the previous "</tr>".
    $row =~ s/^\n//;
    push(@ci, $row);
}

#get fasta files for trees
foreach my $cluster_id (@ci) {
    my $addstring = 'ti' . $ti . 'ci' . $cluster_id . '_';
    my $fastafile = getfastafromphylota($cluster_id, $ti);
    #Add TI_CI_ to each fastaheader
    $fastafile =~ s/\>/\>$addstring/g;
    print {$out_fh} $fastafile;
}
# Buffered write errors only surface at close time, so check it.
close($out_fh) or die "Cannot close '$outfile': $!\n";

#Now convert fasta file to phytab file and write
open(my $phytab_fh, '>', $phytabfile)
    or die "Cannot open '$phytabfile' for writing: $!\n";

# Re-read the FASTA file that was just written.
my $in_obj = Bio::SeqIO->new(-file => $outfile, '-format' => 'fasta');

while (my $seq = $in_obj->next_seq()) {
    my $sequenceid   = $seq->id;
    my $species_name = $seq->desc;
    $species_name = '' unless defined $species_name;
    my $fullheader = $sequenceid . " " . $species_name;

    my $sequence = $seq->seq;
    $sequence = '' unless defined $sequence;

    # The header is split on "_" into: cluster label (the ti/ci prefix added
    # above), gi field, ti field, species name. The ti field is not written
    # to the phytab file.
    my @header = split(/_/, $fullheader);
    if (@header < 4) {
        warn "Unexpected FASTA header '$fullheader': "
           . "expected 4 '_'-separated fields\n";
    }
    my ($cluster, $seqgi, undef, $seqsp) = @header;
    for my $field ($cluster, $seqgi, $seqsp) {
        $field = '' unless defined $field;
    }

    $seqgi =~ s/gi//;
    $seqsp = cleansp($seqsp);

    print {$phytab_fh} $seqsp . "\t" . $cluster . "\t" . $seqgi . "\t" . $sequence . "\n";
}
close($phytab_fh) or die "Cannot close '$phytabfile': $!\n";
63 | |
64 | |
65 | |
66 | |
67 | |
68 | |
69 #************************************************************** | |
70 #subroutines | |
71 | |
# Sanitise a species-name string.
#
# Takes the raw name as its only argument and returns a copy in which every
# space has been turned into an underscore and every period, apostrophe and
# hyphen has been deleted.
sub cleansp
{
    my ($name) = @_;

    # Map spaces to underscores, character for character.
    $name =~ tr/ /_/;
    # Delete the punctuation characters outright.
    $name =~ tr/.'\-//d;

    return $name;
}
# Download the FASTA text of a single PhyLoTA cluster.
#
# Arguments (positional): the cluster id (ci) first, then the taxon id (ti).
# Both are placed into the sql_getcluster_fasta.cgi query string as given.
#
# Returns the response body with the first "<html><pre>", "</html>" and
# "</pre>" occurrences removed. Dies when get() returns undef for the URL.
sub getfastafromphylota
{
    my ($ci, $ti) = @_;

    my $url = join('',
        'http://phylota.net/cgi-bin/sql_getcluster_fasta.cgi?format=all&db=184&ti=',
        $ti,
        '&cl=',
        $ci,
        '&ntype=1',
    );

    my $content = get($url);
    defined $content or die "Couldn't get $url";

    # Strip the HTML wrapper around the payload; each marker is removed at
    # most once, in this order.
    for my $marker ('<html><pre>', '</html>', '</pre>') {
        $content =~ s/\Q$marker\E//;
    }

    return $content;
}
# Download the PhyLoTA cluster-set page for one taxon id.
#
# Argument: the taxon id (ti), placed into the sql_getclusterset.cgi query
# string as given.
#
# Returns the response body with the first "<html><pre>", "</html>" and
# "</pre>" occurrences removed. Dies when get() returns undef for the URL.
sub getclustersfromphylota
{
    my ($ti) = @_;

    my $url = join('',
        'http://phylota.net/cgi-bin/sql_getclusterset.cgi?ti=',
        $ti,
        '&ntype=1&piflag=1&dflag=0&db=184',
    );

    my $content = get($url);
    defined $content or die "Couldn't get $url";

    # Strip the HTML wrapper markers; each one is removed at most once, in
    # this order.
    for my $marker ('<html><pre>', '</html>', '</pre>') {
        $content =~ s/\Q$marker\E//;
    }

    return $content;
}