annotate ucsb_phylogenetics/NJst/makeNJst.pl @ 8:798d8401d420 draft

Uploaded
author ucsb-phylogenetics
date Sat, 08 Sep 2012 15:33:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
1 #!/usr/bin/perl
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
2
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
3 #This script generates an R script to call NJst
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
4 #input is a table with treename<tab>newick tree
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
5 use strict;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
6 use Bio::TreeIO;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
7
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
8 my $filename = $ARGV[0];
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
9 my $outfile = $ARGV[1];
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
10 open FILE, $filename or die $!;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
11
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
12
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
13 my @splitline;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
14
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
15 print "require(phybase);\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
16 print "genetrees<-c(";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
17 my $counter=0;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
18 my $tree;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
19 while (<FILE>) {
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
20 chomp;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
21 #get a line from the data file
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
22 my $currentinput = "$_";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
23 @splitline = split(/\t/);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
24 my $treename= $splitline[0];
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
25 $tree = $splitline[1];
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
26 unless($counter==0){
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
27 print ", ";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
28 }
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
29 $counter++;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
30 print "'$tree'";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
31 }
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
32 print ")\n"; #close genetree vector
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
33 print "taxaname<-c(";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
34 my $spnum = tree2spList($tree);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
35 print ")\nspname<-taxaname\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
36 print "species.structure<-matrix(0,$spnum,$spnum)\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
37 print "diag(species.structure)<-1\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
38 print "\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
39 print "result<-NJst(genetrees,taxaname,spname,species.structure)\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
40 print "write(result, file='$outfile')\n";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
41 close FILE;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
42
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
43
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
44
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
45
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
46
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
47 #This script requires phybase R package
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
48 #NJst is a function used as follows
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
49 # genetrees<-c("(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001)
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
50 # :0.001):0.001):0.001);","(A:0.004,(B:0.003,(E:0.002,(D:0.001,C:0.001):0.001):0.001):0.001);","(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001):0.001):0.001):0.001);")
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
51 # taxaname<-c("A","B","C","D","E")
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
52 # spname<-taxaname
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
53 # species.structure<-matrix(0, 5, 5)
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
54 # diag(species.structure)<-1
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
55 #
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
56 # NJst(genetrees,taxaname, spname, species.structure)
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
57
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
58
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
59
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
60 sub tree2spList {
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
61 my $treefile=shift;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
62
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
63 my ($charactername, $characterstate);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
64 my ($call, $sp_id, $char_id);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
65
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
66 #Open treefile and get taxon names from tree
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
67 my $stringfh;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
68 open($stringfh, "<", \$treefile);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
69
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
70 my $input = Bio::TreeIO->new(-format => 'newick', -fh => $stringfh);
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
71 my $tree = $input->next_tree;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
72
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
73 my @taxa = $tree->get_leaf_nodes;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
74 my @names = map { $_->id } @taxa;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
75
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
76 my $count=0;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
77 foreach(@names){
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
78 my $treespecies = $_;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
79 $treespecies =~ s/^\s+|\s+$//g ; #Trim leading and trailing whitespace
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
80 unless($count==0){
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
81 print ",";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
82 }
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
83 print "'$treespecies'";
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
84 $count++
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
85 }
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
86 return $count;
798d8401d420 Uploaded
ucsb-phylogenetics
parents:
diff changeset
87 } #end of tree2spList subroutine