8
|
1 #!/usr/bin/perl
|
|
2
|
|
3 #This script generates an R script to call NJst
|
|
4 #input is a table with treename<tab>newick tree
|
|
5 use strict;
|
|
6 use Bio::TreeIO;
|
|
7
|
|
8 my $filename = $ARGV[0];
|
|
9 my $outfile = $ARGV[1];
|
|
10 open FILE, $filename or die $!;
|
|
11
|
|
12
|
|
13 my @splitline;
|
|
14
|
|
15 print "require(phybase);\n";
|
|
16 print "genetrees<-c(";
|
|
17 my $counter=0;
|
|
18 my $tree;
|
|
19 while (<FILE>) {
|
|
20 chomp;
|
|
21 #get a line from the data file
|
|
22 my $currentinput = "$_";
|
|
23 @splitline = split(/\t/);
|
|
24 my $treename= $splitline[0];
|
|
25 $tree = $splitline[1];
|
|
26 unless($counter==0){
|
|
27 print ", ";
|
|
28 }
|
|
29 $counter++;
|
|
30 print "'$tree'";
|
|
31 }
|
|
32 print ")\n"; #close genetree vector
|
|
33 print "taxaname<-c(";
|
|
34 my $spnum = tree2spList($tree);
|
|
35 print ")\nspname<-taxaname\n";
|
|
36 print "species.structure<-matrix(0,$spnum,$spnum)\n";
|
|
37 print "diag(species.structure)<-1\n";
|
|
38 print "\n";
|
|
39 print "result<-NJst(genetrees,taxaname,spname,species.structure)\n";
|
|
40 print "write(result, file='$outfile')\n";
|
|
41 close FILE;
|
|
42
|
|
43
|
|
44
|
|
45
|
|
46
|
|
47 #This script requires phybase R package
|
|
48 #NJst is a function used as follows
|
|
49 # genetrees<-c("(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001)
|
|
50 # :0.001):0.001):0.001);","(A:0.004,(B:0.003,(E:0.002,(D:0.001,C:0.001):0.001):0.001):0.001);","(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001):0.001):0.001):0.001);")
|
|
51 # taxaname<-c("A","B","C","D","E")
|
|
52 # spname<-taxaname
|
|
53 # species.structure<-matrix(0, 5, 5)
|
|
54 # diag(species.structure)<-1
|
|
55 #
|
|
56 # NJst(genetrees,taxaname, spname, species.structure)
|
|
57
|
|
58
|
|
59
|
|
60 sub tree2spList {
|
|
61 my $treefile=shift;
|
|
62
|
|
63 my ($charactername, $characterstate);
|
|
64 my ($call, $sp_id, $char_id);
|
|
65
|
|
66 #Open treefile and get taxon names from tree
|
|
67 my $stringfh;
|
|
68 open($stringfh, "<", \$treefile);
|
|
69
|
|
70 my $input = Bio::TreeIO->new(-format => 'newick', -fh => $stringfh);
|
|
71 my $tree = $input->next_tree;
|
|
72
|
|
73 my @taxa = $tree->get_leaf_nodes;
|
|
74 my @names = map { $_->id } @taxa;
|
|
75
|
|
76 my $count=0;
|
|
77 foreach(@names){
|
|
78 my $treespecies = $_;
|
|
79 $treespecies =~ s/^\s+|\s+$//g ; #Trim leading and trailing whitespace
|
|
80 unless($count==0){
|
|
81 print ",";
|
|
82 }
|
|
83 print "'$treespecies'";
|
|
84 $count++
|
|
85 }
|
|
86 return $count;
|
|
87 } #end of tree2spList subroutine
|