changeset 8:798d8401d420 draft

Uploaded
author ucsb-phylogenetics
date Sat, 08 Sep 2012 15:33:34 -0400
parents c83d7e34cc88
children 51904e90a6b2
files ucsb_phylogenetics/NJst/NJst.sh ucsb_phylogenetics/NJst/NJst.xml ucsb_phylogenetics/NJst/README.txt ucsb_phylogenetics/NJst/makeNJst.pl ucsb_phylogenetics/PDPairs/PD.pl ucsb_phylogenetics/PDPairs/PDpairs.xml ucsb_phylogenetics/PDPairs/README.txt ucsb_phylogenetics/SHtest/README.txt ucsb_phylogenetics/SHtest/SHtest.pl ucsb_phylogenetics/SHtest/SHtest.xml ucsb_phylogenetics/aliscorecut/README.txt ucsb_phylogenetics/gblocks/README.txt ucsb_phylogenetics/gblocks/gblocks.pl ucsb_phylogenetics/gblocks/gblocks.xml ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.py ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.txt ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.xml ucsb_phylogenetics/hmmbuild/README.txt ucsb_phylogenetics/hmmsearch/README.txt ucsb_phylogenetics/iAssembler/iAssembler2.xml ucsb_phylogenetics/iAssembler/iAssembler_README.txt ucsb_phylogenetics/iAssembler/iAssembler_wrapper2.pl ucsb_phylogenetics/iAssembler/increment.txt ucsb_phylogenetics/muscle/README.txt ucsb_phylogenetics/muscle/muscle.py ucsb_phylogenetics/muscle/muscle.xml ucsb_phylogenetics/mview/README.txt ucsb_phylogenetics/mview/mview.pl ucsb_phylogenetics/mview/mview.xml ucsb_phylogenetics/phytab_LB_pruner/LB_prunerG.pl ucsb_phylogenetics/phytab_LB_pruner/README.txt ucsb_phylogenetics/phytab_LB_pruner/phytab_LB_pruner.py ucsb_phylogenetics/phytab_LB_pruner/phytab_LB_pruner.xml ucsb_phylogenetics/phytab_clearcut/README.txt ucsb_phylogenetics/phytab_clearcut/phytab_clearcut.py ucsb_phylogenetics/phytab_clearcut/phytab_clearcut.xml ucsb_phylogenetics/phytab_prank/README.txt ucsb_phylogenetics/phytab_prank/phytab_prank.py ucsb_phylogenetics/phytab_prank/phytab_prank.xml ucsb_phylogenetics/prottest/README.txt ucsb_phylogenetics/prottest/prottest.xml ucsb_phylogenetics/prottest/prottest_wrapper.pl ucsb_phylogenetics/prune_taxa/Prune_taxa.xml ucsb_phylogenetics/prune_taxa/README.txt ucsb_phylogenetics/raxml/README.txt ucsb_phylogenetics/raxml/raxml.pl ucsb_phylogenetics/raxml/raxml.xml ucsb_phylogenetics/scythe/README.txt ucsb_phylogenetics/scythe/scythe.xml ucsb_phylogenetics/tab2trees/README.txt ucsb_phylogenetics/tab2trees/makeRtrees.pl ucsb_phylogenetics/tab2trees/phytab2trees.sh ucsb_phylogenetics/tab2trees/tab2trees.xml ucsb_phylogenetics/tagdust/README.txt ucsb_phylogenetics/tagdust/tagdust.xml ucsb_phylogenetics/thinningtrees/README.txt ucsb_phylogenetics/thinningtrees/Thinning_trees.xml ucsb_phylogenetics/tree_support/README.txt ucsb_phylogenetics/tree_support/tree_support.xml ucsb_phylogenetics/vert_tree_format/README.txt ucsb_phylogenetics/vert_tree_format/vert_tree_format.xml
diffstat 61 files changed, 2312 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/NJst/NJst.sh	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+#First call the perl script, which reads the trees and writes an R script (Rnjst.R)
+/home/galaxy/galaxy-dist/tools/Rtools/makeNJst.pl $1 $2 > Rnjst.R 2>log.txt
+
+R --vanilla < Rnjst.R 2>log.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/NJst/NJst.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,19 @@
+<tool id="NJst" name="NJst">
+  <description>Estimate species tree with NJst from table of tree names and newick trees</description>
+  <command interpreter="bash">NJst.sh $input $output </command>
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset" help="First column=Tree Name. Second Column=newick phylogeny"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="output" />
+  </outputs>
+
+  <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/NJst/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Produces a species tree from an input of multiple gene trees
+
+(Liu and Yu 2011)
+
+BioPerl and the phybase R package are required to be installed.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/NJst/makeNJst.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,87 @@
+#!/usr/bin/perl
+
+#This script generates an R script to call NJst
+#input is a table with treename<tab>newick tree
+use strict;
+use Bio::TreeIO;
+
+my $filename = $ARGV[0];
+my $outfile = $ARGV[1];
+open FILE, $filename or die $!;
+
+
+my @splitline;
+
+print "require(phybase);\n";
+print "genetrees<-c(";
+my $counter=0;
+my $tree;
+while (<FILE>) {
+        chomp;
+        #get a line from the data file
+        my $currentinput = "$_";
+	@splitline = split(/\t/);
+	my $treename= $splitline[0];
+	$tree = $splitline[1];
+	unless($counter==0){
+		print ", ";
+	}
+	$counter++;
+        print "'$tree'";
+}
+print ")\n"; #close genetree vector
+print "taxaname<-c(";
+my $spnum = tree2spList($tree);
+print ")\nspname<-taxaname\n";
+print "species.structure<-matrix(0,$spnum,$spnum)\n";
+print "diag(species.structure)<-1\n";
+print "\n";
+print "result<-NJst(genetrees,taxaname,spname,species.structure)\n";
+print "write(result, file='$outfile')\n";
+close FILE;
+
+
+
+
+
+#This script requires phybase R package
+#NJst is a function used as follows
+#	genetrees<-c("(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001)
+#		:0.001):0.001):0.001);","(A:0.004,(B:0.003,(E:0.002,(D:0.001,C:0.001):0.001):0.001):0.001);","(A:0.004,(B:0.003,(C:0.002,(D:0.001,E:0.001):0.001):0.001):0.001);")
+#     taxaname<-c("A","B","C","D","E")
+#     spname<-taxaname
+#     species.structure<-matrix(0, 5, 5)
+#     diag(species.structure)<-1
+#     
+#     NJst(genetrees,taxaname, spname, species.structure)
+
+
+
+sub tree2spList {
+	my $treefile=shift;
+
+	my ($charactername, $characterstate); 
+	my ($call, $sp_id, $char_id);
+
+	#Open treefile and get taxon names from tree
+	my $stringfh;
+	open($stringfh, "<", \$treefile);
+
+	my $input = Bio::TreeIO->new(-format => 'newick', -fh => $stringfh); 
+	my $tree = $input->next_tree; 
+
+	my @taxa = $tree->get_leaf_nodes; 
+	my @names = map { $_->id } @taxa;
+
+	my $count=0;
+	foreach(@names){
+		my $treespecies = $_;
+		$treespecies =~ s/^\s+|\s+$//g ;	#Trim leading and trailing whitespace
+		unless($count==0){
+			print ",";
+		}
+		print "'$treespecies'";
+		$count++
+	}
+	return $count;
+}	#end of tree2spList subroutine
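A minimal illustrative sketch (not one of the files in this changeset) of what makeNJst.pl does: read the two-column phytab input (tree name, tab, newick tree) and emit the phybase/NJst R script shown in the comments above. The crude regex used here to pull leaf names out of the last tree stands in for the BioPerl parsing in tree2spList; the file layout and R lines are taken from the script above, everything else is assumption.

#!/usr/bin/env python
import re
import sys

def taxa_from_newick(newick):
    # crude leaf-name extraction: labels that follow '(' or ',' up to ':', ',' or ')'
    return re.findall(r'[(,]\s*([^():,;]+)\s*[:,)]', newick)

def make_njst_r(table_path, result_path):
    trees = []
    with open(table_path) as handle:
        for line in handle:
            line = line.rstrip("\n")
            if not line:
                continue
            name, tree = line.split("\t")[:2]
            trees.append(tree)
    taxa = sorted(set(taxa_from_newick(trees[-1])))   # taxon names from the last tree, as in the Perl
    n = len(taxa)
    lines = [
        "require(phybase);",
        "genetrees<-c(%s)" % ", ".join("'%s'" % t for t in trees),
        "taxaname<-c(%s)" % ",".join("'%s'" % t for t in taxa),
        "spname<-taxaname",
        "species.structure<-matrix(0,%d,%d)" % (n, n),
        "diag(species.structure)<-1",
        "result<-NJst(genetrees,taxaname,spname,species.structure)",
        "write(result, file='%s')" % result_path,
    ]
    return "\n".join(lines) + "\n"

if __name__ == "__main__":
    sys.stdout.write(make_njst_r(sys.argv[1], sys.argv[2]))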
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/PDPairs/PD.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,76 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Bio::TreeIO;
+use Bio::Tree::Tree;
+
+##Need to add error checking
+	#1. check if input is tree file and species pairs table
+	#2. check for misspelling of species in species pairs - report when sp not found
+	#3. Throw ERROR back into Galaxy for users to diagnose problem
+##Want to add
+	#1. Option for dividing in half - for divergence times
+	#2. Option to switch orientation of table? Rows be dif trees cols be diff pairs ??
+
+###this script will find the phylogenetic distance between pairs of species
+#inputs are a table of species pairs, a tree file, an output filename, and yes|no for halving distances
+#usage:
+#PD.pl <pairsTable> <treefile> <outfile> <yes|no>
+# parse in newick/new hampshire format
+my @species1;
+my @species2;
+
+
+my $half=$ARGV[3];
+my $divtimebool;
+if($half eq 'yes'){
+	$divtimebool=1;
+}elsif($half eq 'no'){
+	$divtimebool=0;
+}else{
+	die "Argument must contain yes or no for divergence times\n";
+}
+my $outfile = $ARGV[2];
+open(OUT, ">$outfile") or die("Couldn't open output file $ARGV[2]\n");
+
+
+my $pairsfile = $ARGV[0];
+open(PAIRS, "$pairsfile") or die("Couldn't open input file $ARGV[0]\n");
+while (<PAIRS>) {
+        chomp;
+        my $sp1;
+        my $sp2;
+        ($sp1, $sp2) = split("\t");
+        push(@species1, $sp1);
+        push(@species2, $sp2);
+}
+
+my $treefile = $ARGV[1];
+
+for(my $i=0; $i < @species1; $i++){
+        print OUT $species1[$i]."\t".$species2[$i];
+        open(TREE, "$treefile") or die("Couldn't open tree file $ARGV[1]\n");
+
+        my $treeio = new Bio::TreeIO('-format' => 'newick',
+                                   '-file'   => $treefile);
+
+        while(my $tree = $treeio->next_tree){
+                my $node1 = $tree->find_node(-id => $species1[$i]);
+                my $node2 = $tree->find_node(-id => $species2[$i]);
+                my $distances = $tree->distance(-nodes => [$node1,$node2]);
+
+                #ADD OPTION FOR DIVIDING BY 2 FOR DIVERGENCE TIMES
+		if($divtimebool==1){
+                	$distances = $distances/2 ;
+		}
+                print OUT "\t".$distances;
+        }
+print OUT "\n";
+close(TREE);
+}
+
+close(PAIRS);
+close(OUT);
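For comparison, a minimal sketch of the same pairwise patristic-distance calculation as PD.pl, written with Biopython's Bio.Phylo rather than BioPerl; the argument order and the yes|no halving flag mirror the usage line above. This is an illustration only, not the wrapper the PDpairs tool actually calls.

#!/usr/bin/env python
import sys
from Bio import Phylo

def main(pairs_path, tree_path, out_path, half):
    if half not in ("yes", "no"):
        sys.exit("Argument must contain yes or no for divergence times")
    pairs = []
    with open(pairs_path) as handle:
        for line in handle:
            line = line.rstrip("\n")
            if line:
                sp1, sp2 = line.split("\t")[:2]
                pairs.append((sp1, sp2))
    with open(out_path, "w") as out:
        for sp1, sp2 in pairs:
            row = [sp1, sp2]
            # one distance column per tree in the file, matching PD.pl's output layout
            for tree in Phylo.parse(tree_path, "newick"):
                dist = tree.distance(tree.find_any(name=sp1),
                                     tree.find_any(name=sp2))
                if half == "yes":
                    dist /= 2.0  # divergence time = half the patristic distance
                row.append(str(dist))
            out.write("\t".join(row) + "\n")

if __name__ == "__main__":
    main(*sys.argv[1:5])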
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/PDPairs/PDpairs.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,28 @@
+<tool id="PDpairs" name="PDpairs">
+    <description>Pairwise distance between taxa in a phylogenetic tree</description>
+    <command interpreter="perl">
+		PD.pl $intable $intree $outtable $half
+    </command>
+    <inputs>
+	    <param name="intree" type="data" format="txt" label="Input Tree File" help="newick" />
+	    <param name="intable" type="data" format="tabular" label="Table of species pairs" help="species tab species" />
+       	    <param name='half' type='boolean' checked='false' truevalue='yes' falsevalue='no' label='Calculate Divergence times (divides pairwise distances in half)' />
+    </inputs>
+    <outputs>
+	<data format="tabular" name="outtable" label="${tool.name} on ${on_string}: Out file" />
+    </outputs>
+    <help>
+Input a table of species pairs::
+
+  species1	species2
+  species2	species4
+
+And a Newick format phylogeny with branch lengths
+
+Output is a table of the species pairs followed by a column of pairwise distance for each tree in the file::
+
+  species1	species2	1.104
+  species2	species4	2.119
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/PDPairs/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,3 @@
+Calculates phylogenetic distances for pairs of species on a phylogeny
+
+Tools developed by Oakley et al
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/SHtest/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Uses RAxML to compute an SH test to compare trees
+
+(Stamatakis 2006)
+
+RAxML is required to be installed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/SHtest/SHtest.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,47 @@
+#! /usr/bin/perl -w
+
+use strict;
+use warnings;
+#SHtest.pl Galaxy wrapper calls raxml (SH test) from SHtest.xml
+
+##For debugging command line pass, uncomment next
+#for (my $i=0; $i < @ARGV; $i++){
+#	print "Parameter #$i ".$ARGV[$i]."\n\n";
+#}
+#exit;
+
+my $datatype = shift(@ARGV);		#0 datatype
+my $data_file= shift(@ARGV);		#1 input a phylip file
+my $part_file = shift(@ARGV);		#2 optional partition file
+my $best_tree = shift(@ARGV);		#3 best tree for SH comparison
+my $alt_trees = shift(@ARGV);		#4 Alternative tree(s) for SH comparison
+my $model;
+
+#ADD OPTIONS TO BUILD FULL RAXML COMMANDLINE ARGUMENT
+
+my $build_command;
+#First CALL RAXML THROUGH PATH (PThreads binary)
+	$build_command = "raxmlHPC-PTHREADS-SSE3 ";
+#Add SH Test Option and Thread number for PThreads
+	$build_command = $build_command."-f h -T 4";
+#Next add call to input phylip file
+	$build_command = $build_command." -s ".$data_file;
+#model is passed directly with xml
+	$model = $datatype;
+	$build_command = $build_command." -m ".$model;
+#Add call to partition file name
+	unless($part_file eq 'None'){
+		$build_command = $build_command." -q ".$part_file;
+	}
+#Next add call to input best tree file
+	$build_command = $build_command." -t ".$best_tree;
+#Next add call to input alternative tree(s) file
+	$build_command = $build_command." -z ".$alt_trees;
+#name output files galaxy
+	$build_command = $build_command." -n SH";
+
+print "Galaxy COMMAND BUILD WAS: $build_command\n";
+
+#Call raxml
+system $build_command;
+
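A hedged sketch of the command construction SHtest.pl performs, as a list-based subprocess call in Python. The binary name (raxmlHPC-PTHREADS-SSE3) and the -f h / -T / -s / -m / -q / -t / -z / -n flags are the ones the Perl above already uses; nothing beyond that is assumed.

#!/usr/bin/env python
import subprocess
import sys

def build_sh_command(model, phylip, part_file, best_tree, alt_trees,
                     threads=4, run_name="SH"):
    cmd = ["raxmlHPC-PTHREADS-SSE3", "-f", "h", "-T", str(threads),
           "-s", phylip, "-m", model]
    if part_file and part_file != "None":  # Galaxy passes the string 'None' when no partition file is given
        cmd += ["-q", part_file]
    cmd += ["-t", best_tree, "-z", alt_trees, "-n", run_name]
    return cmd

if __name__ == "__main__":
    # argument order matches the wrapper: datatype(model), phylip, partition, best tree, alt trees
    command = build_sh_command(*sys.argv[1:6])
    print("Galaxy COMMAND BUILD WAS: %s" % " ".join(command))
    subprocess.call(command)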
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/SHtest/SHtest.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,31 @@
+<tool id="SHtest" name="SHtest" version="7.2.8" force_history_refresh='True'>
+    <description> Use RAxML to calculate SHtest to compare trees </description>
+    <requirements>
+        <requirement type="package">raxml</requirement>
+    </requirements>
+    <command interpreter="perl">
+        SHtest.pl $datatype $data_file $part_file $best_tree $alt_trees
+		  > $raxml_log
+			2>&amp;1
+    </command>
+    <inputs>
+        <param format="phylip" name="data_file" type="data" label="Raxml Phylip File" help=""/>
+    <param name="datatype" type="select" format="text">
+      <label>Model of evolution to apply to all data partitions (-m)</label>
+      <option value="GTRGAMMA">GTRGAMMA</option> <option value="PROTGAMMADAYHOFF">PROTGAMMADAYHOFF</option> <option value="PROTGAMMADCMUT">PROTGAMMADCMUT</option> <option value="PROTGAMMAJTT">PROTGAMMAJTT</option> <option value="PROTGAMMAMTREV">PROTGAMMAMTREV</option> <option value="PROTGAMMAWAG">PROTGAMMAWAG</option> <option value="PROTGAMMARTREV">PROTGAMMARTREV</option> <option value="PROTGAMMACPREV">PROTGAMMACPREV</option> <option value="PROTGAMMAVT">PROTGAMMAVT</option> <option value="PROTGAMMABLOSUM62">PROTGAMMABLOSUM62</option> <option value="PROTGAMMAMTMAM">PROTGAMMAMTMAM</option> <option value="PROTGAMMALG">PROTGAMMALG</option> <option value="PROTGAMMAMTART">PROTGAMMAMTART</option> <option value="PROTGAMMAMTZOA">PROTGAMMAMTZOA</option> <option value="PROTGAMMAPMB">PROTGAMMAPMB</option> <option value="PROTGAMMAHIVB">PROTGAMMAHIVB</option> <option value="PROTGAMMAHIVW">PROTGAMMAHIVW</option> <option value="PROTGAMMAJTTDCMUT">PROTGAMMAJTTDCMUT</option> <option value="PROTGAMMAFLU">PROTGAMMAFLU</option> <option value="PROTGAMMAGTR">PROTGAMMAGTR</option>
+    </param>
+        <param format="txt" name="part_file" type="data" optional="true" label="Partition file" help="You may provide an alignment partition file."/>
+        <param format="txt" name="best_tree" type="data" optional="true" label="ML Tree" help="ML Tree in newick format."/>
+        <param format="txt" name="alt_trees" type="data" optional="true" label="Alt Tree(s)" help="Alternative Hypothesis Tree(s) in newick format."/>
+    </inputs>
+    <outputs>
+	<data format="txt" name="raxml_log" label="${tool.name} Screen Output on ${on_string}" />
+        <data format="txt" name="raxml_info" label="${tool.name} Log File on ${on_string}" from_work_dir="RAxML_info.SH" />
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+	raxml Home Page:
+	http://www.exelixis-lab.org/software.html
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/aliscorecut/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+ALICUT/SCORE
+------------
+Needs a modified ALICUT perl script in PATH.
+Script is called Aliscore.02.pl and is at:
+http://zfmk.de/web/Forschung/Abteilungen/AG_Wgele/Software/Aliscore/index.en.html
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gblocks/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,7 @@
+gblocks
+
+Implements gblocks to prune ambiguous alignments
+
+(Talavera, Castresana 2007)
+
+gblocks package required to be installed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gblocks/gblocks.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,30 @@
+#! /usr/bin/perl -w
+
+use strict;
+use warnings;
+#gblocks.pl [fasta file]
+
+my $infile=shift(@ARGV);
+my $datatype=shift(@ARGV);
+my $gaps=shift(@ARGV);
+my $size=shift(@ARGV);
+my $outfileloc=shift(@ARGV);
+my $htmlfileloc=shift(@ARGV);
+
+
+
+
+##For debugging command line pass, uncomment next
+#for (my $i=0; $i < @ARGV; $i++){
+#	print "Parameter #$i ".$ARGV[$i]."\n\n";
+#}
+
+system "Gblocks $infile $datatype $gaps -b4=$size";
+
+#Gblocks writes its output next to the input, as $infile-gb (plus an html report $infile-gb.htm)
+#Copy those files to where galaxy expects to find the output
+my $outfile = $infile."-gb";
+my $htmlfile = $outfile.".htm";
+system "cat $outfile > $outfileloc";
+system "cat $htmlfile > $htmlfileloc";
+exit;
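The same wrapping idea as gblocks.pl in a short Python sketch: run Gblocks, then copy the input-gb and input-gb.htm files it writes next to the input over to the paths Galaxy supplies. The flag spellings (-t=, -b5=, -b4=) are those passed in by gblocks.xml above; this is illustrative only, not the shipped wrapper.

#!/usr/bin/env python
import shutil
import subprocess
import sys

def run_gblocks(infile, datatype, gaps, block_size, out_path, html_path):
    # e.g. datatype='-t=d', gaps='-b5=h', block_size=10 (values come from gblocks.xml)
    subprocess.call(["Gblocks", infile, datatype, gaps, "-b4=%s" % block_size])
    # Gblocks names its outputs after the input file
    shutil.copyfile(infile + "-gb", out_path)
    shutil.copyfile(infile + "-gb.htm", html_path)

if __name__ == "__main__":
    run_gblocks(*sys.argv[1:7])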
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gblocks/gblocks.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,61 @@
+<tool id="gblocks" name="gblocks">
+    <description>Remove poorly aligned positions and divergent regions from an alignment</description>
+    <command interpreter="perl">
+        gblocks.pl $input $datatype $gaps $Block $out_file $html_file > $screen
+    </command>
+    <inputs>
+            <param name="input" type="data" format="fasta" label="Input Fasta File" help="Aligned fasta" />
+    <param name="datatype" type="select" label="Type of Sequence">
+      <option value="-t=d">DNA</option>
+      <option value="-t=p">Protein</option>
+      <option value="-t=c">Codons</option>
+    </param>
+    <param name="Block" type="integer" value="10" label="Minimum block size" help='10 is the default. Smaller values are less stringent.'/>
+    <param name="gaps" type="select" label="Gap Allowance">
+      <option value="-b5=n">None</option>
+      <option value="-b5=h">Half</option>
+      <option value="-b5=a">All</option>
+    </param>
+    </inputs>
+    <outputs>
+        <data format="txt" name="screen" label="${tool.name} on ${on_string}: screen Log" />
+        <data format="html" name="html_file" label="${tool.name} on ${on_string}: html File"/>
+        <data format="txt" name="out_file" label="${tool.name} on ${on_string}: Output File"/>
+    </outputs>
+    <help>
+About Gblocks
+
+Version 0.91b, January 2002
+
+Copyright Jose Castresana
+
+Gblocks is a computer program written in ANSI C language that eliminates poorly aligned 
+positions and divergent regions of an alignment of DNA or protein sequences. These 
+positions may not be homologous or may have been saturated by multiple substitutions and it 
+is convenient to eliminate them prior to phylogenetic analysis. Gblocks selects blocks in a 
+similar way as it is usually done by hand but following a reproducible set of conditions. 
+The selected blocks must fulfill certain requirements with respect to the lack of large 
+segments of contiguous nonconserved positions, lack of gap positions and high conservation 
+of flanking positions, making the final alignment more suitable for phylogenetic analysis. 
+Gblocks outputs several files to visualize the selected blocks. The use of a program such 
+as Gblocks reduces the necessity of manually editing multiple alignments, makes the 
+automation of phylogenetic analysis of large data sets feasible and, finally, facilitates 
+the reproduction of the alignments and subsequent phylogenetic analysis by other 
+researchers. Gblocks is very fast in processing alignments and it is therefore highly 
+suitable for large-scale phylogenetic analyses.
+
+Several parameters can be modified to make the selection of blocks more or less stringent. 
+In general, a relaxed selection of blocks is better for short alignments, whereas a 
+stringent selection is more adequate for longer ones. Be aware that the default options of 
+Gblocks are stringent.
+
+    Talavera, G., and Castresana, J. (2007). Improvement of phylogenies after removing 
+divergent and ambiguously aligned blocks from protein sequence alignments. Systematic 
+Biology 56, 564-577.
+
+    Castresana, J. (2000). Selection of conserved blocks from multiple alignments for their 
+use in phylogenetic analysis. Molecular Biology and Evolution 17, 540-552.
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.py	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+"""Find ORFs in a nucleotide sequence file.
+
+get_orfs_or_cdss.py $input_fasta $input_format $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file
+
+Takes ten command line options, input sequence filename, format, genetic
+code, CDS vs ORF, end type (open, closed), selection mode (all, top, one),
+minimum length (in amino acids), strand (both, forward, reverse), output
+nucleotide filename, and output protein filename.
+
+This tool is a short Python script which requires Biopython. If you use
+this tool in scientific work leading to a publication, please cite the
+Biopython application note:
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+This script is copyright 2011 by Peter Cock, The James Hutton Institute
+(formerly SCRI), Dundee, UK. All rights reserved.
+
+See accompanying text file for licence details (MIT/BSD style).
+
+This is version 0.0.1 of the script.
+"""
+import sys
+import re
+
+def stop_err(msg, err=1):
+    sys.stderr.write(msg.rstrip() + "\n")
+    sys.exit(err)
+
+try:
+    from Bio.Seq import Seq, reverse_complement, translate
+    from Bio.SeqRecord import SeqRecord
+    from Bio import SeqIO
+    from Bio.Data import CodonTable
+except ImportError:
+    stop_err("Missing Biopython library")
+
+#Parse Command Line
+try:
+    input_file, seq_format, table, ftype, ends, mode, min_len, strand, out_nuc_file, out_prot_file = sys.argv[1:]
+except ValueError:
+    stop_err("Expected ten arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))
+
+try:
+    table = int(table)
+except ValueError:
+    stop_err("Expected integer for genetic code table, got %s" % table)
+
+try:
+    table_obj = CodonTable.ambiguous_generic_by_id[table]
+except KeyError:
+    stop_err("Unknown codon table %i" % table)
+
+if ftype not in ["CDS", "ORF"]:
+    stop_err("Expected CDS or ORF, got %s" % ftype)
+
+if ends not in ["open", "closed"]:
+    stop_err("Expected open or closed for end treatment, got %s" % ends)
+
+try:
+    min_len = int(min_len)
+except ValueError:
+    stop_err("Expected integer for min_len, got %s" % min_len)
+
+if seq_format.lower()=="sff":
+    seq_format = "sff-trim"
+elif seq_format.lower()=="fasta":
+    seq_format = "fasta"
+elif seq_format.lower().startswith("fastq"):
+    seq_format = "fastq"
+else:
+    stop_err("Unsupported file type %r" % seq_format)
+
+print "Genetic code table %i" % table
+print "Minimum length %i aa" % min_len
+#print "Taking %s ORF(s) from %s strand(s)" % (mode, strand)
+
+starts = sorted(table_obj.start_codons)
+assert "NNN" not in starts
+re_starts = re.compile("|".join(starts))
+
+stops = sorted(table_obj.stop_codons)
+assert "NNN" not in stops
+re_stops = re.compile("|".join(stops))
+
+def start_chop_and_trans(s, strict=True):
+    """Returns offset, trimmed nuc, protein."""
+    if strict:
+        assert s[-3:] in stops, s
+    assert len(s) % 3 == 0
+    for match in re_starts.finditer(s):
+        #Must check the start is in frame
+        start = match.start()
+        if start % 3 == 0:
+            n = s[start:]
+            assert len(n) % 3 == 0, "%s is len %i" % (n, len(n))
+            if strict:
+                t = translate(n, table, cds=True)
+            else:
+                #Use when missing stop codon,
+                t = "M" + translate(n[3:], table, to_stop=True)
+            return start, n, t
+    return None, None, None
+
+def break_up_frame(s):
+    """Returns offset, nuc, protein."""
+    start = 0
+    for match in re_stops.finditer(s):
+        index = match.start() + 3
+        if index % 3 != 0:
+            continue
+        n = s[start:index]
+        if ftype=="CDS":
+            offset, n, t = start_chop_and_trans(n)
+        else:
+            offset = 0
+            t = translate(n, table, to_stop=True)
+        if n and len(t) >= min_len:
+            yield start + offset, n, t
+        start = index
+    if ends == "open":
+        #No stop codon, Biopython's strict CDS translate will fail
+        n = s[start:]
+        #Ensure we have whole codons
+        #TODO - Try appending N instead?
+        #TODO - Do the next four lines more elegantly
+        if len(n) % 3:
+            n = n[:-1]
+        if len(n) % 3:
+            n = n[:-1]
+        if ftype=="CDS":
+            offset, n, t = start_chop_and_trans(n, strict=False)
+        else:
+            offset = 0
+            t = translate(n, table, to_stop=True)
+        if n and len(t) >= min_len:
+            yield start + offset, n, t
+                        
+
+def get_all_peptides(nuc_seq):
+    """Returns start, end, strand, nucleotides, protein.
+
+    Co-ordinates are Python style zero-based.
+    """
+    #TODO - Refactor to use a generator function (in start order)
+    #rather than making a list and sorting?
+    answer = []
+    full_len = len(nuc_seq)
+    if strand != "reverse":
+        for frame in range(0,3):
+            for offset, n, t in break_up_frame(nuc_seq[frame:]):
+                start = frame + offset #zero based
+                answer.append((start, start + len(n), +1, n, t))
+    if strand != "forward":
+        rc = reverse_complement(nuc_seq)
+        for frame in range(0,3) :
+            for offset, n, t in break_up_frame(rc[frame:]):
+                start = full_len - frame - offset #zero based
+                answer.append((start, start + len(n), -1, n ,t))
+    answer.sort()
+    return answer
+
+def get_top_peptides(nuc_seq):
+    """Returns all peptides of max length."""
+    values = list(get_all_peptides(nuc_seq))
+    if not values:
+        raise StopIteration
+    max_len = max(len(x[-1]) for x in values)
+    for x in values:
+        if len(x[-1]) == max_len:
+            yield x
+
+def get_one_peptide(nuc_seq):
+    """Returns first (left most) peptide with max length."""
+    values = list(get_top_peptides(nuc_seq))
+    if not values:
+        raise StopIteration
+    yield values[0]
+
+if mode == "all":
+    get_peptides = get_all_peptides
+elif mode == "top":
+    get_peptides = get_top_peptides
+elif mode == "one":
+    get_peptides = get_one_peptide
+
+in_count = 0
+out_count = 0
+if out_nuc_file == "-":
+    out_nuc = sys.stdout
+else:
+    out_nuc = open(out_nuc_file, "w")
+if out_prot_file == "-":
+    out_prot = sys.stdout
+else:
+    out_prot = open(out_prot_file, "w")
+for record in SeqIO.parse(input_file, seq_format):
+    for i, (f_start, f_end, f_strand, n, t) in enumerate(get_peptides(str(record.seq).upper())):
+        out_count += 1
+        if f_strand == +1:
+            loc = "%i..%i" % (f_start+1, f_end)
+        else:
+            loc = "complement(%i..%i)" % (f_start+1, f_end)
+        descr = "length %i aa, %i bp, from %s of %s" \
+                % (len(t), len(n), loc, record.description)
+        r = SeqRecord(Seq(n), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)
+        t = SeqRecord(Seq(t), id = record.id + "|%s%i" % (ftype, i+1), name = "", description= descr)
+        SeqIO.write(r, out_nuc, "fasta")
+        SeqIO.write(t, out_prot, "fasta")
+    in_count += 1
+if out_nuc is not sys.stdout:
+    out_nuc.close()
+if out_prot is not sys.stdout:
+    out_prot.close()
+
+print "Found %i %ss in %i sequences" % (out_count, ftype, in_count)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,75 @@
+Galaxy tool to find ORFs or simple CDSs
+=======================================
+
+This tool is copyright 2011 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+This tool is a short Python script (using Biopython library functions)
+to search nucleotide sequences for open reading frames (ORFs) or coding
+sequences (CDSs) where the first potential start codon is used. See the
+help text in the XML file for more information.
+
+There are just two files to install:
+
+* get_orfs_or_cdss.py (the Python script)
+* get_orfs_or_cdss.xml (the Galaxy tool definition)
+
+The suggested location is in the Galaxy folder tools/filters next to the tool
+for calling sff_extract.py for converting SFF to FASTQ or FASTA + QUAL.
+
+You will also need to modify the tool_conf.xml file to tell Galaxy to offer the
+tool. One suggested location is in the filters section. Simply add the line:
+
+<tool file="filters/get_orfs_or_cdss.xml" />
+
+You will also need to install Biopython 1.54 or later. If you want to run
+the unit tests, include this line in tool_conf.xml.sample and copy the sample
+FASTA files into the test-data directory. That's it.
+
+
+History
+=======
+
+v0.0.1 - Initial version.
+
+
+Developers
+==========
+
+This script and related tools are being developed on the following hg branch:
+http://bitbucket.org/peterjc/galaxy-central/src/tools
+
+For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+the following command from the Galaxy root folder:
+
+tar -czf get_orfs_or_cdss.tar.gz tools/filters/get_orfs_or_cdss.*
+
+Check this worked:
+
+$ tar -tzf get_orfs_or_cdss.tar.gz
+tools/filters/get_orfs_or_cdss.py
+tools/filters/get_orfs_or_cdss.txt
+tools/filters/get_orfs_or_cdss.xml
+
+
+Licence (MIT/BSD style)
+=======================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/get_orfs_or_cdss/get_orfs_or_cdss.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,149 @@
+<tool id="get_orfs_or_cdss" name="Get open reading frames (ORFs) or coding sequences (CDSs)" version="0.0.1">
+	<description>e.g. to get peptides from ESTs</description>
+	<command interpreter="python">
+get_orfs_or_cdss.py $input_file $input_file.ext $table $ftype $ends $mode $min_len $strand $out_nuc_file $out_prot_file > $stdout 2>&amp;1
+	</command>
+	<inputs>
+		<param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file (nucleotides)" help="FASTA, FASTQ, or SFF format." />
+		<param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons">
+			<option value="1">1. Standard</option>
+			<option value="2">2. Vertebrate Mitochondrial</option>
+			<option value="3">3. Yeast Mitochondrial</option>
+			<option value="4">4. Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma</option>
+			<option value="5">5. Invertebrate Mitochondrial</option>
+			<option value="6">6. Ciliate Macronuclear and Dasycladacean</option>
+			<option value="9">9. Echinoderm Mitochondrial</option>
+			<option value="10">10. Euplotid Nuclear</option>
+			<option value="11">11. Bacterial</option>
+			<option value="12">12. Alternative Yeast Nuclear</option>
+			<option value="13">13. Ascidian Mitochondrial</option>
+			<option value="14">14. Flatworm Mitochondrial</option>
+			<option value="15">15. Blepharisma Macronuclear</option>
+			<option value="16">16. Chlorophycean Mitochondrial</option>
+			<option value="21">21. Trematode Mitochondrial</option>
+			<option value="22">22. Scenedesmus obliquus</option>
+			<option value="23">23. Thraustochytrium Mitochondrial</option>
+		</param>
+		<param name="ftype" type="select" value="True" label="Look for ORFs or CDSs">
+                        <option value="ORF">Look for ORFs (check for stop codons only, ignore start codons)</option>
+                        <option value="CDS">Look for CDSs (with start and stop codons)</option>
+		</param>
+                <param name="ends" type="select" value="open" label="Sequence end treatment">
+			<option value="open">Open ended (will allow missing start/stop codons at the ends)</option>
+                        <option value="closed">Complete (will check for start/stop codons at the ends)</option>
+                        <!-- TODO? Circular, for using this on finished bacteria etc -->
+                </param>
+
+		<param name="mode" type="select" label="Selection criteria" help="Suppose a sequence has ORFs/CDSs of lengths 100, 102 and 102 -- which should be taken? These options would return 3, 2 or 1 ORF.">
+                    <option value="all">All ORFs/CDSs from each sequence</option>
+                    <option value="top">All ORFs/CDSs from each sequence with the maximum length</option>
+                    <option value="one">First ORF/CDS from each sequence with the maximum length</option>
+		</param>
+                <param name="min_len" type="integer" size="5" value="30" label="Minimum length ORF/CDS (in amino acids, e.g. 30 aa = 90 bp plus any stop codon)">
+                </param>
+                <param name="strand" type="select" label="Strand to search" help="Use the forward only option if your sequence directionality is known (e.g. from poly-A tails, or strand specific RNA sequencing).">
+                    <option value="both">Search both the forward and reverse strand</option>
+                    <option value="forward">Only search the forward strand</option>
+                    <option value="reverse">Only search the reverse strand</option>
+                </param>
+	</inputs>
+	<outputs>
+		<data format="txt" name="stdout" label="${tool.name} on ${on_string}: stdout" />
+		<data name="out_nuc_file" format="fasta" label="${ftype.value}s (nucleotides)" />
+		<data name="out_prot_file" format="fasta" label="${ftype.value}s (amino acids)" />
+	</outputs>
+	<tests>
+                <test>
+                        <param name="input_file" value="get_orf_input.fasta" />
+                        <param name="table" value="1" />
+                        <param name="ftype" value="CDS" />
+                        <param name="ends" value="open" />
+                        <param name="mode" value="all" />
+                        <param name="min_len" value="10" />
+                        <param name="strand" value="forward" />
+                        <output name="out_nuc_file" file="get_orf_input.t1_nuc_out.fasta" />
+                        <output name="out_prot_file" file="get_orf_input.t1_prot_out.fasta" />
+                </test>
+		<test>
+			<param name="input_file" value="get_orf_input.fasta" />
+			<param name="table" value="11" />
+			<param name="ftype" value="CDS" />
+			<param name="ends" value="closed" />
+			<param name="mode" value="all" />
+			<param name="min_len" value="10" />
+			<param name="strand" value="forward" />
+			<output name="out_nuc_file" file="get_orf_input.t11_nuc_out.fasta" />
+			<output	name="out_prot_file" file="get_orf_input.t11_prot_out.fasta" />
+		</test>
+		<test>
+                        <param name="input_file" value="get_orf_input.fasta" />
+                        <param name="table" value="11" />
+                        <param name="ftype" value="CDS" />
+                        <param name="ends" value="open" />
+                        <param name="mode" value="all" />
+                        <param name="min_len" value="10" />
+                        <param name="strand" value="forward" />
+                        <output name="out_nuc_file" file="get_orf_input.t11_open_nuc_out.fasta" />
+                        <output name="out_prot_file" file="get_orf_input.t11_open_prot_out.fasta" />
+		</test>
+	</tests>
+	<requirements>
+		<requirement type="python-module">Bio</requirement>
+	</requirements>
+	<help>
+
+**What it does**
+
+Takes an input file of nucleotide sequences (typically FASTA, but also FASTQ
+and Standard Flowgram Format (SFF) are supported), and searches each sequence
+for open reading frames (ORFs) or potential coding sequences (CDSs) of the
+given minimum length. These are returned as FASTA files of nucleotides and
+protein sequences.
+
+You can choose to have all the ORFs/CDSs above the minimum length for each
+sequence (similar to the EMBOSS getorf tool), those with the longest length
+equal, or the first ORF/CDS with the longest length (in the special case
+where a sequence encodes two or more long ORFs/CDSs of the same length). The
+last option is a reasonable choice when the input sequences represent EST or
+mRNA sequences, where only one ORF/CDS is expected.
+
+Note that if no ORFs/CDSs in a sequence match the criteria, there will be no
+output for that sequence.
+
+Also note that the ORFs/CDSs are assigned modified identifiers to distinguish
+them from the original full length sequences, by appending a suffix.
+
+The start and stop codons are taken from the `NCBI Genetic Codes
+&lt;http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi&gt;`_.
+When searching for ORFs, the sequences will run from stop codon to stop
+codon, and any start codons are ignored. When searching for CDSs, the first
+potential start codon will be used, giving the longest possible CDS within
+each ORF, and thus the longest possible protein sequence. This is useful
+for things like BLAST or domain searching, but since this may not be the
+correct start codon, it may not be appropriate for signal peptide detection
+etc.
+
+**Example Usage**
+
+Given some EST sequences (Sanger capillary reads) assembled into unigenes,
+or a transcriptome assembly from some RNA-Seq, each of your nucleotide
+sequences should (barring sequencing, assembly errors, frame-shifts etc)
+encode one protein as a single ORF/CDS, which you wish to extract (and
+perhaps translate into amino acids).
+
+If your RNA-Seq data was strand specific, and assembled taking this into
+account, you should only search for ORFs/CDSs on the forward strand.
+
+**Citation**
+
+This tool uses Biopython. If you use this tool in scientific work leading
+to a publication, please cite the Biopython application note (and Galaxy
+too of course):
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+Installed by Sabrina.
+	</help>
+</tool>
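A worked, pure-Python illustration of the ORF-versus-CDS distinction described in the help text above: ORFs run stop codon to stop codon, while a CDS starts at the first in-frame start codon inside that ORF. It uses only the standard-code ATG/TAA/TAG/TGA on one forward frame and a made-up example sequence; the real get_orfs_or_cdss.py handles all frames, both strands, ambiguous bases and the full NCBI codon tables.

#!/usr/bin/env python
STARTS = {"ATG"}
STOPS = {"TAA", "TAG", "TGA"}

def orfs_and_cdss(seq):
    """Yield (orf, cds) pairs for frame 0 of seq; cds is None if no start codon."""
    codons = [seq[i:i + 3] for i in range(0, len(seq) - 2, 3)]
    start = 0
    for idx, codon in enumerate(codons):
        if codon in STOPS:
            orf = "".join(codons[start:idx + 1])      # stop-to-stop, trailing stop kept
            cds = None
            for j in range(start, idx):               # first in-frame start codon wins
                if codons[j] in STARTS:
                    cds = "".join(codons[j:idx + 1])
                    break
            yield orf, cds
            start = idx + 1

if __name__ == "__main__":
    example = "CCCATGAAATTTTAGGGGATGCCCTGA"  # made-up sequence with two ORFs in frame 0
    for orf, cds in orfs_and_cdss(example):
        print("ORF: %-18s CDS: %s" % (orf, cds))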
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/hmmbuild/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+HMMBUILD/SEARCH
+---------------
+Needs HMMER package available in PATH
+Get v3.0 here: http://hmmer.janelia.org/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/hmmsearch/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+HMMBUILD/SEARCH
+---------------
+Needs HMMER package available in PATH
+Get v3.0 here: http://hmmer.janelia.org/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/iAssembler/iAssembler2.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,23 @@
+<tool id="iAssembler2" name="iAssembler" version="1.0.1">
+	<description>Assembly of transcriptomes.</description>
+	<command interpreter="perl">
+		iAssembler_wrapper2.pl -i $input -e $maxlength -h $minoverlap -p $minpercent
+	</command>
+	<inputs>
+		<param name="input" format="fasta" type="data" label="nucleotide fasta file"/>
+		<param name="maxlength" type="integer" value="30" label="maximum length of end clips (6~100; default = 30)"/>
+		<param name="minoverlap" type="integer" value="40" label="minimum overlap length (>=30; default = 40)"/>
+		<param name="minpercent" type="integer" value="97" label="minimum percent identity for sequence clustering and assembly (95~100; default = 97)"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="fasta" from_work_dir="unigene_seq.fasta"/>
+	</outputs>
+	<help>
+	iAssembler is a standalone package to assemble ESTs generated using Sanger and/or Roche-454 pyrosequencing technologies into contigs. The pipeline gives much higher accuracy in EST assembly than other existing assemblers by employing an iterative assembly strategy and automated error corrections of mis-assemblies. iAssembler first performs iterative assemblies using MIRA and CAP3 (default: four cycles of MIRA assemblies followed by one CAP3 assembly) to correct assembly errors (mostly sequences derived from the same transcript fail to be assembled together) which occur frequently in just one round of assembly. The program then performs post-assembly quality checking by 1) aligning each EST sequence to its corresponding unigene sequence to identify mis-assemblies; and 2) performing all-versus-all pair-wise sequence alignments of unigenes to identify sequences derived from the same transcripts that fail to be assembled together. The identified mis-assemblies are then corrected by the program automatically.
+
+http://bioinfo.bti.cornell.edu/tool/iAssembler/
+
+Citation:
+Zheng Y, Zhao L, Gao J, Fei Z. (2011) iAssembler: a package for de novo assembly of Roche-454/Sanger transcriptome sequences. BMC Bioinformatics 12:453
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/iAssembler/iAssembler_README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,73 @@
+iAssembler tools for Galaxy
+
+	iAssembler is a standalone package to assemble ESTs generated using Sanger and/or Roche-454 pyrosequencing technologies into contigs. 
+	The pipeline gives much higher accuracy in EST assembly than other existing assemblers by employing an iterative assembly strategy and automated 
+	error corrections of mis-assemblies. 
+
+	iAssembler first performs iterative assemblies using MIRA and CAP3 (default: four cycles of MIRA assemblies followed by one CAP3 assembly) to correct 
+	assembly errors (mostly sequences derived from the same transcript fail to be assembled together) which occur frequently in just one round of assembly. 
+
+	The program then performs post-assembly quality checking by 
+	1) aligning each EST sequence to its corresponding unigene sequence to identify mis-assemblies; and 
+	2) performing all-versus-all pair-wise sequence alignments of unigenes to identify sequences derived from the same transcripts that fail to be assembled together.
+
+	The identified mis-assemblies are then corrected by the program automatically.
+
+	http://bioinfo.bti.cornell.edu/tool/iAssembler/
+
+	Citation:
+	Zheng Y, Zhao L, Gao J, Fei Z. (2011) iAssembler: a package for de novo assembly of Roche-454/Sanger transcriptome sequences. BMC Bioinformatics 12:453
+
+Galaxy XML and Perl wrapper script written by: Roger Ngo, Sam Min and Todd H. Oakley, UCSB
+
+Included files in this package:
+
+* iAssembler2.xml - Galaxy XML tool for iAssembler 1.3
+* iAssembler_wrapper2.pl - Wrapper script for Galaxy XML tool
+* increment.txt - File required by iAssembler_wrapper2.pl
+* iAssembler_README - Documentation file
+
+Note: iAssembler.pl MUST BE modified in lines 254-258 due to a bug preventing the program from working in the Galaxy platform.
+
+Pre-Installation:
+
+iAssembler 1.3 must be installed on the Galaxy user account. In order for the Galaxy tool wrapper to work, the iAssembler.pl
+script must be modified on lines 254-258.
+
+FROM:
+
+my $version_file = $working_dir."/mira_version";
+system("$program_bin_dir/mira | head > $version_file");
+
+TO:
+
+my $version_file = $working_dir."/mira_version";
+system("$program_bin_dir/mira > $working_dir/out");
+system("head  $working_dir/out > $version_file");
+
+
+Installation Instructions:
+
+1. Copy the iAssembler folder to a directory in your Galaxy user account.
+
+2. Copy iAssembler2.xml, iAssembler_wrapper2.pl and increment.txt to a folder in /galaxy-dist/tools/
+
+Note: increment.txt and iAssembler_wrapper2.pl MUST be in the same directory.
+
+3. In iAssembler_wrapper2.pl, modify the $iAssemblerBinPath to point to the iAssembler 1.3 directory in your
+Galaxy user account and $iAssemblerToolPath to the path of the wrapper.
+
+By default they have already been assigned as:
+
+my $iAssemblerBinPath = '/home/galaxy/pkgs/iAssembler';
+my $iAssemblerToolPath = '/home/galaxy/galaxy-dist/tools/iAssembler';
+
+4. Add the Galaxy tool information to tool_conf.xml in /galaxy-dist/
+
+5. Restart Galaxy using 
+
+./run.sh --stop-daemon
+
+and then
+
+./run.sh --reload --daemon
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/iAssembler/iAssembler_wrapper2.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+
+# Wrapper script written by: Roger Ngo, Sam Min and Todd H. Oakley, UCSB
+
+use warnings;
+use strict;
+use Cwd;
+
+my $dir = getcwd();
+
+my $iAssemblerBinPath = '/home/galaxy/pkgs/iAssembler';
+my $iAssemblerToolPath = '/home/galaxy/galaxy-dist/tools/iAssembler';
+
+#iAssembler_wrapper.pl -i $input -e $maxlength -h $minoverlap -p $minpercent  ...example
+
+my $input=$ARGV[1];
+my $maxlength=$ARGV[3];
+my $minoverlap=$ARGV[5];
+my $minpercent=$ARGV[7];
+
+open my $file, '<', $iAssemblerToolPath."\/increment.txt";
+	my $increment = <$file>;
+	$increment = int($increment);
+close $file;
+
+my $temp = $increment;
+
+open(UPDATE, '>'.$iAssemblerToolPath."\/increment.txt");
+	$increment = $increment + 1;
+	print UPDATE $increment;
+close(UPDATE);
+
+qx/cp $input $iAssemblerBinPath\/input.$temp.fasta/;
+
+chdir($iAssemblerBinPath);
+
+qx/$iAssemblerBinPath\/iAssembler.pl -i input.$temp.fasta -e $maxlength -h $minoverlap -p $minpercent 2>$dir\/log/;
+
+chdir("$dir");
+
+qx/cp $iAssemblerBinPath\/input.$temp.fasta_output\/unigene_seq.fasta unigene_seq.fasta/;
+
+qx/rm -rf $iAssemblerBinPath\/input.$temp.*/;
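A small sketch of the run-counter mechanism iAssembler_wrapper2.pl relies on: increment.txt holds an integer that is read and rewritten so each Galaxy job copies its input to a distinct input.N.fasta inside the iAssembler directory. As in the Perl, this simple read-then-rewrite is not safe against two jobs hitting the file at exactly the same instant.

#!/usr/bin/env python
def next_run_id(counter_path):
    # read the current counter value (empty file counts as 0)
    with open(counter_path) as handle:
        current = int(handle.read().strip() or 0)
    # write back the incremented value for the next job
    with open(counter_path, "w") as handle:
        handle.write(str(current + 1))
    return current

# e.g. run_id = next_run_id("/path/to/tools/iAssembler/increment.txt")  # hypothetical path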
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/iAssembler/increment.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,1 @@
+0
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/muscle/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+MUSCLE
+------
+Needs MUSCLE installed in PATH.
+Get Muscle at: http://www.drive5.com/muscle/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/muscle/muscle.py	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,150 @@
+import os
+import optparse
+import subprocess
+from multiprocessing import Pool
+
+directory = ""
+results = "results.data"
+extension = ".fs"
+aligned_extension = ".afa"
+
+
+def unescape(string):
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+        '@': '__at__',
+        '\n': '__cn__',
+        '\r': '__cr__',
+        '\t': '__tc__',
+        '#': '__pd__'
+        }
+
+    for key, value in mapped_chars.iteritems():
+        string = string.replace(value, key)
+
+    return string
+
+
+def isTabular(file):
+    with open(file) as f:
+        for line in f:
+            if line[0] == '>':
+                return False
+    return True
+
+
+def toData(text):
+	text = text.split('\n')
+	result = ''
+	for line in text:
+	    if '>' in line:
+	        line = '\n' + line.replace('> ', "") + '\t'
+	    line = line.replace(" ", "\t")
+	    result += line
+	return result[1:]  # Index past the first newline char
+
+def toDataSingle(text):
+	text = text.split('\n')
+	result = ''
+	for line in text:
+		line = line + '\n'
+		result += line
+	return result[1:]  # Index past the first newline char
+
+def muscle(input):
+    file_name = directory + os.sep + input
+    popen = subprocess.Popen(['muscle', "-in", file_name, "-out", file_name + aligned_extension])  # ./muscle
+    popen.wait()
+
+    popen = subprocess.Popen(['pwd'])  # ./muscle
+    popen.wait()
+
+
+class Sequence:
+    def __init__(self, string):
+        lis = string.split()
+        self.species = lis[0]
+        self.family = lis[1]
+        self.name = lis[2]
+        self.header = ' '.join(lis[:-1])
+        self.sequence = lis[-1]
+        self.string = string
+
+    def printFASTA(self):
+        return '> ' + self.header + '\n' + self.sequence + '\n'
+
+
+def saveMulti(tabFile):
+    with open(tabFile) as f:
+        for line in f:
+            seq = Sequence(line)
+            with open(directory + os.sep + seq.family + extension, "a") as p:
+                p.write(seq.printFASTA())
+
+
+def saveSingle(fastaFile):
+    with open(fastaFile) as f:
+        for line in f:
+            with open(directory + os.sep + "fasta" + extension, "a") as p:
+                p.write(line)
+
+
+def main():
+    usage = """%prog [options]
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-d', '--directory',
+        metavar="PATH",
+        dest='path',
+        default='.',
+        help='Path to working directory.')
+
+    parser.add_option(
+        '-i', '--in',
+        dest='input',
+        action='store',
+        type='string',
+        metavar="FILE",
+        help='Name of input data.')
+
+    options, args = parser.parse_args()
+
+    global directory
+    inputFile = unescape(options.input)
+    directory = unescape(options.path) + os.sep + "data"
+
+    os.mkdir(directory)
+
+    if isTabular(inputFile):
+        saveMulti(inputFile)
+    else:
+        saveSingle(inputFile)
+
+    pool = Pool()
+    list_of_files = [file for file in os.listdir(directory) if file.lower().endswith(extension)]
+    pool.map(muscle, list_of_files)
+
+    result = [file for file in os.listdir(directory) if file.lower().endswith(aligned_extension)]
+    if isTabular(inputFile):
+	with open(directory + os.sep + results, "a") as f:
+	    for file in result:
+	        with open(directory + os.sep + file, "r") as r:
+	            f.write(toData(r.read()) + "\n")
+    else:
+	with open(directory + os.sep + results, "a") as f:
+	    for file in result:
+	        with open(directory + os.sep + file, "r") as r:
+	            f.write(toDataSingle(r.read()) + "\n")
+
+if __name__ == '__main__':
+    main()
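To clarify the phytab handling in muscle.py, a short sketch of what the Sequence/saveMulti step does before the per-family MUSCLE jobs run: rows are grouped on their second (family/partition) column and each group becomes one FASTA file. Column order is taken from the Sequence class above; the example rows are invented.

#!/usr/bin/env python
from collections import defaultdict

def phytab_to_fasta(rows):
    """Group rows by their second (family/partition) column; return FASTA text per family."""
    families = defaultdict(list)
    for row in rows:
        fields = row.split()
        family, sequence = fields[1], fields[-1]
        header = " ".join(fields[:-1])              # same header muscle.py writes
        families[family].append("> %s\n%s\n" % (header, sequence))
    return dict((family, "".join(records)) for family, records in families.items())

if __name__ == "__main__":
    rows = ["SpeciesA gene1 seq1 ACGTACGT",         # made-up example rows
            "SpeciesB gene1 seq2 ACGAACGT",
            "SpeciesA gene2 seq3 TTTTGGGG"]
    for family, fasta in sorted(phytab_to_fasta(rows).items()):
        print("== %s.fs ==" % family)
        print(fasta)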
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/muscle/muscle.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,25 @@
+<tool id="phytab_muscle" name="PHYTAB MUSCLE" version="3.8">
+  <description>MUSCLE: Multiple sequence alignment. Input can be fasta or phytab format.</description>
+  <requirements>
+    <requirement type="package">muscle</requirement>
+  </requirements>
+  <command interpreter="python">
+    muscle.py -i $data > $muscle_stdout 2>&amp;1
+  </command>
+  <inputs>
+    <param format="txt" name="data" type="data" label="Sequence data" help="This should be sequence data in phytab or fasta format (e.g. from EvolMAP)"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="muscle_stdout" label="${tool.name} on ${on_string}: stdout" />
+    <data format="tabular" name="muscle_results" label="${tool.name} on ${on_string}: results" from_work_dir="data/results.data" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+  **MUSCLE v3.8**
+  
+  Runs MUSCLE on EvolMAP data.
+
+  See MUSCLE help: http://www.drive5.com/muscle/muscle_userguide3.8.html
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/mview/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,6 @@
+Converts an aligned sequence file in fasta format to html for visualization
+
+(Brown, Leroy, Sander 1998)
+
+
+mview package required to be installed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/mview/mview.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+
+my $input = $ARGV[0];
+my $dna = $ARGV[1];
+
+if (lc($dna) eq 'dna'){	#mview.xml passes 'DNA' or 'Protein'; match case-insensitively
+	$dna = '-DNA';
+}else{
+	$dna = '';
+}
+my $run = qx/mview -in pearson $dna -bold -coloring group -html head $input/;
+print $run;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/mview/mview.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,27 @@
+<tool id="mview" name="mview">
+	<description>View multiple sequence alignment in html</description>
+	<requirements><requirement type="package">mview</requirement></requirements>
+	<command interpreter="perl">mview.pl $input $dna > $output </command>
+	<inputs>
+		<param format="fasta" name="input" type="data" label="Aligned fasta file to visualize" />
+    		<param name="dna" type="select" label="Data type">
+      			<option value="DNA">DNA</option>
+      			<option value="Protein">Protein</option>
+		</param>
+
+	</inputs>
+	<outputs>
+		<data format="html" name="output" label="${tool.name} on ${on_string}: Alignment" />
+	</outputs>
+<help>
+
+**What it does**
+
+This tool converts an aligned sequence file in fasta format to html for visualizing the alignment in
+Galaxy
+--------
+
+</help>
+</tool>
+<!-- The Galaxy wrapper for mview is part of the UCSB phylogenetics toolkit, by the Oakley Lab 
+ucsb_phylogenetics@lifesci.ucsb.edu -->
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_LB_pruner/LB_prunerG.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/perl -w
+use strict;
+use Bio::TreeIO;
+use Bio::Tree::Tree;
+
+###this script identifies taxa on very long branches (longer than multiplier * average branch length)
+#input is a tree file, a branch-length multiplier, and an output filename
+
+# parse in newick/new hampshire format
+
+my $infile = $ARGV[0];
+my $multiplier = $ARGV[1];
+my $outfile = $ARGV[2];
+
+open(IN, "$infile") or exit;
+open(OUT, ">$outfile") or exit;
+
+
+my $tree = Bio::TreeIO->new(-format => 'newick',
+					-file => $infile)->next_tree;
+
+
+my $total_length = $tree->total_branch_length;
+my @taxa = $tree->get_nodes;
+my $ave_node_len = $total_length / @taxa;
+my @leafs = $tree->get_leaf_nodes;
+
+for my $node ( $tree->get_leaf_nodes ) {
+	if($node->branch_length > ($multiplier * $ave_node_len)){
+  		print OUT $node->id."\t".$ARGV[0]."\t".$node->branch_length."\t$ave_node_len\n";
+  	}
+}
+my @LB_clade;
+my @names;
+my $newroot = $tree->get_root_node;
+for my $node ( $tree->get_nodes ) {
+	if($node->branch_length){
+		if($node->branch_length > ($multiplier * $ave_node_len)){	
+#This finds a long internal branch
+			#Print descendants as Long Branch Clade
+ 			for my $child ( $node->get_all_Descendents ) {
+				if($child->is_Leaf){
+  					push(@LB_clade,$child->id."\t".$ARGV[0]."\t999\t$ave_node_len");
+					push(@names, $child->id);
+				}
+			}
+  		}
+	}
+}
+
+if(@LB_clade > @leafs/2){	#More than half in LBA clade - remove those NOT in LBA clade
+	for my $node ( $tree->get_leaf_nodes ) {
+		my $curcheck = $node->id;
+		chomp($curcheck);
+		my $count = grep {/$curcheck/} @names;
+
+		if($count==0){		#Tip was not collected in @names
+	  		print OUT $node->id."\t".$ARGV[0]."\t999\t$ave_node_len\n";
+		}
+	}
+}else{
+	if(@LB_clade){
+		print OUT join("\n",@LB_clade)."\n";
+	}
+}
+
+
+close(IN);
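A minimal sketch of the per-tree test LB_prunerG.pl applies, using Biopython's Bio.Phylo: a tip is flagged when its branch exceeds multiplier times the average branch length (total branch length divided by the number of nodes). The long-internal-branch case, where the Perl flags a whole descendant clade, is deliberately left out of this simplified illustration.

#!/usr/bin/env python
import sys
from Bio import Phylo

def long_branch_tips(tree_path, multiplier):
    tree = Phylo.read(tree_path, "newick")
    n_nodes = len(list(tree.find_clades()))            # all nodes, as in the Perl's get_nodes
    avg = tree.total_branch_length() / float(n_nodes)
    flagged = []
    for tip in tree.get_terminals():
        if tip.branch_length and tip.branch_length > multiplier * avg:
            flagged.append((tip.name, tip.branch_length, avg))
    return flagged

if __name__ == "__main__":
    for name, blen, avg in long_branch_tips(sys.argv[1], float(sys.argv[2])):
        print("%s\t%s\t%s\t%s" % (name, sys.argv[1], blen, avg))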
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_LB_pruner/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,2 @@
+Identify genes on very long branches.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_LB_pruner/phytab_LB_pruner.py	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,141 @@
+import os
+import optparse
+import subprocess
+from multiprocessing import Pool
+
+directory = ""
+results = "results.data"
+extension = ""
+aligned_extension = ".tab"
+datatype = ""
+
+perlpath = "/home/galaxy/galaxy-dist/tools/ucsb_phylogenetics/"
+
+def unescape(string):
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+        '@': '__at__',
+        '\n': '__cn__',
+        '\r': '__cr__',
+        '\t': '__tc__',
+        '#': '__pd__'
+        }
+
+    for key, value in mapped_chars.iteritems():
+        string = string.replace(value, key)
+
+    return string
+
+
+def isTabular(file):
+    with open(file) as f:
+        for line in f:
+            if line[0] == '>':
+                return False
+    return True
+
+#def toData(text, name):
+#    name = name.replace("fasta", "") #file name has fasta when fasta file called
+#    text = name.replace(".fs.tre", "") + "\t" + text.replace(" " , "")
+#    return text
+
+
+def toData(text, name):
+    # Strip the scratch-directory prefix ("./data/") from each tab-delimited result line
+    # so the second column is just the tree name.
+    text = text.split('\n')
+    result = ''
+    for line in text:
+        if '\t' in line:
+            line = line.replace("./data/", "") + "\n"
+        result += line
+    return result
+
+def LB_pruner(input):
+    file_name = directory + os.sep + input
+    popen = subprocess.Popen(['perl', perlpath+'LB_prunerG.pl', file_name, indata, file_name + aligned_extension])
+    popen.wait()
+
+class Sequence:
+    def __init__(self, string):
+        lis = string.split()
+        self.name = lis[0]
+        self.tree = lis[1]
+        self.string = string
+
+    def printFASTA(self):
+        # Name kept for symmetry with the other phytab wrappers; the "record" written
+        # per tree is just the newick string, one tree per line.
+        return self.tree + '\n'
+
+def saveMulti(tabFile):
+    with open(tabFile) as f:
+        for line in f:
+            seq = Sequence(line)
+            with open(directory + os.sep + seq.name + extension, "a") as p:
+                p.write(seq.printFASTA())
+
+def saveSingle(fastaFile):
+    with open(fastaFile) as f:
+        for line in f:
+            with open(directory + os.sep + "fasta" + extension, "a") as p:
+                p.write(line)
+
+def main():
+    usage = """%prog [options]
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-d', '--directory',
+        metavar="PATH",
+        dest='path',
+        default='.',
+        help='Path to working directory.')
+
+    parser.add_option(
+        '-i', '--in',
+        dest='input',
+        action='store',
+        type='string',
+        metavar="FILE",
+        help='Name of input data.')
+
+    parser.add_option(
+        '-m', '--mult',
+        dest='datatype',
+        action='store',
+        type='string',
+        help='Multiplier')
+
+    options, args = parser.parse_args()
+
+    global directory
+    global indata
+    inputFile = unescape(options.input)
+    directory = unescape(options.path) + os.sep + "data"
+    indata = unescape(options.datatype)
+
+    os.mkdir(directory)
+
+    if isTabular(inputFile):
+        saveMulti(inputFile)
+    else:
+        saveSingle(inputFile)
+
+    pool = Pool()
+    list_of_files = [file for file in os.listdir(directory) if file.lower().endswith(extension)]
+    pool.map(LB_pruner, list_of_files)
+
+    result = [file for file in os.listdir(directory) if file.lower().endswith(aligned_extension)]
+    with open(directory + os.sep + results, "a") as f:
+        for file in result:
+            with open(directory + os.sep + file, "r") as r:
+                f.write(toData(r.read(),file))
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_LB_pruner/phytab_LB_pruner.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,30 @@
+<tool id="phytab_LB_pruner" name="phytab LB pruner">
+  <description>LB_pruner: Identify genes on very long branches.</description>
+  <requirements>
+    <requirement type="package">LB_prunerG.pl</requirement>
+  </requirements>
+  <command interpreter="python">
+    phytab_LB_pruner.py -i $data -m $multiplier
+  </command>
+  <inputs>
+    <param format="txt" name="data" type="data" label="Sequence data" help="This should be sequence data in phytab or fasta"/>
+    <param name="multiplier" type="float" value="4" label="Multiplier" help="Value of Multiplier, M. Brances longer than M times average are written to file" >
+    </param>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="LB pruner results" label="${tool.name} on ${on_string}: results" from_work_dir="data/results.data" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+Input a table as follows::
+
+    name	newick_tree;
+    name2	newick_tree;
+    name3	newick_tree;
+
+Enter a value for M, the multiplier. LB pruner finds the average of all branch
+lengths in each newick tree. If any terminal branch is longer than M times the average,
+that gene is written to the output file. If an internal branch is longer than M times
+the average, then all members of that clade are written to the output file, with 999
+as the length of each branch. The third column is the branch length; the fourth column
+is the average branch length for the tree.
+  </help>
+</tool>
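The long-branch rule described in the help above can be paraphrased in a few lines. The sketch below is illustrative only (it is not the BioPerl code in LB_prunerG.pl), assumes Biopython is available, and uses a made-up newick string:

    from io import StringIO
    from Bio import Phylo

    def long_branch_tips(newick, multiplier=4.0):
        tree = Phylo.read(StringIO(newick), "newick")
        # Average branch length over all nodes, mirroring the Perl script's get_nodes loop.
        lengths = [c.branch_length or 0.0 for c in tree.find_clades()]
        average = sum(lengths) / len(lengths)
        # Flag terminal branches longer than M times the tree-wide average.
        return [(t.name, t.branch_length, average)
                for t in tree.get_terminals()
                if (t.branch_length or 0.0) > multiplier * average]

    print(long_branch_tips("(A:0.1,(B:0.1,C:2.5):0.1);", multiplier=4.0))
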
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_clearcut/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,6 @@
+Generate Neighbor Joining phylogeny. Input can be fasta or phytab format.
+
+clearcut -- (Evans, Sheneman, Foster 2006)
+
+
+Requires clearcut to be installed
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_clearcut/phytab_clearcut.py	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,143 @@
+import os
+import optparse
+import subprocess
+from multiprocessing import Pool
+
+directory = ""
+results = "results.data"
+extension = ".fs"
+aligned_extension = ".tre"
+datatype = ""
+
+def unescape(string):
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+        '@': '__at__',
+        '\n': '__cn__',
+        '\r': '__cr__',
+        '\t': '__tc__',
+        '#': '__pd__'
+        }
+
+    for key, value in mapped_chars.iteritems():
+        string = string.replace(value, key)
+
+    return string
+
+
+def isTabular(file):
+    with open(file) as f:
+        for line in f:
+            if line[0] == '>':
+                return False
+    return True
+
+def toData(text, name):
+    name = name.replace("fasta", "")  # file name contains "fasta" when a single fasta input was used
+    text = name.replace(".fs.tre", "") + "\t" + text.replace(" ", "")
+    return text
+
+#
+#def toData(text):
+#    text = text.split('\n')
+#    result = ''
+#    for line in text:
+#        if '>' in line:
+#            line = '\n' + line.replace('>', "") + '\t'
+#        line = line.replace(" ", "\t")
+#        result += line
+#    return result[1:]  # Index past the first newline char
+
+def clearcut(input):
+    file_name = directory + os.sep + input
+    popen = subprocess.Popen(['clearcut', "--in=" + file_name, "--out="+file_name + aligned_extension, "--alignment","-k", indata])
+    popen.wait()
+
+class Sequence:
+    def __init__(self, string):
+        lis = string.split()
+        self.species = lis[0]
+        self.family = lis[1]
+        self.name = lis[2]
+        self.header = ' '.join(lis[:-1])
+        self.sequence = lis[-1]
+        self.string = string
+
+    def printFASTA(self):
+        return '>' + self.header + '\n' + self.sequence + '\n'
+
+def saveMulti(tabFile):
+    with open(tabFile) as f:
+        for line in f:
+            seq = Sequence(line)
+            with open(directory + os.sep + seq.family + extension, "a") as p:
+                p.write(seq.printFASTA())
+
+def saveSingle(fastaFile):
+    with open(fastaFile) as f:
+        for line in f:
+            with open(directory + os.sep + "fasta" + extension, "a") as p:
+                p.write(line)
+
+def main():
+    usage = """%prog [options]
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-d', '--directory',
+        metavar="PATH",
+        dest='path',
+        default='.',
+        help='Path to working directory.')
+
+    parser.add_option(
+        '-i', '--in',
+        dest='input',
+        action='store',
+        type='string',
+        metavar="FILE",
+        help='Name of input data.')
+
+    parser.add_option(
+        '-t', '--type',
+        dest='datatype',
+        action='store',
+        type='string',
+        help='-P for protein. -D for DNA.')
+
+    options, args = parser.parse_args()
+
+    global directory
+    global indata
+    inputFile = unescape(options.input)
+    directory = unescape(options.path) + os.sep + "data"
+    indata = "-" + unescape(options.datatype)
+
+    os.mkdir(directory)
+
+    if isTabular(inputFile):
+        saveMulti(inputFile)
+    else:
+        saveSingle(inputFile)
+
+    pool = Pool()
+    list_of_files = [file for file in os.listdir(directory) if file.lower().endswith(extension)]
+    pool.map(clearcut, list_of_files)
+
+    result = [file for file in os.listdir(directory) if file.lower().endswith(aligned_extension)]
+    with open(directory + os.sep + results, "a") as f:
+        for file in result:
+            with open(directory + os.sep + file, "r") as r:
+                f.write(toData(r.read(),file))
+
+if __name__ == '__main__':
+    main()
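For reference, this is how one phytab row is split into a per-gene FASTA record by the Sequence class above before clearcut is run on each per-gene file and the results are folded back into results.data; the column values are invented:

    row = "Homo_sapiens\topsin\tseq001\tATGGCC"   # species, gene/family, sequence ID, sequence
    columns = row.split()
    header = " ".join(columns[:-1])               # everything except the sequence itself
    record = ">" + header + "\n" + columns[-1] + "\n"
    print(record)                                 # appended to the per-gene file opsin.fs
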
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_clearcut/phytab_clearcut.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,25 @@
+<tool id="phytab_clearcut" name="phytab clearcut">
+  <description>clearcut: Generate Neighbor Joining phylogeny. Input can be fasta or phytab format.</description>
+  <requirements>
+    <requirement type="package">clearcut</requirement>
+  </requirements>
+  <command interpreter="python">
+    phytab_clearcut.py -i $data -t $datatype > $clearcut_stdout 2>&amp;1
+  </command>
+  <inputs>
+    <param format="txt" name="data" type="data" label="Sequence data" help="This should be sequence data in phytab or fasta"/>
+    <param name="datatype" type="select" format="text">
+      <label>Type of data for all partitions</label>
+      <option value="P">Protein</option> 
+	<option value="D">DNA</option> 
+    </param>
+  </inputs>
+  <outputs>
+    <data format="txt" name="clearcut_stdout" label="${tool.name} on ${on_string}: stdout" />
+    <data format="tabular" name="clearcut_results" label="${tool.name} on ${on_string}: results" from_work_dir="data/results.data" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_prank/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Implements PRANK phylogeny-aware multiple sequence alignment
+
+(Loytynoja, Goldman 2008)
+
+Requires the PRANK package to be installed.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_prank/phytab_prank.py	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,135 @@
+import os
+import optparse
+import subprocess
+from multiprocessing import Pool
+
+directory = ""
+results = "results.data"
+extension = ".fs"
+aligned_extension = ".afa"
+output_extension = ".afa.2.fas"
+
+
+def unescape(string):
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+        '@': '__at__',
+        '\n': '__cn__',
+        '\r': '__cr__',
+        '\t': '__tc__',
+        '#': '__pd__'
+        }
+
+    for key, value in mapped_chars.iteritems():
+        string = string.replace(value, key)
+
+    return string
+
+
+def isTabular(file):
+    with open(file) as f:
+        for line in f:
+            if line[0] == '>':
+                return False
+    return True
+
+
+def toData(text):
+    text = text.split('\n')
+    result = ''
+    for line in text:
+        if '>' in line:
+            line = '\n' + line.replace('>__XX__', "") + '\t'
+        line = line.replace("__XX__", "\t")
+        result += line
+    return result[1:]  # Index past the first newline char
+
+
+def prank(input):
+    file_name = directory + os.sep + input
+    popen = subprocess.Popen(['pwd'])  # debugging aid: log the working directory
+    popen.wait()
+    popen = subprocess.Popen(['prank', "-d=" + file_name, "-o=" + file_name + aligned_extension, "-quiet"])  
+    popen.wait()
+
+class Sequence:
+    def __init__(self, string):
+        lis = string.split()
+        self.species = lis[0]
+        self.family = lis[1]
+        self.name = lis[2]
+        self.header = '__XX__'.join(lis[:-1]) #prank replaces space with _ so can't join with spaces like muscle does
+        self.sequence = lis[-1]
+        self.string = string
+
+    def printFASTA(self):
+        return '>__XX__' + self.header + '\n' + self.sequence + '\n'
+
+
+def saveMulti(tabFile):
+    with open(tabFile) as f:
+        for line in f:
+            seq = Sequence(line)
+            with open(directory + os.sep + seq.family + extension, "a") as p:
+                p.write(seq.printFASTA())
+
+
+def saveSingle(fastaFile):
+    with open(fastaFile) as f:
+        for line in f:
+            with open(directory + os.sep + "fasta" + extension, "a") as p:
+                p.write(line)
+
+
+def main():
+    usage = """%prog [options]
+options (listed below) default to 'None' if omitted
+    """
+    parser = optparse.OptionParser(usage=usage)
+
+    parser.add_option(
+        '-d', '--directory',
+        metavar="PATH",
+        dest='path',
+        default='.',
+        help='Path to working directory.')
+
+    parser.add_option(
+        '-i', '--in',
+        dest='input',
+        action='store',
+        type='string',
+        metavar="FILE",
+        help='Name of input data.')
+
+    options, args = parser.parse_args()
+
+    global directory
+    inputFile = unescape(options.input)
+    directory = unescape(options.path) + os.sep + "data"
+
+    os.mkdir(directory)
+
+    if isTabular(inputFile):
+        saveMulti(inputFile)
+    else:
+        saveSingle(inputFile)
+
+    pool = Pool()
+    list_of_files = [file for file in os.listdir(directory) if file.lower().endswith(extension)]
+    pool.map(prank, list_of_files)
+    result = [file for file in os.listdir(directory) if file.lower().endswith(output_extension)]
+    with open(directory + os.sep + results, "a") as f:
+        for file in result:
+            with open(directory + os.sep + file, "r") as r:
+                f.write(toData(r.read()) + "\n")
+
+if __name__ == '__main__':
+    main()
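The __XX__ placeholder used above exists because PRANK rewrites spaces in FASTA headers; the round trip from a phytab row into a per-gene FASTA header and back into results.data looks like this (values invented):

    row = "Homo_sapiens\topsin\tseq001\tATGGCC"
    columns = row.split()
    fasta_header = ">__XX__" + "__XX__".join(columns[:-1])   # written to the per-gene .fs file
    # after alignment, toData() reverses the encoding when building results.data:
    phytab_prefix = fasta_header.replace(">__XX__", "").replace("__XX__", "\t")
    print(phytab_prefix)                                      # Homo_sapiens<TAB>opsin<TAB>seq001
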
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/phytab_prank/phytab_prank.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,20 @@
+<tool id="phytab_prank" name="PHYTAB prank">
+  <description>prank: Multiple sequence alignment. Input can be fasta or phytab format.</description>
+  <requirements>
+    <requirement type="package">prank</requirement>
+  </requirements>
+  <command interpreter="python">
+    phytab_prank.py -i $data > $prank_stdout 2>&amp;1
+  </command>
+  <inputs>
+    <param format="txt" name="data" type="data" label="Sequence data" help="This should be sequence data in phytab or fasta"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="prank_stdout" label="${tool.name} on ${on_string}: stdout" />
+    <data format="tabular" name="prank_results" label="${tool.name} on ${on_string}: results" from_work_dir="data/results.data" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/prottest/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,8 @@
+prottest
+
+Selection of best-fit models of protein evolution
+
+(Abascal, Zardoya, Posada 2005)
+
+
+ProtTest package required to be installed.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/prottest/prottest.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,25 @@
+<tool id="prottest" name="prottest" version="2.4">
+	<description>Selection of best-fit models of protein evolution.</description>
+	<requirements>
+		  <requirement type="package">prottest</requirement>
+	</requirements>
+	<command interpreter="perl">
+		prottest_wrapper.pl -i $input -o $output
+	</command>
+	<inputs>
+	<param name="input" format="phylip" type="data" label="Input alignment (phylip recommended)"/>
+	</inputs>
+	<outputs>
+		<data format="txt" name="output"/>
+	</outputs>
+	<tests>
+	</tests>
+	<help>
+	ProtTest is a bioinformatic tool for the selection of the most appropriate model of protein evolution (among the set of candidate models) for the data at hand. ProtTest makes this selection by finding the model with the smallest Akaike Information Criterion (AIC) or Bayesian Information Criterion (BIC) score. At the same time, ProtTest obtains model-averaged estimates of different parameters (Posada and Buckley 2004) and calculates the importance of each of these parameters. ProtTest differs from its nucleotide homolog Modeltest (Posada and Crandall 1998) in that it does not include likelihood ratio tests (many models implemented in ProtTest are not nested).
+	
+http://darwin.uvigo.es/software/prottest.html
+
+Citation:
+Abascal F, Zardoya R, Posada, D. 2005. ProtTest: Selection of best-fit models of protein evolution. Bioinformatics: 21(9):2104-2105.
+	</help>
+</tool>
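As a toy illustration of the AIC/BIC ranking that ProtTest performs over candidate models: the log-likelihoods, parameter counts, and alignment length below are invented, and real model selection should use ProtTest's own output.

    import math

    def aic(lnL, k):
        return 2 * k - 2 * lnL

    def bic(lnL, k, n_sites):
        return k * math.log(n_sites) - 2 * lnL

    # model: (log-likelihood, extra free parameters) -- hypothetical values
    candidates = {"JTT+G": (-12345.6, 1), "WAG+G": (-12310.2, 1), "LG+G+F": (-12290.8, 20)}
    n_sites = 500
    for name, (lnL, k) in sorted(candidates.items(), key=lambda kv: aic(*kv[1])):
        print(name, round(aic(lnL, k), 1), round(bic(lnL, k, n_sites), 1))
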
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/prottest/prottest_wrapper.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,16 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use Cwd;
+
+my $dir=getcwd();
+
+#ProtTest directory placed in main user path.  Also, the runProtTest
+#script was changed to include the full path of the jar file
+my $prottestPath='/home/galaxy/pkgs/ProtTest2.4';
+
+my $input=$ARGV[1];
+my $output=$ARGV[3];
+
+system "$prottestPath/runProtTest -i $input -o $output" ;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/prune_taxa/Prune_taxa.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,17 @@
+<tool id="Prune_taxa" name="Prune_taxa">
+    <description>Pruning taxa from a tree or multiple trees</description>
+    <command>
+		java -jar /home/galaxy/galaxy-dist/tool-data/shared/jars/phyutility.jar -pr -in $input1 -out $output -names $taxonlist 2>&amp;1
+    </command>
+    <inputs>
+	    <param name="input1" type="data" format="txt" label="Input Trees File" help="newick or nexus" />
+	    <param name="taxonlist" type="text" label="Input taxon names (space separated) to be excluded" />
+    </inputs>
+    <outputs>
+	<data format="txt" name="output" label="${tool.name} on ${on_string}: Out file" />
+    </outputs>
+    <help>
+	Calls phyutility.jar -pr to prune the listed taxa from a tree or a file of multiple trees.
+	Pruning taxa can be useful when downstream analyses require a reduced taxon set or when particular taxa must be excluded.
+    </help>
+</tool>
\ No newline at end of file
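A rough Biopython equivalent of the pruning step performed here by phyutility -pr, with placeholder file names and taxon labels:

    from Bio import Phylo

    to_remove = ["Taxon_A", "Taxon_B"]                 # placeholder taxon labels
    trees = list(Phylo.parse("input_trees.nwk", "newick"))
    for tree in trees:
        for taxon in to_remove:
            tip = tree.find_any(name=taxon)
            if tip is not None:                        # skip taxa absent from this tree
                tree.prune(tip)
    Phylo.write(trees, "pruned_trees.nwk", "newick")
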
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/prune_taxa/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Removing taxa from a tree or multiple trees
+
+(Smith, Dunn 2008)
+
+Prune taxa requires Phyloinformatic Utility to be installed. (phyutility.jar)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/raxml/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Implements maximum likelihood (ML) search for optimal phylogeny
+
+(Stamatakis 2006)
+
+This tool requires the RAxML package to be installed on the system.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/raxml/raxml.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,98 @@
+#! /usr/bin/perl -w
+
+use strict;
+use warnings;
+#raxml.pl Galaxy wrapper calls raxml from raxml.xml
+#xml file contains:
+#raxml.pl [GTR|CAT] [PROT|DNA] [protmodel] [morphmodel] [phylip file] [constraint] [partition] [best_tree?] [invar?] [#bootreps] [outgroup]
+
+##For debugging command line pass, uncomment next
+#for (my $i=0; $i < @ARGV; $i++){
+#	print "Parameter #$i ".$ARGV[$i]."\n\n";
+#}
+#exit;
+
+my $rate_het=shift(@ARGV);		#0 rate heterogeneity: GTR (use GAMMA rates) or CAT
+my $datatype = shift(@ARGV);		#1 datatype: PROT (protein) or DNA
+my $protmodel = shift(@ARGV);		#2 which protein model
+my $morphmodel = shift(@ARGV);		#3 which morphology multistate model
+my $data_file= shift(@ARGV);		#4 input a phylip file
+my $part_file = shift(@ARGV);		#5 optional partition file
+my $constraint_tree = shift(@ARGV);	#6 optional constraint tree
+my $find_best = shift(@ARGV);		#7 if ML find ML tree as well as bootstrapping
+my $invar = shift(@ARGV);		#8 if INVAR include invariant site parameter in model
+my $nboots = shift(@ARGV);		#9 Number of bootstrap reps
+my $seed = shift(@ARGV);		#10 Random number seed for bootstrapping
+my $long = shift(@ARGV);		#11 decide whether to do a long call or not, with multiple threads
+my $outgroup = shift(@ARGV);		#12 Specify the outgroup
+my $model;
+
+
+
+# From shell pipeline
+#        raxmlHPC-PTHREADS7.2.6 -T $processors -f a -s $data_name.data  -q $data_name.part -m $model -n $data_name -N 100 -x 1234567890 -o Limulus_polyphemus
+#        cp RAxML_bestTree.$data_name $data_nameBootBest.tre
+#        cp RAxML_bipartitions.$data_name $data_nameBoot.tre
+
+#ADD OPTIONS TO BUILD FULL RAXML COMMANDLINE ARGUMENT
+
+my $build_command;
+#First CALL RAXML THROUGH PATH with 8 threads
+if($long eq 'Long'){
+	$build_command = "raxmlHPC-PTHREADS-SSE3 -T 8";
+}else{
+	$build_command = "raxmlHPC-MPI-SSE3 ";
+}
+#Check if find best tree is desired
+	if($find_best eq "ML"){
+		$build_command = $build_command." -f a ";
+	}
+#Next add call to input phylip file
+	$build_command = $build_command." -s ".$data_file;
+#Add call to partition file name
+	unless($part_file eq 'None'){
+		$build_command = $build_command." -q ".$part_file;
+	}
+#Build substitution model
+	if($datatype eq "PROT"){
+		$model = "PROT";
+	}elsif($datatype eq "DNA"){
+		$model = "GTR";
+	}
+	if($rate_het eq "GTR"){
+		$model = $model."GAMMA";
+	}elsif($rate_het eq "CAT"){
+		$model = $model."CAT";
+	}
+	if($invar eq "INVAR"){
+		$model = $model."I";
+	}
+	if($datatype eq "PROT"){
+		$model = $model.$protmodel;
+	}
+	$build_command = $build_command." -m ".$model;
+#Add multistate morphology model
+	$build_command = $build_command." -K ".$morphmodel;
+#check constraint tree
+	unless($constraint_tree eq 'None'){
+		$build_command = $build_command." -g ".$constraint_tree;
+	}
+#N Bootstraps
+	$build_command = $build_command." -N ".$nboots;
+#Bootstrap seed
+	$build_command = $build_command." -x ".$seed;
+#Parsimony seed
+	$build_command = $build_command." -p "."1234567";
+
+
+#name output files galaxy
+	$build_command = $build_command." -n galaxy";
+#Outgroup
+	if(defined $outgroup){
+		$build_command = $build_command." -o ".$outgroup;
+	}
+
+print "Galaxy COMMAND BUILD WAS: $build_command\n";
+
+#Run the assembled RAxML command line
+system $build_command;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/raxml/raxml.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,64 @@
+<tool id="raxml" name="raxml" version="7.2.8" force_history_refresh='True'>
+    <description> Maximum Likelihood Analysis </description>
+    <requirements>
+        <requirement type="package">raxml</requirement>
+    </requirements>
+    <command interpreter="perl">
+        raxml.pl $GAMMA $PROT $protmodel $morphmodel $data_file $part_file $constraint 
+		$ML $INVAR $Boot $seed Long $Out > $raxml_log
+			2>&amp;1
+    </command>
+    <inputs>
+        <param format="phylip" name="data_file" type="data" label="Raxml Phylip File" help=""/>
+	<param name="Out" type="text" label="Outgroup (optional)" help="The name of one or more (comma-separated) outgroup(s) can be specified"/>
+	<param name='GAMMA' type='boolean' checked='false' truevalue='GTR' falsevalue='CAT' label='Gamma model of rate heterogeneity' help='If not checked, CAT model will be used.'/>
+        <param name='INVAR' type='boolean' checked='false' truevalue='INVAR' falsevalue='no' label='Estimate proportion of invariant sites' help=''/>
+	<param name='PROT' type='boolean' checked='false' truevalue='PROT' falsevalue='DNA' label='Protein Sequences' help='If not checked, DNA sequences will be assumed'/>
+    <param name="protmodel" type="select" label="Protein Model">
+      <option value="WAG">WAG</option>
+      <option value="DAYHOFF">DAYHOFF</option>
+      <option value="DCMUT">DCMUT</option>
+      <option value="JTT">JTT</option>
+      <option value="MTREV">MTREV</option>
+      <option value="RTREV">RTREV</option>
+      <option value="CPREV">CPREV</option>
+      <option value="VT">VT</option>
+      <option value="BLOSUM62">BLOSUM62</option>
+      <option value="MTMAM">MTMAM</option>
+      <option value="LG">LG</option>
+      <option value="MTART">MTART</option>
+      <option value="MTZOA">MTZOA</option>
+      <option value="PMB">PMB</option>
+      <option value="HIVB">HIVB</option>
+      <option value="HIVW">HIVW</option>
+      <option value="JTTDCMUT">JTTDCMUT</option>
+      <option value="FLU">FLU</option>
+      <option value="GTR">GTR</option>
+    </param>
+    <param name="morphmodel" type="select" optional="true" label="Multi-state morphological char model" help="Ignored unless specifying MULTI in model partition">
+      <option value="MK">MK</option>
+      <option value="ORDERED">ORDERED</option>
+      <option value="GTR">GTR</option>
+    </param>
+	<param name="Boot" type="integer" value="100" label="Specify Number of bootstrap replicates" help='100 tends to be sufficient, do not exceed 1000'/>
+	<param name="seed" type="integer" value="1234567" label="Random Number Seed"/>
+        <param format="txt" name="part_file" type="data" optional="true" label="Partition file" help="You may provide an alignment partition file."/>
+	<param format="newick" name="constraint" type="data" label="BINARY Constraint Tree" optional="true" help="This option allows you to specify an incomplete or comprehensive multifurcating constraint tree in NEWICK format."/>
+        <param name='ML' type='boolean' checked='false' truevalue='ML' falsevalue='no' label='Maximum Likelihood Search' help='Will search for best scoring tree after the bootstraps.'/>
+	<param name="Run" type="text" label="Run Name" help="For Galaxy History"/>
+    </inputs>
+    <outputs>
+	<data format="txt" name="raxml_log" label="$Run: ${tool.name} Screen Output on ${on_string}" />
+        <data format="txt" name="boot_tree" label="$Run: ${tool.name} Bootstrap Trees on ${on_string}" from_work_dir="RAxML_bootstrap.galaxy" />
+        <data format="txt" name="bipartitions" label="$Run: ${tool.name} Bootstrap bipartitions on ${on_string}" from_work_dir="RAxML_bipartitionsBranchLabels.galaxy" />
+        <data format="txt" name="branchlabelboots" label="$Run: ${tool.name} Branchlabel bootstrap bipartitions on ${on_string}" from_work_dir="RAxML_bipartitions.galaxy" />
+        <data format="txt" name="raxml_info" label="$Run: ${tool.name} Log File on ${on_string}" from_work_dir="RAxML_info.galaxy" />
+        <data format="txt" name="best_trees" label="$Run: ${tool.name} ML Tree ${on_string}" from_work_dir="RAxML_bestTree.galaxy" />
+    </outputs>
+    <tests>
+    </tests>
+    <help>
+	raxml Home Page:
+	http://www.exelixis-lab.org/software.html
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/scythe/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+Scythe
+------
+Needs the Scythe binary in PATH.
+Get it at: https://github.com/vsbuffalo/scythe
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/scythe/scythe.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,38 @@
+<tool id="ucsb_scythe" name="Scythe" version="0.981 BETA">
+  <description>Scythe - A very simple adapter trimmer.</description>
+  <requirements>
+    <requirement type="package">scythe</requirement>
+  </requirements>
+  <command>
+    scythe -a $adapter -p $prior -n $min_match $quality -o trimmed_sequences.fastq $sequence > $stdout 2>&amp;1
+  </command>
+  <inputs>
+    <param format="txt" name="adapter" type="data" label="Adapter file (-a)"/>
+    <param format="txt" name="sequence" type="data" label="Sequence file"/>
+    <param name="prior" type="float" value="0.05" label="Prior (-p)"/>
+    <param name="quality" type="select" format="text">
+      <label>Quality type (-q)</label>
+      <option value="-q illumina">Illumina</option>
+      <option value="-q solexa">Solexa</option>
+      <option value="-q sanger">Sanger</option>
+    </param>
+  <param name="min_match" type="integer" value="5" label="Minimum match (-n)" help="Smallest contaminant to consider."/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="stdout" label="${tool.name} on ${on_string}: stdout" />
+    <data format="fastq" name="trimmed_sequences.fastq" label="${tool.name} on ${on_string}: results" from_work_dir="trimmed_sequences.fastq" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+  **Scythe 0.981 BETA**
+  Scythe uses a Naive Bayesian approach to classify contaminant substrings in sequence reads. It considers quality information, which can make it robust in picking out 3'-end adapters, which often include poor quality bases.
+
+  Most next generation sequencing reads have deteriorating quality towards the 3'-end. It's common for a quality-based trimmer to be employed before mapping, assemblies, and analysis to remove these poor quality bases. However, quality-based trimming could remove bases that are helpful in identifying (and removing) 3'-end adapter contaminants. Thus, it is recommended you run Scythe before quality-based trimming, as part of a read quality control pipeline.
+
+  The Bayesian approach Scythe uses compares two likelihood models: the probability of seeing the matches in a sequence given contamination, and not given contamination. Given that the read is contaminated, the probability of seeing a certain number of matches and mismatches is a function of the quality of the sequence. Given the read is not contaminated (and is thus assumed to be random sequence), the probability of seeing a certain number of matches and mismatches is determined by chance alone. The posterior is calculated across both these likelihood models, and the class (contaminated or not contaminated) with the maximum posterior probability is the class selected.
+
+  See Scythe help: https://github.com/vsbuffalo/scythe
+  </help>
+</tool>
+
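The classification described in the help above can be sketched as follows. This is a simplified illustration of the two-likelihood comparison, not Scythe's exact model; the read tail, adapter, quality scores, and prior below are invented (the prior corresponds to the tool's -p option).

    def posterior_contaminated(read_tail, adapter, quals, prior=0.05):
        like_contam, like_random = 1.0, 1.0
        for base, ref, q in zip(read_tail, adapter, quals):
            p_err = 10 ** (-q / 10.0)                 # per-base error probability from Phred quality
            like_contam *= (1.0 - p_err) if base == ref else p_err
            like_random *= 0.25 if base == ref else 0.75
        num = prior * like_contam
        return num / (num + (1.0 - prior) * like_random)

    print(posterior_contaminated("AGATCGGAAG", "AGATCGGAAG",
                                 [38, 35, 30, 25, 20, 18, 15, 12, 10, 8]))
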
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tab2trees/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,3 @@
+An R-based tool that produces phylogeny graphics, one tree per page, from multiple data partitions or data sets
+
+Tools developed by Oakley et al
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tab2trees/makeRtrees.pl	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+
+#This script generates an R script to print trees to a pdf file
+#input is a table with treename<tab>newick tree
+use strict;
+
+my $filename = $ARGV[0];
+my $outfile = $ARGV[1];
+open FILE, $filename or die $!;
+my $treetype = $ARGV[2];
+my $extiplabels = $ARGV[3];
+my $options;
+my $labeltaxfile = $ARGV[4];
+my %labelhash;
+my $genecount=0;
+my @genes;
+
+unless($labeltaxfile eq 'None'){
+	open LABELFILE, $labeltaxfile or die $!;
+	while (<LABELFILE>) {
+	        chomp;
+	        #get a line from the data file
+	        my $currentinput = "$_";
+		if($currentinput =~ /\t/){ 
+			my @splitline = split(/\t/);
+			my $speciesname= $splitline[0];
+			$speciesname = "'".$speciesname."'";
+			my $treename = $splitline[1];
+			if(exists $labelhash{$treename}){
+				push @{ $labelhash{$treename} }, $speciesname;
+			}else{
+				push @{ $labelhash{$treename} }, $speciesname;
+				#$labelhash{$treename} = $speciesname;
+				$genecount ++;
+				push @genes, $treename;
+			}	
+		}
+	}
+
+}#end unless
+
+if($extiplabels eq 'yes'){
+	$options = ", show.tip.label=FALSE";
+}else{
+	$options = ", show.tip.label=TRUE";
+}
+
+print "require(ape);\n";
+print "pdf(file='$outfile');\n";
+
+while (<FILE>) {
+        chomp;
+        #get a line from the data file
+        my $currentinput = "$_";
+	my @splitline = split(/\t/);
+	my $treename= $splitline[0];
+	my $tree = $splitline[1];
+	my $labelsvector;
+
+	#print the R commands to make tree graphics
+        print "raw_tree <- read.tree(text = '$tree');\n";
+	print "raw_tree\$edge.length[ is.na(raw_tree\$edge.length) ] <- 0 \n";
+        print "plot(raw_tree, cex=0.6, type='$treetype' $options);\n";
+        print "title('Tree File: $treename');\n";
+
+#Add taxon labels, if optional file present and if labels exist for tree
+	if(exists $labelhash{$treename}){
+		$labelsvector = join ",", @{ $labelhash{$treename} };
+		$labelsvector = "tolabel <- c(".$labelsvector.")";
+		print "thetips <- raw_tree\$tip.label \n";
+		print $labelsvector."\n";
+		print "labels <- match(tolabel,thetips) \n";
+		print "tiplabels(tip=labels, pch=21, cex=1) \n";
+	}
+}
+print "dev.off();\n";
+close FILE;
+
+#Testing hash arrays
+#my %nums;
+#my $test='odd';
+#for my $n (4,5,6,10) {
+#    if ($n % 2) {
+#        push @{ $nums{$test} }, $n;
+#    } else {
+#        push @{ $nums{even} }, $n;
+#    }
+#}
+#
+#print join ', ', @{ $nums{even} };
+#print "\n\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tab2trees/phytab2trees.sh	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+
+#First call the perl script, which reads the trees and writes an R script
+/home/galaxy/galaxy-dist/tools/Rtools/makeRtrees.pl $1 $2 $3 $4 $5 > Rtrees.R 2>log.txt
+
+R --vanilla < Rtrees.R 2>log.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tab2trees/tab2trees.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,29 @@
+<tool id="tab2trees" name="tab2trees">
+  <description>Create pdf of phylogeny graphics from table of tree names and newick trees</description>
+  <command interpreter="bash">phytab2trees.sh $input $output $treetype $extips $labeltax</command>
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Dataset" help="First column=Tree Name. Second Column=newick phylogeny"/>
+    <param name="treetype" type="select" label="Tree Plotting Style">
+      <option value="phylogram">phylogram</option>
+      <option value="cladogram">cladogram</option>
+      <option value="fan">fan</option>
+      <option value="unrooted">unrooted</option>
+      <option value="radial">radial</option>
+    </param>
+    <param name='extips' type='boolean' checked='false' truevalue='yes' falsevalue='no' label='Exclude Tip Labels'/>
+    <param format="tabular" name="labeltax" type="data" optional="true" label="Species to Mark File" help="tab delimited file with 2 columns, gene, species. These will be marked on the trees." /> 
+  </inputs>
+  <outputs>
+    <data format="pdf" name="output" />
+  </outputs>
+
+  <help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tagdust/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,4 @@
+Tagdust
+-------
+Needs Tagdust installed in PATH.
+Get at: http://genome.gsc.riken.jp/osc/english/software/src/tagdust.tgz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tagdust/tagdust.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,35 @@
+<tool id="ucsb_tagdust" name="Tagdust" version="1.13">
+  <description>TagDust - A program to eliminate artifactual reads from next-generation sequencing data sets.</description>
+  <requirements>
+    <requirement type="package">tagdust</requirement>
+  </requirements>
+  <command>
+    tagdust -f $false_rate -o reads.clean.fastq -a reads.artifact.fastq $adapter $illumina > $stdout 2>&amp;1
+  </command>
+  <inputs>
+    <param format="txt" name="adapter" type="data" label="Adapter file"/>
+    <param format="txt" name="illumina" type="data" label="Illumina file"/>
+    <param name="false_rate" type="float" value="0.01" label="False discovery rate (-f)" help="default: 0.01"/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="stdout" label="${tool.name} on ${on_string}: stdout" />
+    <data format="fastq" name="reads.clean.fastq" label="${tool.name} on ${on_string}: reads.clean.fastq" from_work_dir="reads.clean.fastq" />
+    <data format="fastq" name="reads.artifact.fastq" label="${tool.name} on ${on_string}: reads.artifact.fastq" from_work_dir="reads.artifact.fastq" />
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+    **Tagdust 1.13**
+
+    Tagdust  compares  sequences used during the preparation of 
+    a library to the sequenced reads. A read is annotated as an
+    artifact if a large proportion of its length can be explained by
+    matches to library sequences.
+
+    Tagdust accepts library sequences (e.g. 5' and 3' adaptors) in
+    standard fasta format and reads in either fasta or fastq format.
+
+    See Tagdust help: http://genome.gsc.riken.jp/osc/english/dataresource/
+  </help>
+</tool>
+
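The "proportion of its length explained by library matches" idea can be illustrated like this. It is only a sketch (TagDust's real algorithm additionally controls the false discovery rate set by -f); the read, adapter, and k-mer size are invented.

    def artifact_fraction(read, adapters, k=10):
        covered = set()
        for adapter in adapters:
            # Mark every read position covered by an exact k-mer match to a library sequence.
            for i in range(len(adapter) - k + 1):
                kmer = adapter[i:i + k]
                start = read.find(kmer)
                while start != -1:
                    covered.update(range(start, start + k))
                    start = read.find(kmer, start + 1)
        return len(covered) / float(len(read))

    read = "ACGTACGTAGATCGGAAGAGCACACGTCT"
    print(artifact_fraction(read, ["AGATCGGAAGAGCACACGTCTGAACTCC"], k=10))
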
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/thinningtrees/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Sub-sample trees from a posterior distribution
+
+(Smith, Dunn 2008)
+
+Thinning Trees requires Phyloinformatic Utility to be installed on the system. (phyutility.jar)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/thinningtrees/Thinning_trees.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,17 @@
+<tool id="Thinning_trees" name="Thinning_trees">
+    <description>Sub-sample trees from a posterior distribution</description>
+    <command>
+		java -jar /home/galaxy/galaxy-dist/tool-data/shared/jars/phyutility.jar -tt $input1 -in $input2 -out $output 2>&amp;1
+    </command>
+    <inputs>
+	    <param name="input1" type="integer" value="0" label="sample every #" help="sampling frequency" />
+	    <param name="input2" type="data" format="txt" label="Input Trees File" help="newick or nexus" />
+    </inputs>
+    <outputs>
+	<data format="txt" name="output" label="${tool.name} on ${on_string}: Out file" />
+    </outputs>
+    <help>
+	Calls phyutility.jar -tt to sample from a trees file.
+	Trimming (or thinning) trees can be essential if other programs require fewer trees than are present in your file. Phyutility will thin these files to make them more manageable.
+    </help>
+</tool>
\ No newline at end of file
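For a file with one newick tree per line, the same thinning idea reduces to keeping every Nth line; the sketch below is illustrative only (phyutility also handles nexus) and the file names are placeholders.

    every = 100                                   # keep every 100th tree
    with open("posterior_trees.nwk") as infile, open("thinned_trees.nwk", "w") as outfile:
        for i, line in enumerate(infile):
            if i % every == 0:
                outfile.write(line)
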
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tree_support/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Calculates support for nodes of a single tree (bootstrap) using a file of multiple trees
+
+(Smith, Dunn 2008)
+
+Tree Support requires Phyloinformatic Utility (phyutility.jar) to be installed on the system.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/tree_support/tree_support.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,19 @@
+<tool id="tree_support" name="tree_support">
+	<description>Calculates support for nodes of a single tree (bootstrap) using a file of multiple trees</description>
+	<requirements>
+		<requirement type="package">phyutility</requirement>
+	</requirements>
+	<command>
+		java -jar /home/galaxy/galaxy-dist/tool-data/shared/jars/phyutility.jar -ts -in $treesfile -tree $besttree -out $outtree
+	</command>
+	<inputs>
+		<param format="txt" name="treesfile" type="data" label="Input trees file" help="A file of multiple trees in newick format"/>
+		<param format="txt" name="besttree" type="data" label="Target tree" help="Tree to find support for"/>
+	</inputs>
+	<outputs>
+		<data format="txt" name="outtree" label="Support by ${tool.name}" />
+	</outputs>
+	<help>
+	Calls phyutility.jar -ts to map support values (e.g. bootstrap proportions) from the trees file onto the nodes of the target tree.
+	</help>
+</tool>
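The support calculation can be approximated as follows. This Biopython sketch treats each internal node as the set of tip names beneath it (a rooted-clade simplification of the bipartition support phyutility computes), and the file names are placeholders.

    from Bio import Phylo

    target = Phylo.read("best_tree.nwk", "newick")
    boots = list(Phylo.parse("bootstrap_trees.nwk", "newick"))

    # Pre-compute the tip-name sets present in each bootstrap tree.
    boot_sets = []
    for tree in boots:
        boot_sets.append({frozenset(t.name for t in clade.get_terminals())
                          for clade in tree.get_nonterminals()})

    for clade in target.get_nonterminals():
        tips = frozenset(t.name for t in clade.get_terminals())
        clade.confidence = 100.0 * sum(tips in s for s in boot_sets) / len(boots)

    Phylo.write(target, "tree_with_support.nwk", "newick")
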
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/vert_tree_format/README.txt	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,5 @@
+Convert between phylogenetic tree file formats
+
+(Smith, Dunn 2008)
+
+Vert_tree_format requires Phyloinformatic Utility (phyutility.jar) to be installed.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/vert_tree_format/vert_tree_format.xml	Sat Sep 08 15:33:34 2012 -0400
@@ -0,0 +1,16 @@
+<tool id="vert_tree_format" name="vert_tree_format">
+    <description>Convert between phylogenetic tree file formats</description>
+    <command>
+		java -jar /home/galaxy/galaxy-dist/tool-data/shared/jars/phyutility.jar -vert -in $input -out $output 2>&amp;1
+    </command>
+    <inputs>
+	    <param name="input" type="data" format="txt" label="Input Tree File" help="newick or nexus" />
+    </inputs>
+    <outputs>
+	<data format="txt" name="output" label="${tool.name} on ${on_string}: Out file" />
+    </outputs>
+    <help>
+	Calls phyutility.jar -vert to convert tree format.
+	The program automatically detects the input tree format: newick input is converted to nexus, and nexus to newick.
+    </help>
+</tool>
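A minimal Biopython equivalent of this conversion, with placeholder file names and a fixed (rather than auto-detected) direction:

    from Bio import Phylo

    # Convert a newick trees file to nexus; swap the format arguments for the reverse direction.
    Phylo.convert("trees.nwk", "newick", "trees.nex", "nexus")
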