Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff phylogenies/genetree_read_placement.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogenies/genetree_read_placement.pl Tue Mar 11 12:19:13 2014 -0700 @@ -0,0 +1,70 @@ +#! /usr/bin/perl -w + +use strict; +use warnings; + +##For debugging command line pass, uncomment next +#for (my $i=0; $i < @ARGV; $i++){ +# print "Parameter #$i ".$ARGV[$i]."\n\n"; +#} +#exit; + +my $newgenes=shift(@ARGV); #0 new genes to align +my $align = shift(@ARGV); #1 alignment program to use +my $path = shift(@ARGV); #2 path to tree and gene data +my $name = shift(@ARGV); #3 name of gene family + +#If $newgenes has not hits, do not do read placement, just write tree with no hits +my $buffer; +my $lines = 0; +open(FILE, $newgenes) or die "Can't open `$newgenes': $!"; +while (sysread FILE, $buffer, 4096) { + $lines += ($buffer =~ tr/\n//); +} +close FILE; + +if($lines < 1){ + print "No hits found. Skipping read placement\n Tree copied to output.\n"; + system "cp $path.tre RAxML_labelledTree.EPA_TEST"; + system "cp $path.tre RAxML_originalLabelledTree.EPA_TEST"; +}else{ + + #First concatenate fasta files and align + system "cat $newgenes $path.fas > toalign.fas"; + + if($align eq "MUSCLE"){ + system "muscle -in toalign.fas -out aligned.fas"; + } + elsif($align eq "MAFFT") { + system "mafft --auto toalign.fas > aligned.fas"; + } + elsif($align eq "PRANK") { + system "prank -d=toalign.fas -o=aligned -f=fasta -F"; + system "mv aligned.2.fas aligned.fas"; + } + + #convert to phylip format, uses seqConverter.pl + system "/home/galaxy/galaxy-dist/tools/oakley_dev/seqConverterG.pl -daligned.fas -ope -Oaligned.phy"; + + system "raxmlHPC-PTHREADS-SSE3 -f v -s aligned.phy -m PROTGAMMAWAG -t $path.tre -n EPA_TEST -T 4"; +} + +#Now make tab delimited file to use in tab2trees +#open treefile to read tree line +open(TREE, "<","RAxML_labelledTree.EPA_TEST") or die "Can't open RESULT File!"; +my $finaltree; +while (<TREE>){ + if($_ =~ /\;/m){ + $finaltree = $_; + chomp($finaltree); + } +} +close TREE; + +$name =~ s/ /_/g; +chomp($name); +#remove clade labels +$finaltree =~ s/\[I\d+\]//g; +open(TAB, '>treeout.tab') or die "Can't open File!"; +print TAB $name."\t".$finaltree."\n"; +close TAB;