# HG changeset patch
# User ucsb-phylogenetics
# Date 1340568149 14400
# Node ID 5b23f3eb3f0989de863a3e2e2aca48fb7f943cc7
# Parent 3db16e8335e10830435082db681126c8ca3f5d55
Added sequence gap remover and Trimming Reads.
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/gap-rem/gap-rem.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gap-rem/gap-rem.xml Sun Jun 24 16:02:29 2012 -0400
@@ -0,0 +1,29 @@
+
+ Removes gap in sequences
+ seqfill.pl $file $question_mark $hyphen $N $usePart $pfile
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ http://labs.eemb.ucsb.edu/oakley/todd/
+
+ Sequence Gap Remover
+
+ Takes an input phylip file and removes any specified gap characters that exist in the same
+ columns of containing sequences.
+
+ Output will be a text file.
+
+
+
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/gap-rem/gap_rem_README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gap-rem/gap_rem_README.txt Sun Jun 24 16:02:29 2012 -0400
@@ -0,0 +1,30 @@
+Sequence Gap Remover
+ http://labs.eemb.ucsb.edu/oakley/todd/
+
+ Sequence Gap Remover
+
+ Takes an input phylip file and removes any specified gap characters that exist in the same
+ columns of containing sequences.
+
+ Output will be a text file.
+
+Original Perl and Galaxy script implemented by: Roger Ngo and Todd H. Oakley, UCSB
+
+Sequence Gap Remover will remove any common gaps within a sequence in a phylip file.
+
+The package includes:
+
+* gap-rem.xml - the Galaxy tool
+* seqfill.pl - the Perl script
+* gap_rem_README.txt - the documentation file
+
+To install, copy the seqfill.pl and gap-rem.xml to a folder in the /galaxy-dist/tools/
+directory and add the XML file to tool_conf.xml.
+
+Restart the Galaxy instance with:
+
+./run.sh --stop-daemon
+
+and then
+
+./run.sh --reload --daemon
\ No newline at end of file
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/gap-rem/seqfill.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/gap-rem/seqfill.pl Sun Jun 24 16:02:29 2012 -0400
@@ -0,0 +1,147 @@
+#!/usr/bin/perl
+
+# Sequence Gap Remover
+
+# Todd H. Oakley and Roger Ngo, UCSB
+
+# 2012
+
+# Removes gaps in sequences from a phylip file.
+
+my $file = $ARGV[0];
+my $q_mark = $ARGV[1];
+my $hyphen = $ARGV[2];
+my $N = $ARGV[3];
+my $usePartFile = $ARGV[4];
+my $partFile = $ARGV[5];
+
+my $out = "out.phylipnon"; # output file
+
+open(FILE, $file);
+ my @speciesNames;
+ my @sequenceLines;
+
+ my @currentLineContent;
+
+ my $i = 0;
+ while($currentLine = ) {
+ chomp($currentLine);
+ @currentLineContent = split(/\t/, $currentLine);
+ $speciesNames[$i] = $currentLineContent[0];
+ $sequenceLines[$i] = $currentLineContent[1];
+ $i++;
+ }
+
+ my $dataInfo = $speciesNames[1]; # gets num of species and sequence length
+ my @numbers = split(/ /, $dataInfo);
+
+ my $numberOfSpecies = $numbers[0];
+ my $sequenceLength = $numbers[1];
+
+close(FILE);
+
+open(OUT, '>'.$out);
+ my @columnData; # this will have $sequenceLength elements
+ for($j = 0; $j < $numberOfSpecies+2; $j++) {
+ for($k = 0; $k < $sequenceLength; $k++) {
+ $currChar = substr($sequenceLines[$j], $k, 1);
+ $columnData[$k] = $columnData[$k].$currChar;
+ }
+ }
+
+ # mark locations that will be removed
+ my @flagMap;
+ for($i = 0; $i < $sequenceLength; $i++) {
+ $flagMap[$i] = 0;
+ }
+ my $index = 0;
+ foreach $el(@columnData) {
+ my $tot = 0;
+ my $q_mark_occur = 0;
+ my $hyphen_occur = 0;
+ my $N_occur = 0;
+
+ if($q_mark eq "true") {
+ $q_mark_occur = ($el =~ tr/?//);
+ }
+ if($hyphen eq "true") {
+ $hyphen_occur = ($el =~ tr/-//);
+ }
+ if($N eq "true") {
+ $N_occur = ($el =~ tr/N//);
+ }
+
+ $tot = $q_mark_occur + $hyphen_occur + $N_occur;
+ if($tot == $numberOfSpecies) {
+ $flagMap[$index] = 1;
+ }
+ $index++;
+ }
+
+ my $newSequenceLength = $sequenceLength;
+ foreach $el(@flagMap) {
+ if($el == 1) {
+ $newSequenceLength--;
+ }
+ }
+
+ print OUT $speciesNames[0]."\n";
+ print OUT $numberOfSpecies." ".$newSequenceLength."\n";
+ for($i = 2; $i < $numberOfSpecies+3; $i++) {
+ print OUT $speciesNames[$i]."\t";
+ for($j = 0; $j < $sequenceLength; $j++) {
+ if($flagMap[$j] == 0) {
+ my $character = substr($sequenceLines[$i], $j, 1);
+ print OUT $character;
+ }
+ }
+ print OUT "\n";
+ }
+
+close(OUT);
+
+my $partOut = "partOut.txt";
+
+if($usePartFile eq "true") {
+ # update the partition file
+ open(PART, $partFile);
+ my @data;
+ my @ranges;
+ my @names;
+ $i = 0;
+ while($currentLine = ) {
+ @data = split(/=/, $currentLine);
+ $names[$i] = $data[0];
+ $ranges[$i] = $data[1];
+ $i++;
+ }
+ close(PART);
+
+ my $firstFlag = 1;
+ open(PARTOUT, '>'.$partOut);
+ $j = 0;
+ my $newLower;
+ foreach $el(@ranges) {
+ print PARTOUT $names[$j]." = ";
+ @lowerUpper = split(/-/, $el);
+ if($firstFlag == 1) {
+ $newLower = $lowerUpper[0];
+ $firstFlag = 0;
+ }
+ my $currUpper = $lowerUpper[1];
+ my $newUpper = $currUpper;
+
+
+
+ for($i = $currLower; $i < $currUpper; $i++) {
+ if($flagMap[$i] == 1) {
+ $newUpper--;
+ }
+ }
+
+ print PARTOUT $newLower." - ".$newUpper."\n";
+ $newLower = $newUpper + 1;
+ $j++;
+ }
+ close(PARTOUT);
+}
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/phylobayes/pb.xml
--- a/ucsb_phylogenetics/phylobayes/pb.xml Thu Jun 21 17:48:46 2012 -0400
+++ b/ucsb_phylogenetics/phylobayes/pb.xml Sun Jun 24 16:02:29 2012 -0400
@@ -1,5 +1,8 @@
-
+
Runs phylobayes
+
+ Phylobayes 3.3b
+
pb.pl $filename $nchain $cycles $discrepancies $effectivesize $burnin $sampleInterval
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/phylobayes/phylobayes_wrapper.tar.bz2
Binary file ucsb_phylogenetics/phylobayes/phylobayes_wrapper.tar.bz2 has changed
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/phylomatic/phylomatic.xml
--- a/ucsb_phylogenetics/phylomatic/phylomatic.xml Thu Jun 21 17:48:46 2012 -0400
+++ b/ucsb_phylogenetics/phylomatic/phylomatic.xml Sun Jun 24 16:02:29 2012 -0400
@@ -1,5 +1,8 @@
-
+
Run Phylomatic
+
+ Phylocom Phylomatic
+
./phylomatic.pl $input1 $input2
diff -r 3db16e8335e1 -r 5b23f3eb3f09 ucsb_phylogenetics/trimmingreads/TrimmingReads.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ucsb_phylogenetics/trimmingreads/TrimmingReads.pl Sun Jun 24 16:02:29 2012 -0400
@@ -0,0 +1,310 @@
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Basename;
+use List::Util qw(sum min max);
+
+my $seqFormat = "a"; # 1: Sanger; 2: Solexa; 3: Illumina 1.3+; 4: Illumina 1.5+;
+my $subVal;
+
+# Parameter variables
+my $file;
+my $helpAsked;
+my $rTrimBases = 0;
+my $lTrimBases = 0;
+my $qCutOff = 0;
+my $lenCutOff = -1;
+my $outFile = "";
+my $isQualTrimming = 1;
+
+GetOptions(
+ "i=s" => \$file,
+ "h|help" => \$helpAsked,
+ "l|leftTrimBases=i" => \$lTrimBases,
+ "o|outputFile=s" => \$outFile,
+ "r|rightTrimBases=i" => \$rTrimBases,
+ "q|qualCutOff=i" => \$qCutOff,
+ "n|lenCutOff=i" => \$lenCutOff,
+ );
+if(defined($helpAsked)) {
+ prtUsage();
+ exit;
+}
+if(!defined($file)) {
+ prtError("No input files are provided");
+}
+
+my ($fileName, $filePath) = fileparse($file);
+$outFile = $file . "_trimmed" if($outFile eq "");
+
+open(I, "$file") or die "Can not open file $file\n";
+open(O, ">$outFile") or die "Can not create file $outFile\n";
+my $tmpLine = ;
+close(I);
+if($tmpLine =~ /^@/) {
+ print "Input read/sequence format: FASTQ\n";
+ if($lTrimBases == 0 && $rTrimBases == 0 && $qCutOff == 0 && $lenCutOff == -1) {
+ print "Trimming parameters are not set.\nNothing to do.\nExiting...\n";
+ unlink($outFile);
+ exit;
+ }
+ print "Checking FASTQ variant: File $file...\n";
+ my $nLines = checkFastQFormat($file, 1);
+ if($seqFormat == 1) {
+ $subVal = 33;
+ print "Input FASTQ file format: Sanger\n";
+ }
+ if($seqFormat == 2) {
+ $subVal = 64;
+ print "Input FASTQ file format: Solexa\n";
+ }
+ if($seqFormat == 3) {
+ $subVal = 64;
+ print "Input FASTQ file format: Illumina 1.3+\n";
+ }
+ if($seqFormat == 4) {
+ $subVal = 64;
+ print "Input FASTQ file format: Illumina 1.5+\n";
+ }
+ if($seqFormat == 5) {
+ $subVal = 33;
+ print "Input FASTQ file format: Illumina 1.8+\n";
+ }
+ if($lTrimBases != 0 || $rTrimBases != 0) {
+ print "Trimming $lTrimBases bases from left end and $rTrimBases bases from right end";
+ $isQualTrimming = 0;
+ }
+ else {
+ print "Trimming based on PHRED quality score (< $qCutOff)";
+ }
+ print " followed by length filtering (< $lenCutOff bp)\n";
+
+ open(I, "$file") or die "Can not open file $file\n";
+ my $c = 0;
+ my $currId1 = "";
+ my $currId2 = "";
+ my $currSeq = "";
+ my $currQual = "";
+
+
+ while(my $line = ) {
+ chomp $line;
+ $c++;
+ if($c == 5) {
+ $c = 1;
+ }
+ if($c == 1) {
+ $currId1 = $line;
+ }
+ if($c == 3) {
+ $currId2 = $line;
+ }
+ if($isQualTrimming == 0) {
+ if($c == 2) {
+ $currSeq = trimSeq($line);
+ }
+ elsif($c == 4) {
+ $currQual = trimSeq($line);
+ print O "$currId1\n$currSeq\n$currId2\n$currQual\n" if(length $currSeq >= $lenCutOff);
+ }
+ }
+ else {
+ if($c == 4) {
+ $currQual = trimSeq4Qual($line);
+ if(defined($currQual)) {
+ my $len = length $currQual;
+ $currSeq =~ /^(.{$len})/;
+ $currSeq = $1;
+ print O "$currId1\n$currSeq\n$currId2\n$currQual\n" if(length $currSeq >= $lenCutOff);
+ }
+ }
+ elsif($c == 2) {
+ $currSeq = $line;
+ }
+ }
+ }
+ print "Trimmed file is generated: $outFile\n";
+}
+else {
+ print "Input read/sequence format: FASTA\n";
+ if($qCutOff != 0) {
+ print "Warning: Quality trimming can not be performed for FASTA files\n";
+ $qCutOff = 0;
+ }
+ if($lTrimBases == 0 && $rTrimBases == 0 && $lenCutOff == -1) {
+ print "Trimming parameters are not set.\nNothing to do.\nExiting...\n";
+ unlink($outFile);
+ exit;
+ }
+ print "Trimming $lTrimBases bases from left end and $rTrimBases bases from right end followed by length filtering (< $lenCutOff bp)\n";
+ open(I, "$file") or die "Can not open file $file\n";
+ my $prevFastaSeqId = "";
+ my $fastaSeqId = "";
+ my $fastaSeq = "";
+
+ while(my $line = ) {
+ chomp $line;
+ if($line =~ /^>/) {
+ $prevFastaSeqId = $fastaSeqId;
+ $fastaSeqId = $line;
+ if($fastaSeq ne "") {
+ my $outSeq = trimSeq($fastaSeq);
+ print O "$prevFastaSeqId\n", formatSeq($outSeq), "\n" if(length $outSeq >= $lenCutOff);
+ }
+ $fastaSeq = "";
+ }
+ else {
+ $fastaSeq .= $line;
+ }
+ }
+ if($fastaSeq ne "") {
+ $prevFastaSeqId = $fastaSeqId;
+ my $outSeq = trimSeq($fastaSeq);
+ print O "$prevFastaSeqId\n", formatSeq($outSeq), "\n" if(length $outSeq >= $lenCutOff);
+ }
+ print "Trimmed read/sequence file is generated: $outFile\n";
+}
+close(O);
+close(I);
+
+
+sub trimSeq {
+ my $seq = $_[0];
+ $seq =~ /^.{$lTrimBases}(.+).{$rTrimBases}$/;
+ return $1;
+}
+
+sub trimSeq4Qual {
+ my $qual = $_[0];
+ my @ASCII = unpack("C*", $qual);
+ my $trimCount = 0;
+ for(my $i=@ASCII; $i>0; $i--) {
+ my $val = $ASCII[$i-1];
+ $val -= $subVal;
+ if($val < $qCutOff) {
+ $trimCount++;
+ }
+ else {
+ last;
+ }
+ }
+ $qual =~ /^(.+).{$trimCount}$/;
+ return $1;
+}
+
+sub formatSeq {
+ my $seq = $_[0];
+ my $newSeq = "";
+ my $ch = 60;
+ for(my $i=0; $i\n";
+ print " File containing reads/sequences in either FASTQ or FASTA format\n";
+ print "\n";
+ print "### Other options [Optional]\n";
+ print " -h | -help\n";
+ print " Prints this help\n";
+ print "--------------------------------- Trimming Options ---------------------------------\n";
+ print " -l | -leftTrimBases \n";
+ print " Number of bases to be trimmed from left end (5' end)\n";
+ print " default: 0\n";
+ print " -r | -rightTrimBases \n";
+ print " Number of bases to be trimmed from right end (3' end)\n";
+ print " default: 0\n";
+ print " -q | -qualCutOff (Only for FASTQ files)\n";
+ print " Cut-off PHRED quality score for trimming reads from right end (3' end)\n";
+ print " For eg.: -q 20, will trim bases having PHRED quality score less than 20 at 3' end of the read\n";
+ print " Note: Quality trimming can be performed only if -l and -r are not used\n";
+ print " default: 0 (i.e. quality trimming is OFF)\n";
+ print " -n | -lenCutOff \n";
+ print " Read length cut-off\n";
+ print " Reads shorter than given length will be discarded\n";
+ print " default: -1 (i.e. length filtering is OFF)\n";
+ print "--------------------------------- Output Options ---------------------------------\n";
+ print " -o | -outputFile