# HG changeset patch
# User edward-kirton
# Date 1307482183 14400
# Node ID 937ba44abdb72a4b784a42d568ef40d0d386071e
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
diff -r 000000000000 -r 937ba44abdb7 minimus2/minimus2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2.xml Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,55 @@
+
+Merge two sets of assembled contig sequences
+minimus2_wrapper.pl -tmpdir $contigs_outfile.extra_files_path $infile1 $infile2 $contigs_outfile $singletons_outfile
+#if $prefix1.select == 'y':
+-prefix1 $prefix1.prefix
+#end if
+#if $prefix2.select == 'y':
+-prefix2 $prefix2.prefix
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+minimus2 is part of the AMOS assembler package, designed for merging one or two sets of contig sequences.
+
+This tool preprocesses the Fasta input files prior to coassembly with minimus2 and separately returns the new contigs
+and the sequences which did not co-assemble (singletons).
+
+Sequence IDs must be unique across both input datasets; the optional rename-with-prefix option avoids duplicate IDs (give each dataset a different prefix).
+
+**Documentation**
+
+http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus2
+
+
diff -r 000000000000 -r 937ba44abdb7 minimus2/minimus2_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/minimus2/minimus2_wrapper.pl Tue Jun 07 17:29:43 2011 -0400
@@ -0,0 +1,132 @@
+#!/usr/bin/env perl
+
+use strict;
+use Getopt::Long;
+use Env qw(TMPDIR TEMPDIR);
+
+my $usage=<<'ENDHERE';
+NAME:
+ minimus2_wrapper.pl
+PURPOSE:
+ To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package.
+REQUIRED ARGUMENTS:
+ $1 : infile1 in Fasta format
+ $2 : infile2 in Fasta format
+ $3 : outfile of combined assembly in Fasta format
+ $4 : outfile of singletons in Fasta format
+OPTIONS:
+ -tmpdir <dir> : path of temporary directory to use (optional); tempfiles will be discarded upon completion
+ -prefix1 <prefix> : rename reads in infile1 using <prefix>.<n> format
+ -prefix2 <prefix> : rename reads in infile2 using <prefix>.<n> format
+NOTE:
+ - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this.
+ENDHERE
+
+# OPTIONS
+our $tmpdir;    # package variable: dienice() needs it to remove the temp directory
+my ($help,$prefix1,$prefix2);
+GetOptions(
+    'tmpdir=s' => \$tmpdir,
+    'prefix1=s' => \$prefix1,
+    'prefix2=s' => \$prefix2,
+    'help' => \$help
+) or die("Invalid command-line options\n\n$usage");    # GetOptions returns false on a bad option
+if ($help) { print $usage; exit; }
+
+# VALIDATE
+# Exactly four positional arguments must remain after option parsing.
+die("Expect exactly four arguments\n") unless @ARGV == 4;
+my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV;
+# Choose the parent temp directory: -tmpdir wins (created on demand), otherwise
+# the first existing directory among $TMPDIR, $TEMPDIR, /tmp and /scratch.
+if ($tmpdir) {
+    -d $tmpdir or mkdir($tmpdir) or die("Unable to create tmpdir, $tmpdir\n");
+} else {
+    for my $candidate ($TMPDIR, $TEMPDIR, "/tmp", "/scratch") {
+        next unless $candidate and -d $candidate;
+        $tmpdir=$candidate;
+        last;
+    }
+    die("Tmpdir required\n") unless $tmpdir;
+}
+
+# All work happens in a subdirectory named after this process ID.
+$tmpdir .= "/$$";
+mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n");
+
+# CHECK EXECUTABLES
+# Resolve each required program on PATH and abort (removing the temp directory)
+# unless `which` printed the path of an existing file; show-coords gets the same -f test.
+my %exe;
+for my $program (qw(toAmos minimus2 delta-filter show-coords)) {
+    my $path=`which $program`;
+    chomp $path;
+    dienice("$program executable not found\n") unless $path and -f $path;
+    $exe{$program}=$path;
+}
+my ($toAmos,$minimus2)=@exe{qw(toAmos minimus2)};
+# These two full paths are handed to minimus2 through -D definitions later on.
+my ($deltafilter,$showcoords)=@exe{qw(delta-filter show-coords)};
+
+# CONCATENATE INFILES
+# Both inputs are appended to a single Fasta file for toAmos.
+# $n1 (records taken from infile1) is later passed to minimus2 as REFCOUNT.
+my $infile="$tmpdir/infile.seq";
+open(my $out, '>', $infile) or dienice("Unable to open tmpfile, $infile\n");
+my $n1=append_fasta($infile1, 1, $prefix1, $out);
+my $n2=append_fasta($infile2, 2, $prefix2, $out);
+# buffered write errors (e.g. a full disk) only surface when the handle is closed
+close($out) or dienice("Unable to write tmpfile, $infile\n");
+
+# Append one Fasta file to an open output handle.
+#   $path   : Fasta file to read
+#   $num    : 1 or 2, only used to label error messages
+#   $prefix : if true, the n-th header line is replaced by ">$prefix.n"
+#   $out    : output filehandle
+# Returns the number of header lines (records) copied; calls dienice() when the
+# file cannot be opened or contains no header line.
+sub append_fasta {
+    my ($path,$num,$prefix,$out)=@_;
+    # three-argument open: the file name can never be parsed as a mode or pipe
+    open(my $in, '<', $path) or dienice("Unable to open infile$num, $path\n");
+    my $count=0;
+    while (my $line=<$in>) {
+        # header line: count it and, if requested, rename it
+        if ($line =~ /^>/) {
+            ++$count;
+            $line=">$prefix.$count\n" if $prefix;
+        }
+        # a final line without newline must not swallow the next file's header
+        $line .= "\n" unless $line =~ /\n\z/;
+        print $out $line;
+    }
+    close $in;
+    dienice("Infile $num contains no sequences or is not in Fasta format\n") unless $count;
+    return $count;
+}
+
+# CONVERT FORMAT (backticks never die, so $? is checked rather than eval/$@)
+`toAmos -s $infile -o $tmpdir/infile.afg`;
+dienice("ERROR CONVERTING TO AMOS FORMAT\n") if $?;
+
+# CO-ASSEMBLY
+# explicitly defining the delta-filter and show-coords executables is more robust
+`minimus2 $tmpdir/infile -D REFCOUNT=$n1 -D DELTAFILTER=$deltafilter -D SHOWCOORDS=$showcoords`;
+dienice("ERROR EXECUTING MINIMUS2\n") if $?;
+
+# MOVE FILES AND CLEANUP TMPDIR
+`mv $tmpdir/infile.fasta $contigs_outfile`;
+dienice("ERROR MOVING CONTIGS OUTFILE\n") if $?;
+`mv $tmpdir/infile.singletons.seq $singletons_outfile`;
+dienice("ERROR MOVING SINGLETONS OUTFILE\n") if $?;
+`rm -rf $tmpdir`;
+die("ERROR CLEANING UP TEMP DIR\n") if $?;
+exit;
+
+# Abort with the given message after deleting the temp directory (file-level $tmpdir).
+sub dienice {
+    my ($message) = @_;
+    `rm -rf $tmpdir`;
+    die $message;
+}
+__END__