Mercurial > repos > edward-kirton > minimus2
changeset 0:937ba44abdb7 default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:29:43 -0400 |
parents | |
children | |
files | minimus2/minimus2.xml minimus2/minimus2_wrapper.pl |
diffstat | 2 files changed, 187 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: minimus2/minimus2.xml : Galaxy tool definition that drives minimus2_wrapper.pl. -->
<tool id="minimus2" name="Minimus2" version='1.0.1'>
<description>Merge two sets of assembled contig sequences</description>
<!-- Wrapper invocation: four positional arguments (infile1, infile2, contigs output,
     singletons output); the prefix options are appended only when renaming is selected. -->
<command interpreter='perl'>minimus2_wrapper.pl -tmpdir $contigs_outfile.extra_files_path $infile1 $infile2 $contigs_outfile $singletons_outfile
#if $prefix1.select == 'y':
-prefix1 $prefix1.prefix
#end if
#if $prefix2.select == 'y':
-prefix2 $prefix2.prefix
#end if
</command>
<inputs>
  <param name="infile1" type="data" format="fasta" label="Contig sequences file 1"/>
  <conditional name='prefix1'>
    <param name='select' type='select' label='Rename contigs in file 1 by adding prefix?'>
      <option value='y'>yes, add prefix</option>
      <option value='n'>no, contig IDs are unique</option>
    </param>
    <when value='y'>
      <!-- The prefix is placed unquoted on the command line and becomes part of every
           sequence ID, so it must be non-empty and free of whitespace and shell characters. -->
      <param name="prefix" type="text" value="1" label="Prefix for sequences in file 1">
        <validator type="regex" message="Prefix must be non-empty and contain only letters, digits, '_', '.' or '-'">^[A-Za-z0-9_.-]+$</validator>
      </param>
    </when>
    <when value='n'>
    </when>
  </conditional>
  <param name="infile2" type="data" format="fasta" label="Contig sequences file 2"/>
  <conditional name='prefix2'>
    <param name='select' type='select' label='Rename contigs in file 2 by adding prefix?'>
      <option value='y'>yes, add prefix</option>
      <option value='n'>no, contig IDs are unique</option>
    </param>
    <when value='y'>
      <!-- Same constraints as the file 1 prefix. -->
      <param name="prefix" type="text" value="2" label="Prefix for sequences in file 2">
        <validator type="regex" message="Prefix must be non-empty and contain only letters, digits, '_', '.' or '-'">^[A-Za-z0-9_.-]+$</validator>
      </param>
    </when>
    <when value='n'>
    </when>
  </conditional>
</inputs>
<outputs>
  <data name="contigs_outfile" format="fasta" label="contigs" />
  <data name="singletons_outfile" format="fasta" label="singletons" />
</outputs>
<help>
**What it does**

minimus2 is part of the AMOS assembler package, designed for merging one or two sets of contig sequences.

This tool preprocesses the Fasta input files prior to coassembly with minimus2 and separately returns the new contigs
and the sequences which did not co-assemble (singletons).

Sequence IDs must be unique across both input datasets, because minimus2 fails on duplicate IDs. If the IDs are not
already unique, use the optional rename-with-prefix options, which rename each sequence to prefix.counter.

**Documentation**

http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus2
</help>
</tool>
#!/usr/bin/env perl
# File: minimus2/minimus2_wrapper.pl
#
# Wrapper around Minimus2 (AMOS package). It concatenates two Fasta files
# (optionally renaming the sequences), converts the result to AMOS format with
# toAmos, runs minimus2, and moves the merged contigs and the singletons to the
# requested output paths. All intermediate files live in a per-process
# subdirectory of the temporary directory, which is removed on exit.

use strict;
use warnings;
use Getopt::Long;
use Env qw(TMPDIR TEMPDIR);
use File::Copy qw(move);
use File::Path qw(mkpath rmtree);
use File::Spec;

my $usage=<<'ENDHERE';
NAME:
    minimus2_wrapper.pl
PURPOSE:
    To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package.
REQUIRED ARGUMENTS:
    $1 : infile1 in Fasta format
    $2 : infile2 in Fasta format
    $3 : outfile of combined assembly in Fasta format
    $4 : outfile of singletons in Fasta format
OPTIONS:
    -tmpdir <dir> : path of temporary directory to use (optional); tempfiles will be discarded upon completion
    -prefix1 <string> : rename reads in infile1 using <prefix>.<counter> format
    -prefix2 <string> : rename reads in infile2 using <prefix>.<counter> format
NOTE:
    - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this.
ENDHERE

# OPTIONS
# $tmpdir is a file-scoped lexical so that dienice() can clean it up.
my $tmpdir;
my ($help,$prefix1,$prefix2);
GetOptions(
    'tmpdir=s' => \$tmpdir,
    'prefix1=s' => \$prefix1,
    'prefix2=s' => \$prefix2,
    'help' => \$help
) or die("Invalid options\n$usage");
if ($help) { print $usage; exit; }

# VALIDATE
die("Expect exactly four arguments\n") unless @ARGV == 4;
my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV;
if ($tmpdir) {
    unless (-d $tmpdir) {
        # mkpath creates missing parent directories too; it croaks on failure.
        my $created = eval { mkpath($tmpdir); 1 };
        die("Unable to create tmpdir, $tmpdir\n") unless $created and -d $tmpdir;
    }
} elsif ($TMPDIR and -d $TMPDIR) {
    $tmpdir=$TMPDIR;
} elsif ($TEMPDIR and -d $TEMPDIR) {
    $tmpdir=$TEMPDIR;
} elsif (-d "/tmp") {
    $tmpdir="/tmp";
} elsif (-d "/scratch") {
    $tmpdir="/scratch";
} else {
    die("Tmpdir required\n");
}
# Work in a per-process subdirectory so concurrent runs cannot collide.
$tmpdir .= "/$$";
mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n");

# CHECK EXECUTABLES
# Every tool gets the same check: it must be a regular, executable file on PATH.
my %exe;
for my $name (qw(toAmos minimus2 delta-filter show-coords)) {
    $exe{$name} = find_executable($name)
        or dienice("$name executable not found\n");
}

# CONCATENATE INFILES
my $infile="$tmpdir/infile.seq";
open(my $out, '>', $infile) or dienice("Unable to open tmpfile, $infile\n");
my $n1=append_fasta($infile1, $out, $prefix1, 'infile1');
dienice("Infile 1 contains no sequences or is not in Fasta format\n") unless $n1;
my $n2=append_fasta($infile2, $out, $prefix2, 'infile2');
# Buffered write errors only surface at close, so the result must be checked.
close($out) or dienice("Unable to write tmpfile, $infile\n");
dienice("Infile 2 contains no sequences or is not in Fasta format\n") unless $n2;

# CONVERT FORMAT
run_quiet($exe{'toAmos'}, '-s', $infile, '-o', "$tmpdir/infile.afg")
    or dienice("ERROR CONVERTING TO AMOS FORMAT\n");

# CO-ASSEMBLY
# explicitly defining the delta-filter and show-coords executables is more robust
run_quiet(
    $exe{'minimus2'}, "$tmpdir/infile",
    '-D', "REFCOUNT=$n1",
    '-D', "DELTAFILTER=$exe{'delta-filter'}",
    '-D', "SHOWCOORDS=$exe{'show-coords'}",
) or dienice("ERROR EXECUTING MINIMUS2\n");

# MOVE FILES AND CLEANUP TMPDIR
move("$tmpdir/infile.fasta", $contigs_outfile)
    or dienice("ERROR MOVING CONTIGS OUTFILE\n");
move("$tmpdir/infile.singletons.seq", $singletons_outfile)
    or dienice("ERROR MOVING SINGLETONS OUTFILE\n");
rmtree($tmpdir);
die("ERROR CLEANING UP TEMP DIR\n") if -e $tmpdir;
exit;

# Return the full path of the first regular, executable file called $name found
# in the PATH directories, or nothing if there is none.
sub find_executable {
    my ($name) = @_;
    for my $dir (File::Spec->path) {
        my $candidate = File::Spec->catfile($dir, $name);
        return $candidate if -f $candidate and -x _;
    }
    return;
}

# Copy the Fasta file $path to the open handle $out and return the number of
# header lines seen. When $prefix is a non-empty string (including "0"), each
# header is replaced by ">$prefix.<counter>". $label names the file in the
# error message raised when it cannot be opened.
sub append_fasta {
    my ($path, $out, $prefix, $label) = @_;
    open(my $in, '<', $path) or dienice("Unable to open $label, $path\n");
    my $rename = (defined $prefix and length $prefix);
    my $count = 0;
    while (my $line = <$in>) {
        if ($line =~ /^>/) {
            ++$count;
            $line = ">$prefix.$count\n" if $rename;
        }
        # A final line without a newline would otherwise be fused with the
        # first header of the next file.
        $line .= "\n" unless $line =~ /\n\z/;
        print {$out} $line or dienice("Unable to write tmpfile\n");
    }
    close($in);
    return $count;
}

# Run a command without a shell (list-form pipe open), discard its standard
# output, and return true only if it could be started and exited with status 0.
sub run_quiet {
    my @cmd = @_;
    open(my $pipe, '-|', @cmd) or return 0;
    while (defined(my $discard = <$pipe>)) { }
    # close() on a pipe returns false when the child exit status is non-zero.
    return close($pipe) ? 1 : 0;
}

# CLEANUP TEMPFILES BEFORE QUITTING
sub dienice {
    my ($msg) = @_;
    rmtree($tmpdir) if defined $tmpdir and -d $tmpdir;
    die($msg);
}
__END__