Mercurial > repos > edward-kirton > minimus2
view minimus2/minimus2_wrapper.pl @ 0:937ba44abdb7 default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:29:43 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl use strict; use Getopt::Long; use Env qw(TMPDIR TEMPDIR); my $usage=<<'ENDHERE'; NAME: minimus2_wrapper.pl PURPOSE: To combine two sets of assembled contig sequences. This script wraps Minimus2, part of the AMOS package. REQUIRED ARGUMENTS: $1 : infile1 in Fasta format $2 : infile2 in Fasta format $4 : outfile of combined assembly in Fasta format $5 : outfile of singletons in Fasta format OPTIONS: -tmpdir <dir> : path of temporary directory to use (optional); tempfiles will be discarded upon completion -prefix1 <string> : rename reads in infile1 using <prefix>.<counter> format -prefix2 <string> : rename reads in infile2 using <prefix>.<counter> format NOTE: - Minimus2 will fail if there are duplicate IDs between infile1 and infile2; use prefix options to avoid this. ENDHERE # OPTIONS our $tmpdir; my ($help,$prefix1,$prefix2); GetOptions( 'tmpdir=s' => \$tmpdir, 'prefix1=s' => \$prefix1, 'prefix2=s' => \$prefix2, 'help' => \$help ); if ($help) { print $usage; exit; } # VALIDATE die("Expect exactly four arguments\n") unless @ARGV == 4; my ($infile1,$infile2,$contigs_outfile,$singletons_outfile)=@ARGV; if ($tmpdir) { unless (-d $tmpdir) { mkdir($tmpdir) or die("Unable to create tmpdir, $tmpdir\n"); } } elsif ($TMPDIR and -d $TMPDIR) { $tmpdir=$TMPDIR; } elsif ($TEMPDIR and -d $TEMPDIR) { $tmpdir=$TEMPDIR; } elsif (-d "/tmp") { $tmpdir="/tmp"; } elsif (-d "/scratch") { $tmpdir="/scratch"; } else { die("Tmpdir required\n"); } $tmpdir .= "/$$"; mkdir($tmpdir) or die("Unable to mkdir $tmpdir\n"); # CHECK EXECUTABLES my $toAmos=`which toAmos`; chomp $toAmos; dienice("toAmos executable not found\n") unless $toAmos and -f $toAmos; my $minimus2=`which minimus2`; chomp $minimus2; dienice("minimus2 executable not found\n") unless $minimus2 and -f $minimus2; my $deltafilter=`which delta-filter`; chomp $deltafilter; dienice("delta-filter executable not found\n") unless $deltafilter and -f $deltafilter; my $showcoords=`which show-coords`; chomp $showcoords; dienice("show-coords executable not found\n") unless $showcoords; # CONCATENATE INFILES open(IN1, "<$infile1") or dienice("Unable to open infile1, $infile1\n"); my $infile="$tmpdir/infile.seq"; open(OUT, ">$infile") or dienice("Unable to open tmpfile, $infile\n"); my $n1=0; while (<IN1>) { if (/^>/) { ++$n1; if ($prefix1) { print OUT ">$prefix1.$n1\n"; } else { print OUT; } } else { print OUT; } } close IN1; dienice("Infile 1 contains no sequences or is not in Fasta format\n") unless $n1; my $n2=0; open(IN2, "<$infile2") or dienice("Unable to open infile2, $infile2\n"); while (<IN2>) { if (/^>/) { ++$n2; if ($prefix2) { print OUT ">$prefix2.$n2\n"; } else { print OUT; } } else { print OUT; } } close IN2; close OUT; dienice("Infile 2 contains no sequences or is not in Fasta format\n") unless $n2; # CONVERT FORMAT eval { `toAmos -s $infile -o $tmpdir/infile.afg` }; dienice("ERROR CONVERTING TO AMOS FORMAT\n") if $@; # CO-ASSEMBLY # explicitly defining the delta-filter and show-coords executables is more robust eval { `minimus2 $tmpdir/infile -D REFCOUNT=$n1 -D DELTAFILTER=$deltafilter -D SHOWCOORDS=$showcoords` }; dienice("ERROR EXECUTING MINIMUS2\n") if $@; # MOVE FILES AND CLEANUP TMPDIR eval { `mv $tmpdir/infile.fasta $contigs_outfile` }; dienice("ERROR MOVING CONTIGS OUTFILE\n") if $@; eval { `mv $tmpdir/infile.singletons.seq $singletons_outfile` }; dienice("ERROR MOVING SINGLETONS OUTFILE\n") if $@; eval { `rm -rf $tmpdir` }; die("ERROR CLEANING UP TEMP DIR\n") if $@; exit; # CLEANUP TEMPFILES BEFORE QUITTING sub dienice { my $msg=shift; `rm -rf $tmpdir`; die($msg); } __END__