# HG changeset patch
# User edward-kirton
# Date 1307483536 14400
# Node ID 4afe13ac23b677b83ec4e9aed4621ff70a88411e
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
diff -r 000000000000 -r 4afe13ac23b6 velvet/jgi_assembly.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/jgi_assembly.py Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,117 @@
+"""
+Assembly classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class Assembly( data.Text ):
+ """Class describing an assembly"""
+
+ """Add metadata elements"""
+ MetadataElement( name="contigs", default=0, desc="Number of contigs", readonly=True, visible=False, optional=True, no_value=0 )
+ MetadataElement( name="reads", default=0, desc="Number of reads", readonly=True, visible=False, optional=True, no_value=0 )
+
+
+class Ace(Assembly):
+ """Class describing an assembly Ace file"""
+
+ file_ext = "ace"
+
+# def init_meta( self, dataset, copy_from=None ):
+# Assembly.init_meta( self, dataset, copy_from=copy_from )
+
+ def set_meta( self, dataset, overwrite=True, **kwd ):
+ """
+ Set the number of assembled contigs and read sequences and the number of data lines in dataset.
+ """
+ contigs = 0
+ reads = 0
+ for line in file( dataset.file_name ):
+ line = line.strip()
+ if line and line.startswith( '#' ):
+ # Don't count comment lines
+ continue
+ if line and line.startswith( 'CO' ):
+ contigs += 1
+ if line and line.startswith( 'RD' ):
+ reads += 1
+ dataset.metadata.contigs = contigs
+ dataset.metadata.reads = reads
+
+ def set_peek( self, dataset, is_multi_byte=False ):
+ if not dataset.dataset.purged:
+ dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+ if dataset.metadata.contigs:
+ dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
+ else:
+ dataset.blurb = data.nice_size( dataset.get_size() )
+ else:
+ dataset.peek = 'file does not exist'
+ dataset.blurb = 'file purged from disk'
+
+ def sniff( self, filename ):
+ """
+ Determines whether the file is in ace format
+
+ An ace file contains these sections
+ AS \d+ \d+
+
+ CO \S+ \d+ \d+ \d+ \w
+ [atcgATCGN\*]+
+
+ BQ
+ [\d\s]+
+
+ AF \S+ [CU] \-?\d+
+
+ BS \d+ \d+ \S+
+
+ RD \S+ \d+ \d+ \d+
+ [ATCGN\*]+
+
+ QA \d+ \d+ \d+ \d+
+ DS .*
+
+ Currently we only check if file begins with AS
+
+ >>> fname = get_test_fname( 'genome.ace' )
+ >>> Ace().sniff( fname )
+ True
+ >>> fname = get_test_fname( 'genome.fasta' )
+ >>> Ace().sniff( fname )
+ False
+ """
+
+ try:
+ fh = open( filename )
+ line = fh.readline()
+ line = line.strip()
+ if line:
+ if line.startswith( 'AS ' ):
+ fh.close()
+ return True
+ fh.close()
+ return False
+ except:
+ pass
+ return False
+
+class Velveth(Assembly):
+ composite_type='basic'
+ file_ext = "txt"
+
+ def __init__(self,**kwd):
+ Assembly.__init__(self,**kwd)
+ self.add_composite_file('Roadmap')
+ self.add_composite_file('Sequences')
diff -r 000000000000 -r 4afe13ac23b6 velvet/suite_config.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/suite_config.xml Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,9 @@
+
+ Velvet assembler (a different version than in galaxy-central)
+
+ Hash reads
+
+
+ Generate contigs
+
+
diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velvetg_jgi.xml Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,134 @@
+
+Assemble preprocessed reads
+velvetg_jgi_wrapper.pl
+$velveth.extra_files_path
+$velvetg_outfile.extra_files_path
+$velvetg_outfile
+$contigs_outfile
+$stats_outfile
+$lastgraph_outfile
+$unused_reads_outfile
+$amos_outfile
+velvetg $velvetg_outfile.extra_files_path
+-very_clean yes
+#if $ins_length != '':
+-ins_length $ins_length
+ #if $ins_length_sd != '':
+ -ins_length_sd $ins_length_sd
+ #end if
+#end if
+#if $ins_length2 != '':
+-ins_length2 $ins_length2
+ #if $ins_length2_sd != '':
+ -ins_length2_sd $ins_length2_sd
+ #end if
+#end if
+#if $ins_length3 != '':
+-ins_length3 $ins_length3
+ #if $ins_length3_sd != '':
+ -ins_length3_sd $ins_length3_sd
+ #end if
+#end if
+#if $ins_length4 != '':
+-ins_length4 $ins_length4
+ #if $ins_length4_sd != '':
+ -ins_length4_sd $ins_length4_sd
+ #end if
+#end if
+#if $ins_length5 != '':
+-ins_length5 $ins_length5
+ #if $ins_length5_sd != '':
+ -ins_length5_sd $ins_length5_sd
+ #end if
+#end if
+#if $ins_length_long != '':
+-ins_length_long $ins_length_long
+ #if $ins_length_long_sd != '':
+ -ins_length_long_sd $ins_length_long_sd
+ #end if
+#end if
+-exp_cov $exp_cov
+-cov_cutoff $cov_cutoff
+-long_cov_cutoff $long_cov_cutoff
+-max_coverage $max_coverage
+-min_contig_lgth $min_contig_lgth
+-read_trkg $read_trkg
+-amos_file $amos_file
+-unused_reads $unused_reads
+-max_branch_length $max_branch_length
+-max_divergence $max_divergence
+-max_gap_count $max_gap_count
+-scaffolding $scaffolding
+-long_mult_cutoff $long_mult_cutoff
+-min_pair_count $min_pair_count
+-alignments $alignments
+-exportFiltered $exportFiltered
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ unused_reads is True
+
+
+ amos_file is True
+
+
+
+
+
+**What it does**
+
+This tool assembles contiguous sequences from preprocessed reads by constructing a de Bruijn graph.
+
+Sequences must be preprocessed by the velveth tool.
+
+**Manual**
+
+http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
+
+
diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velvetg_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/env perl
+
+# Convenience wrapper for velvetg; copies outfiles to galaxy-specified destinations.
+# Please email bugs/feature requests to Edward Kirton (ESKirton@LBL.gov)
+#
+# History:
+# - 2010/03/04 : file created
+# - 2001/02/05 : added new options, outfiles; renamed to velvetg_jgi to avoid collision with the other velvetg tool
+
+use strict;
+use warnings;
+use File::Copy;
+
+# shift wrapper args; everything left in @ARGV afterwards is the velvetg command line
+my $velveth_path=shift @ARGV or die;
+my $velvetg_path=shift @ARGV or die;
+my $velvetg_outfile=shift @ARGV or die;
+my $contigs_outfile=shift @ARGV or die;
+my $stats_outfile=shift @ARGV or die;
+my $lastgraph_outfile=shift @ARGV or die;
+my $unused_reads_outfile=shift @ARGV or die;
+my $amos_outfile=shift @ARGV or die;
+
+# setup velvetg folder: make the velveth files available inside it
+die("Velveth folder does not exist: $velveth_path\n") unless -d $velveth_path;
+-d $velvetg_path or mkdir($velvetg_path) or die("Unable to create output folder, $velvetg_path: $!\n");
+for my $name (qw(Sequences Roadmaps)) {
+    die("velveth $name file does not exist: $velveth_path/$name") unless -f "$velveth_path/$name";
+    -e "$velvetg_path/$name" or symlink("$velveth_path/$name", "$velvetg_path/$name") or die("Unable to link $name into $velvetg_path: $!\n");
+}
+die("velveth Log file does not exist: $velveth_path/Log") unless -f "$velveth_path/Log";
+copy("$velveth_path/Log", "$velvetg_path/Log") or die("Unable to copy Log into $velvetg_path: $!\n");
+
+# run command (remaining args, starting with exe path); NOTE: the joined string is run through the shell
+open (my $velvetg, "@ARGV|") or die("Unable to run velvetg\n");
+open (my $log, '>', $velvetg_outfile) or die("Unable to open outfile, $velvetg_outfile: $!\n");
+while (my $line = <$velvetg>) {
+    print {$log} $line;
+    print STDOUT $line if $line =~ /^Final graph/;   # echo only the summary line to stdout
+}
+close $velvetg or die("velvetg exited abnormally (wait status $?)\n");
+close $log or die("Unable to finish writing $velvetg_outfile: $!\n");
+
+# process output; contigs.fa and stats.txt are required, the remaining files are optional
+unlink($contigs_outfile);
+move("$velvetg_path/contigs.fa", $contigs_outfile) or die("Unable to move contigs.fa to $contigs_outfile: $!\n");
+unlink($stats_outfile);
+move("$velvetg_path/stats.txt", $stats_outfile) or die("Unable to move stats.txt to $stats_outfile: $!\n");
+
+unlink($lastgraph_outfile);
+if ( -f "$velvetg_path/LastGraph") {
+    move("$velvetg_path/LastGraph", $lastgraph_outfile);
+} elsif ( -f "$velvetg_path/Graph2") {
+    move("$velvetg_path/Graph2", $lastgraph_outfile);
+} else {
+    open(my $note, '>', $lastgraph_outfile) or die($!);
+    print {$note} "ERROR: $velvetg_path/LastGraph not found!\n";
+    close $note;
+}
+unlink($unused_reads_outfile);
+move("$velvetg_path/UnusedReads.fa", $unused_reads_outfile);
+if ( $amos_outfile ne 'None' ) {
+    unlink($amos_outfile);
+    move("$velvetg_path/velvet_asm.afg", $amos_outfile);
+}
+exit;
diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velveth_jgi.xml Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,276 @@
+
+Preprocess sequences for velvet assembly
+velveth_jgi_wrapper.pl $output $output.extra_files_path
+velveth $output.extra_files_path $hash_length $strand_specific
+#for $i in $short.files
+$short.libtype $i.filetype.select $i.filetype.file
+#end for
+#for $i in $short2.files
+$short2.libtype $i.filetype.select $i.filetype.file
+#end for
+#for $i in $short3.files
+$short3.libtype $i.filetype.select $i.filetype.file
+#end for
+#for $i in $short4.files
+$short4.libtype $i.filetype.select $i.filetype.file
+#end for
+#for $i in $short5.files
+$short5.libtype $i.filetype.select $i.filetype.file
+#end for
+#for $i in $long.files
+$long.libtype $i.filetype.select $i.filetype.file
+#end for
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool preprocesses the sequence data using the specified hash length. A larger hash length increases specificity (fewer spurious overlaps) at the expense of coverage.
+
+**Manual**
+
+http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
+
+
diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velveth_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+my $start=time;
+# the first two args belong to the wrapper; the rest is the velveth command (exe, outdir, hash length, ...)
+my $outfile=shift @ARGV;
+my $outdir=shift @ARGV;
+my $kmer=$ARGV[2];
+die("Hash length argument is missing\n") unless defined $kmer;
+die ("USER ERROR: Hash length (kmer) must be odd!\n") unless $kmer % 2;
+my $tot_reads=0;
+open (my $velveth, "@ARGV 2>&1|") or die("Unable to run velveth: $!\n");
+open(my $log, '>', $outfile) or die($!);
+while (my $line = <$velveth>) {
+    print {$log} $line;
+    $tot_reads += $1 if $line =~ /^\[\d+\.\d+\] (\d+) sequences found/;
+}
+close $velveth;
+close $log;
+die("No reads found\n") unless $tot_reads;
+my $sec=time-$start;
+my $min=int($sec/60);
+$sec -= ($min*60);
+my $hr=int($min/60);
+$min -= ($hr*60);
+print "$tot_reads processed in";
+print " $hr hr" if $hr;
+print " $min min" if $min;
+print " $sec sec\n";
+exit