# HG changeset patch
# User edward-kirton
# Date 1307483536 14400
# Node ID 4afe13ac23b677b83ec4e9aed4621ff70a88411e
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository

diff -r 000000000000 -r 4afe13ac23b6 velvet/jgi_assembly.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/jgi_assembly.py Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,122 @@
+"""
+Assembly classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class Assembly( data.Text ):
+    """Class describing an assembly"""
+
+    # Metadata elements: contig and read counts (Ace.set_meta() fills them in)
+    MetadataElement( name="contigs", default=0, desc="Number of contigs", readonly=True, visible=False, optional=True, no_value=0 )
+    MetadataElement( name="reads", default=0, desc="Number of reads", readonly=True, visible=False, optional=True, no_value=0 )
+
+
+class Ace(Assembly):
+    """Class describing an assembly Ace file"""
+
+    file_ext = "ace"
+
+    def set_meta( self, dataset, overwrite=True, **kwd ):
+        """
+        Set the number of assembled contigs and read sequences in the dataset metadata.
+
+        Lines starting with 'CO' are counted as contigs and lines starting with
+        'RD' as reads; lines starting with '#' are skipped.
+        """
+        contigs = 0
+        reads = 0
+        fh = open( dataset.file_name )
+        try:
+            for line in fh:
+                line = line.strip()
+                if line.startswith( '#' ):
+                    # Don't count comment lines
+                    continue
+                if line.startswith( 'CO' ):
+                    contigs += 1
+                elif line.startswith( 'RD' ):
+                    reads += 1
+        finally:
+            # Close explicitly instead of leaking the handle until garbage collection
+            fh.close()
+        dataset.metadata.contigs = contigs
+        dataset.metadata.reads = reads
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the dataset peek text and a blurb showing the contig count (or the file size)."""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            if dataset.metadata.contigs:
+                dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
+            else:
+                dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        r"""
+        Determines whether the file is in ace format
+
+        An ace file contains these sections
+        AS \d+ \d+
+
+        CO \S+ \d+ \d+ \d+ \w
+        [atcgATCGN\*]+
+
+        BQ
+        [\d\s]+
+
+        AF \S+ [CU] \-?\d+
+
+        BS \d+ \d+ \S+
+
+        RD \S+ \d+ \d+ \d+
+        [ATCGN\*]+
+
+        QA \d+ \d+ \d+ \d+
+        DS .*
+
+        Currently we only check if file begins with AS
+
+        >>> fname = get_test_fname( 'genome.ace' )
+        >>> Ace().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'genome.fasta' )
+        >>> Ace().sniff( fname )
+        False
+        """
+        try:
+            fh = open( filename )
+            try:
+                line = fh.readline().strip()
+            finally:
+                # Always release the handle, even if the read fails
+                fh.close()
+        except Exception:
+            # Sniffing is best-effort: an unreadable file is simply not an ace file
+            return False
+        return line.startswith( 'AS ' )
+
+class Velveth(Assembly):
+    """Composite datatype holding the velveth output files (Roadmaps and Sequences)"""
+    composite_type='basic'
+    file_ext = "txt"
+
+    def __init__(self,**kwd):
+        Assembly.__init__(self,**kwd)
+        # velveth writes 'Roadmaps' (plural); the velvetg wrapper looks for that name too
+        self.add_composite_file('Roadmaps')
+        self.add_composite_file('Sequences')
diff -r 000000000000 -r 4afe13ac23b6 velvet/suite_config.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/suite_config.xml Tue Jun 07 17:52:16 2011 -0400
@@ -0,0 +1,9 @@
+
+ Velvet assembler (a different version than in galaxy-central)
+
+ Hash reads
+ + + Generate contigs + + diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/velvet/velvetg_jgi.xml Tue Jun 07 17:52:16 2011 -0400 @@ -0,0 +1,134 @@ + +Assemble preprocessed reads +velvetg_jgi_wrapper.pl +$velveth.extra_files_path +$velvetg_outfile.extra_files_path +$velvetg_outfile +$contigs_outfile +$stats_outfile +$lastgraph_outfile +$unused_reads_outfile +$amos_outfile +velvetg $velvetg_outfile.extra_files_path +-very_clean yes +#if $ins_length != '': +-ins_length $ins_length + #if $ins_length_sd != '': + -ins_length_sd $ins_length_sd + #end if +#end if +#if $ins_length2 != '': +-ins_length2 $ins_length2 + #if $ins_length2_sd != '': + -ins_length2_sd $ins_length2_sd + #end if +#end if +#if $ins_length3 != '': +-ins_length3 $ins_length3 + #if $ins_length3_sd != '': + -ins_length3_sd $ins_length3_sd + #end if +#end if +#if $ins_length4 != '': +-ins_length4 $ins_length4 + #if $ins_length4_sd != '': + -ins_length4_sd $ins_length4_sd + #end if +#end if +#if $ins_length5 != '': +-ins_length5 $ins_length5 + #if $ins_length5_sd != '': + -ins_length5_sd $ins_length5_sd + #end if +#end if +#if $ins_length_long != '': +-ins_length_long $ins_length_long + #if $ins_length_long_sd != '': + -ins_length_long_sd $ins_length_long_sd + #end if +#end if +-exp_cov $exp_cov +-cov_cutoff $cov_cutoff +-long_cov_cutoff $long_cov_cutoff +-max_coverage $max_coverage +-min_contig_lgth $min_contig_lgth +-read_trkg $read_trkg +-amos_file $amos_file +-unused_reads $unused_reads +-max_branch_length $max_branch_length +-max_divergence $max_divergence +-max_gap_count $max_gap_count +-scaffolding $scaffolding +-long_mult_cutoff $long_mult_cutoff +-min_pair_count $min_pair_count +-alignments $alignments +-exportFiltered $exportFiltered + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unused_reads is True + + + amos_file is True + + + + + +**What it does** + +This tool assembles 
contigous sequences from preprocessed reads by constructing a de Bruijn graph. + +Sequences must be preprocessed by the velveth tool. + +**Manual** + +http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf + + diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi_wrapper.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/velvet/velvetg_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env perl + +# Conventience wrapper for velvetg; copies outfiles to galaxy-specified destinations. +# Please email bugs/feature requests to Edward Kirton (ESKirton@LBL.gov) +# +# History: +# - 2010/03/04 : file created +# - 2001/02/05 : added new options, outfiles; renamed to velvetg_jgi to avoid collision with the other velvetg tool + +use strict; +use warnings; +use File::Copy; + +# shift wrapper args +my $velveth_path=shift @ARGV or die; +my $velvetg_path=shift @ARGV or die; +my $velvetg_outfile=shift @ARGV or die; +my $contigs_outfile=shift @ARGV or die; +my $stats_outfile=shift @ARGV or die; +my $lastgraph_outfile=shift @ARGV or die; +my $unused_reads_outfile=shift @ARGV or die; +my $amos_outfile=shift @ARGV or die; + +# setup velvetg folder +die("Velveth folder does not exist: $velveth_path\n") unless -d $velveth_path; +-d $velvetg_path or mkdir($velvetg_path) or die("Unable to create output folder, $velvetg_path: $!\n"); +die("velveth Sequences file does not exist: $velveth_path/Sequences") unless -f "$velveth_path/Sequences"; +symlink("$velveth_path/Sequences", "$velvetg_path/Sequences"); +die("velveth Roadmaps file does not exist: $velveth_path/Roadmaps") unless -f "$velveth_path/Roadmaps"; +symlink("$velveth_path/Roadmaps", "$velvetg_path/Roadmaps"); +die("velveth Log file does not exist: $velveth_path/Log") unless -f "$velveth_path/Log"; +copy("$velveth_path/Log", "$velvetg_path/Log"); + +# run command (remaining args, starting with exe path) +open (VELVETG, "@ARGV|") or die("Unable to run velvetg\n"); +open (OUT, ">$velvetg_outfile") or die("Unable to 
open outfile, $velvetg_outfile: $!\n"); +while () { + print OUT $_; + print if /^Final graph/; +} +close VELVETG; +close OUT; + +# process output +unlink($contigs_outfile); +move("$velvetg_path/contigs.fa", $contigs_outfile); +unlink($stats_outfile); +move("$velvetg_path/stats.txt", $stats_outfile); + +unlink($lastgraph_outfile); +if ( -f "$velvetg_path/LastGraph") { + move("$velvetg_path/LastGraph", $lastgraph_outfile); +} elsif ( -f "$velvetg_path/Graph2") { + move("$velvetg_path/Graph2", $lastgraph_outfile); +} else { + open(OUT, ">$lastgraph_outfile") or die($!); + print OUT "ERROR: $velvetg_path/LastGraph not found!\n"; + close OUT; +} +unlink($unused_reads_outfile); +move("$velvetg_path/UnusedReads.fa", $unused_reads_outfile); +if ( $amos_outfile ne 'None' ) { + unlink($amos_outfile); + move("$velvetg_path/velvet_asm.afg", $amos_outfile); +} +exit; diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/velvet/velveth_jgi.xml Tue Jun 07 17:52:16 2011 -0400 @@ -0,0 +1,276 @@ + +Preprocess sequences for velvet assembly +velveth_jgi_wrapper.pl $output $output.extra_files_path +velveth $output.extra_files_path $hash_length $strand_specific +#for $i in $short.files +$short.libtype $i.filetype.select $i.filetype.file +#end for +#for $i in $short2.files +$short2.libtype $i.filetype.select $i.filetype.file +#end for +#for $i in $short3.files +$short3.libtype $i.filetype.select $i.filetype.file +#end for +#for $i in $short4.files +$short4.libtype $i.filetype.select $i.filetype.file +#end for +#for $i in $short5.files +$short5.libtype $i.filetype.select $i.filetype.file +#end for +#for $i in $long.files +$long.libtype $i.filetype.select $i.filetype.file +#end for + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool preprocesses the sequence data using the specified hash length. A larger hash length increases sensitivity at the expense of read depth. + +**Manual** + +http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf + + diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi_wrapper.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/velvet/velveth_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +my $start=time; +my $outfile=shift @ARGV; +my $outdir=shift @ARGV; +my $kmer=$ARGV[2]; +die ("USER ERROR: Hash length (kmer) must be odd!\n") unless $kmer % 2; +my $tot_reads=0; +open (VELVETH, "@ARGV 2>&1|") or die("Unable to run velveth: $!\n"); +open(OUT, ">$outfile") or die($!); +while () { + print OUT $_; + if (/^\[\d+\.\d+\] (\d+) sequences found/) { + $tot_reads += $1; + } +} +close VELVETH; +close OUT; +die("No reads found\n") unless $tot_reads; +my $sec=time-$start; +my $min=int($sec/60); +$sec -= ($min*60); +my $hr=int($min/60); +$min -= ($hr*60); +print "$tot_reads processed in"; +print " $hr hr" if $hr; +print " $min min" if $min; +print " $sec sec\n"; +exit