changeset 0:f9e4e6fe0e73 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author edward-kirton
date Tue, 07 Jun 2011 17:41:56 -0400
parents
children
files phrap/jgi_assembly.py phrap/phrap.xml phrap/phrap_wrapper.sh
diffstat 3 files changed, 214 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phrap/jgi_assembly.py	Tue Jun 07 17:41:56 2011 -0400
@@ -0,0 +1,117 @@
+"""
+Assembly classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class Assembly( data.Text ):
+    """Base class for the assembly datatypes below; declares the 'contigs' and 'reads' count metadata."""
+
+    """Add metadata elements"""
+    MetadataElement( name="contigs", default=0, desc="Number of contigs", readonly=True, visible=False, optional=True, no_value=0 )
+    MetadataElement( name="reads", default=0, desc="Number of reads", readonly=True, visible=False, optional=True, no_value=0 )
+
+
+class Ace(Assembly):
+    """Class describing an assembly Ace file"""
+
+    file_ext = "ace"
+
+#    def init_meta( self, dataset, copy_from=None ):
+#        Assembly.init_meta( self, dataset, copy_from=copy_from )
+
+    def set_meta( self, dataset, overwrite=True, **kwd ):
+        """
+        Set the number of assembled contigs and read sequences and the number of data lines in dataset.
+        """
+        contigs = 0
+        reads = 0
+        for line in file( dataset.file_name ):
+            line = line.strip()
+            if line and line.startswith( '#' ):
+                # Don't count comment lines
+                continue
+            if line and line.startswith( 'CO' ):
+                contigs += 1
+            if line and line.startswith( 'RD' ):
+                reads += 1
+        dataset.metadata.contigs = contigs
+        dataset.metadata.reads = reads
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            if dataset.metadata.contigs:
+                dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
+            else:
+                dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in ace format
+
+        An ace file contains these sections 
+        AS  \d+ \d+
+
+        CO \S+ \d+ \d+ \d+ \w
+        [atcgATCGN\*]+
+
+        BQ
+        [\d\s]+
+
+        AF \S+ [CU] \-?\d+
+
+        BS \d+ \d+ \S+
+
+        RD \S+ \d+ \d+ \d+
+        [ATCGN\*]+
+
+        QA \d+ \d+ \d+ \d+
+        DS .*
+
+        Currently we only check if file begins with AS
+
+        >>> fname = get_test_fname( 'genome.ace' )
+        >>> Ace().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'genome.fasta' )
+        >>> Ace().sniff( fname )
+        False
+        """
+
+        try:
+            fh = open( filename )
+            line = fh.readline()
+            line = line.strip()
+            if line:
+                if line.startswith( 'AS ' ):
+                    fh.close()
+                    return True
+            fh.close()
+            return False
+        except:
+            pass
+        return False
+
+class Velveth(Assembly):
+    composite_type='basic'  # NOTE(review): presumably marks this as a composite (multi-file) datatype -- confirm
+    file_ext = "txt"
+
+    def __init__(self,**kwd):
+        Assembly.__init__(self,**kwd)
+        self.add_composite_file('Roadmap')  # first of the two composite files added here
+        self.add_composite_file('Sequences')  # second composite file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phrap/phrap.xml	Tue Jun 07 17:41:56 2011 -0400
@@ -0,0 +1,41 @@
+<tool id="phrap" name="Phrap" version="1.0.0">
+<description>Assemble long reads/short contigs</description>
+<command interpreter='bash'>phrap_wrapper.sh "$label" "$fasta_infile" "$qual_infile" "$fasta_outfile" "$qual_outfile" "$singlets_outfile" "$fasta_outfile.extra_files_path"</command>
+
+<inputs><!-- these three values are passed, in this order, as the first three arguments of phrap_wrapper.sh -->
+    <param name="label" type="text" value="seqs" label="Label for naming contig sequences (e.g. library or sample name)" help="Contigs will be named LABEL.Contig1, ..." />
+    <param name="fasta_infile" type="data" format="fasta" label="Long reads or contig sequences" />
+    <param name="qual_infile" type="data" format="qual" optional='True' label="Optional sequence quality scores (Phred-scaled)" />
+</inputs>
+
+<outputs><!-- the paths of these three datasets are passed to phrap_wrapper.sh as arguments 4 to 6 -->
+    <data name="fasta_outfile" format="fasta" />
+    <data name="qual_outfile" format="qual" label="Contig quality scores" />
+    <data name="singlets_outfile" format="fasta" label="Singlets Fasta" />
+</outputs>
+
+<help>
+**What it does**
+
+phrap ("phragment assembly program", or "phil's revised assembly
+program"; a homonym of "frappe" = French for "swat") -- a
+program for assembling shotgun DNA sequence data.  Key features:
+allows use of entire read (not just trimmed high quality part); uses a
+combination of user-supplied and internally computed data quality
+information to improve accuracy of assembly in the presence of
+repeats; constructs contig sequence as a mosaic of the highest quality
+parts of reads (rather than a consensus); provides extensive information
+about assembly (including quality values for contig sequence) to
+assist trouble-shooting; able to handle very large datasets.
+
+**Notes**
+
+phrap is great for assembling Sanger shotgun reads but should not be used for next-generation data (e.g. Illumina, SOLiD, 454).
+However, phrap is useful for combining short-read assemblies from Velvet or ABySS (i.e. results from using multiple k-mer parameters).
+When used for this purpose, you should filter out short sequences first (e.g. shorter than 200-1000 bp, depending on the number of contigs).
+
+**Reference**
+
+http://www.phrap.org/phredphrap/phrap.html
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phrap/phrap_wrapper.sh	Tue Jun 07 17:41:56 2011 -0400
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+if [ $# -ne 7 ]
+then
+    echo "ERROR: Expected exactly 7 arguments; got: $*" 1>&2
+    exit 1
+fi
+
+# GET ARGS
+LABEL=$1; shift
+FASTA_INFILE=$1; shift
+QUAL_INFILE=$1; shift
+FASTA_OUTFILE=$1; shift
+QUAL_OUTFILE=$1; shift
+SINGLETS_OUTFILE=$1; shift
+DIR=$1; shift
+
+if [ ! -e $FASTA_INFILE ]
+then
+    echo "Fasta infile not found: $FASTA_INFILE" 1>&2
+    exit 1
+fi
+
+# ALL FILES GO IN THIS extra_files_path
+mkdir $DIR
+cd $DIR
+ln -s $FASTA_INFILE ./$LABEL
+if [ $QUAL_INFILE != 'None' ]
+then
+    ln -s $QUAL_INFILE ./$LABEL.qual
+fi
+
+# RUN COMMAND
+phrap $LABEL 1> phrap.stdout 2> phrap.stderr
+if [ $? -ne 0 ]
+then
+    echo "COMMAND FAILURE; LOG:" 1>&2
+    cat phrap.stderr 1>&2
+    exit $?
+fi
+
+# SYMLINK OUTFILES
+link () { # args: src, dest
+    # if dest defined and src exist, then create symlink
+    if [[ $2 != 'None' && -e "$1" ]]
+    then
+        if [ -e $2 ]
+        then
+            rm $2
+        fi
+        ln -s $1 $2
+    fi
+}
+link $DIR/$LABEL.contigs $FASTA_OUTFILE
+link $DIR/$LABEL.contigs.qual $QUAL_OUTFILE
+link $DIR/$LABEL.singlets $SINGLETS_OUTFILE