Mercurial > repos > rnateam > sortmerna

--- a/sortmerna.py	Mon Aug 03 08:18:26 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Runs SortMeRNA
-"""
-
-import subprocess
-import optparse
-import shlex
-
-
-def main():
-    """Parse the command line, exectutes SortMeRNA and buildtrie if neeeded."""
-    #TODO: Put all SortMeRNA options in the command-line parser
-    parser = optparse.OptionParser()
-    parser.add_option('--sortmerna', dest='sortmerna_cmd', help='')
-    parser.add_option('--buildtrie', dest='buildtrie',
-                      default=False, action='store_true', help='')
-    (options, args) = parser.parse_args()
-    if not args:
-        raise Exception('Please provide at least one database')
-
-    if options.buildtrie:
-        buildtrie = 'buildtrie'
-        for database in args:
-            run_buildtrie([buildtrie, '--db', database])
-
-    if options.sortmerna_cmd:
-        sortmerna = 'sortmerna'
-        run_sortmerna([sortmerna] +
-                      shlex.split(options.sortmerna_cmd) +
-                      ['-m', '262144', '-n', str(len(args)), '--db'] +
-                      args)
-
-
-def run_buildtrie(cmd):
-    """Run the BuildTrie program."""
-    try:
-        stdout_arg = subprocess.PIPE
-        stderr_arg = subprocess.PIPE
-        child_process = subprocess.Popen(args=" ".join(cmd), shell=True,
-                                         stdin=None, stdout=stdout_arg,
-                                         stderr=stderr_arg)
-        stdout_str, stderr_str = child_process.communicate()
-        return_code = child_process.returncode
-        if return_code is not 0:
-            raise Exception(stderr_str)
-
-    except Exception, error:
-        raise Exception('Error while running Buildtrie:\n' +
-                        '\n'.join([str(error), stdout_str, stderr_str]))
-
-
-def run_sortmerna(cmd):
-    """Run the SortMeRNA program."""
-    try:
-        stdout_arg = subprocess.PIPE
-        stderr_arg = subprocess.PIPE
-        child_process = subprocess.Popen(args=" ".join(cmd), shell=True,
-                                         stdin=None, stdout=stdout_arg,
-                                         stderr=stderr_arg)
-        stdout_str, stderr_str = child_process.communicate()
-        return_code = child_process.returncode
-        if return_code is not 0:
-            raise Exception(stderr_str)
-    except Exception, error:
-        raise Exception('Error while running SortMeRNA:\n' +
-                        '\n'.join([str(error), stdout_str, stderr_str]))
-
-
-if __name__ == "__main__":
-    main()
--- a/sortmerna.xml	Mon Aug 03 08:18:26 2015 -0400
+++ b/sortmerna.xml	Wed Aug 05 02:50:43 2015 -0400
@@ -1,7 +1,7 @@
-<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0">
+<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0">
     <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
     <requirements>
-        <requirement type='package' version="1.9">sortmerna</requirement>
+        <requirement type='package' version="2.0">sortmerna</requirement>
     </requirements>
     <stdio>
         <regex match="This program builds a Burst trie on an input rRNA database"
@@ -18,145 +18,167 @@
 sortmerna --version 2>&1|grep 'SortMeRNA version'
 ]]>
     </version_command>
-    <command interpreter="python">
+    <command>
 <![CDATA[
-    sortmerna.py
-    --sortmerna "
-        $strand_search
-        #if str( $read_family.read_family_selector ) == 'other':
-            --I $input_reads -r $read_family.ratio_parameter
-        #else:
-            $read_family.read_family_selector $input_reads
-        #end if
-
-        #if str( $sequencing_type.sequencing_type_selector ) == 'paired':
-            $sequencing_type.paired_type
+    #set $ref = ''
+    #set $sep=''
+    #if str( $databases_type.databases_selector ) == 'history':
+        #for $db in $databases_type.database_name
+            #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
+            #set $sep = ':'
+        #end for
+        indexdb_rna --ref $ref
+        &&
+    #else:
+        ## Database paths are not directly accessible here; look each selected id up by hand in the LOC data table (column 0 -> column 2)
+        #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
+        #for $db in $databases_type.input_databases.value
+            #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0]
+            #set $sep = ':'
+        #end for
+    #end if
+    sortmerna --ref $ref --reads $input_reads --aligned aligned
+    #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
+        $sequencing_type.paired_type
+    #end if
+    $strand_search
+    $aligned_fastx.aligned_fastx_selector
+    #if $aligned_fastx.aligned_fastx_selector == '--fastx'
+        #if $aligned_fastx.other
+            --other other_file
         #end if
-
-        #if $outputs_selected:
-            #if 'accept' in $outputs_selected.value:
-                --accept accept_file
-            #end if
-            #if 'other' in $outputs_selected.value:
-                --other other_file
-            #end if
-        #end if
-
-        $log
-        -a \${GALAXY_SLOTS:-4}
-        "
-        #if str( $databases_type.databases_selector ) == 'history':
-            --buildtrie
-            #for $db in $databases_type.input_databases
-                $db.database_name
-            #end for
-        #else:
-            ## databases path is not directly accessible, must match by hand with LOC file contents
-            ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y]
-                       for y in $databases_type.input_databases.value])}
-        #end if
+    #end if
+    $aligned_sam.aligned_sam_selector
+    #if $aligned_sam.aligned_sam_selector == '--sam'
+        $aligned_sam.sq
+    #end if
+    $aligned_blast
+    $log
+    -a \${GALAXY_SLOTS:-1}
 ]]>
     </command>
     <inputs>
-    <conditional name="read_family">
-        <param name="read_family_selector" type="select" format="text" label="Sequencing technology of querying sequences (reads)"
-            help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
-            <option value="--I">Illumina Solexa</option>
-            <option value="--454">454 Roche</option>
-            <option value="other">Other</option>
+        <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/>
+        <conditional name="sequencing_type">
+            <param name="sequencing_type_selector" type="select" label="Sequencing type">
+                <option value="not_paired">Reads are not paired</option>
+                <option value="paired">Reads are paired</option>
+            </param>
+            <when value="paired">
+                <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
+                    <option value="">leave the reads split between aligned and rejected files</option>
+                    <option value="--paired-in">output both reads to aligned file (--paired-in)</option>
+                    <option value="--paired-out">output both reads to rejected file (--paired-out)</option>
+                </param>
+            </when>
+        </conditional>
+
+        <param name="strand_search" type="select" label="Which strands to search" display="radio">
+            <option value="">Search both strands</option>
+            <option value="-F">Search only the forward strand (-F)</option>
+            <option value="-R">Search only the reverse-complementary strand (-R)</option>
         </param>
-        <when value="other">
-            <param name="ratio_parameter" type="float" value="1" min="0" max="1"
-                label="Ratio parameter (the number of hits on the read / read length)"
-                help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads.
-                    For other read types, if the sequencing technology produces high quality reads with a low substitution error rate
-                    (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27].
-                    If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent),
-                    then the ratio parameter can be set to r=[0.13,0.17] (-r)."/>
-        </when>
-    </conditional>
-    <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/>
-    <conditional name="sequencing_type">
-        <param name="sequencing_type_selector" type="select" label="Sequencing type">
-            <option value="not_paired">Reads are not paired</option>
-            <option value="paired">Reads are paired</option>
-        </param>
-        <when value="paired">
-            <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads"
-                help="SortMeRNA does not use the pairing information for filtering RNA,
-                    however if one read of a pair is accepted and the other is not,
-                    the resulting output may break apart the pair into two separate files.
-                    The purpose of 'Reads are paired' option is to preserve the pairing of the reads.">
-                <option value="--paired-in">to accepted file (--paired-in)</option>
-                <option value="--paired-out">to rejected file (--paired-out)</option>
-            </param>
-        </when>
-    </conditional>

-    <param name="strand_search" type="select" label="Which strands to search" display="radio">
-        <option value="">Search both strands</option>
-        <option value="-F">Search only the forward strand (-F)</option>
-        <option value="-R">Search only the reverse-complementary strand (-R)</option>
-    </param>
+        <conditional name="databases_type">
+            <param name="databases_selector" type="select" label="Databases to query"
+                help="Public rRNA databases provided with SortMeRNA have been indexed.
+                    In contrast, personal databases must be indexed each time SortMeRNA is launched.
+                    Please be patient; this may take some time depending on the size of the given database.">
+                <option value="cached" selected="true">Public ribosomal databases</option>
+                <option value="history">Databases from your history</option>
+            </param>
+            <when value="cached">
+                <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
+                    <options from_data_table="rRNA_databases" />
+                    <validator type="no_options" message="Select at least one database"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases"
+                    help="Your databases will be indexed first, which may take up to several minutes."/>
+            </when>
+        </conditional>

-    <conditional name="databases_type">
-        <param name="databases_selector" type="select" label="Databases to query"
-            help="Public rRNA databases provided with SortMeRNA have been indexed.
-                On the contrary, personal databases must be indexed each time SortMeRNA is launched.
-                Please be patient, this may take some time depending on the size of the given database.">
-            <option value="cached" selected="true">Public ribosomal databases</option>
-            <option value="history">Databases from your history</option>
-        </param>
-        <when value="cached">
-            <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true">
-                <options from_data_table="rRNA_databases" />
-                <validator type="no_options" message="Select at least one database"/>
+        <!-- Outputs -->
+        <conditional name="aligned_fastx">
+            <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format">
+                <option value="--fastx">Yes (--fastx)</option>
+                <option value="">No</option>
             </param>
-        </when>
-        <when value="history">
-            <repeat name="input_databases" title="Database" min="1">
-                <param name="database_name" type="data" format="fasta" label="rRNA database"
-                    help="Your database will be indexed first, which may take up to several minutes."/>
-            </repeat>
-        </when>
-    </conditional>
-
-    <!-- Outputs -->
-    <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options">
-        <option value="accept" selected="True">Reads matching to at least one database</option>
-        <option value="other">Reads not found in any database</option>
-    </param>
-    <param name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file"
-           help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
-    </param>
-
+            <when value="--fastx">
+                <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" />
+            </when>
+            <when value="" />
+        </conditional>
+        <conditional name="aligned_sam">
+            <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format">
+                <option value="--sam">Yes (--sam)</option>
+                <option value="">No</option>
+            </param>
+            <when value="--sam">
+                <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" />
+            </when>
+            <when value="" />
+        </conditional>
+        <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format">
+            <option value="--blast 0">pairwise (--blast 0)</option>
+            <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option>
+            <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option>
+            <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option>
+            <option value="" selected="true">No</option>
+        </param>
+        <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file"
+               help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
+        </param>
     </inputs>
     <outputs>
-        <data format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat"
-            label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})">
-            <filter>outputs_selected and 'accept' in outputs_selected</filter>
+        <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat"
+            label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})">
+            <filter>aligned_fastx['aligned_fastx_selector']</filter>
         </data>
         <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
-            label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})">
-            <filter>outputs_selected and 'other' in outputs_selected</filter>
+            label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})">
+            <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter>
+        </data>
+        <data format="sam" name="output_sam" from_work_dir="aligned.sam"
+            label="Alignments on ${on_string} (SAM)">
+            <filter>aligned_sam['aligned_sam_selector']</filter>
         </data>
-        <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log">
+        <data format="tabular" name="output_blast" from_work_dir="aligned.blast"
+            label="Alignments on ${on_string} (BLAST)">
+            <filter>aligned_blast</filter>
+            <change_format>
+                <when input="aligned_blast" value="--blast 0" format="txt" />
+            </change_format>
+        </data>
+        <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log">
             <filter>log</filter>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="read_family_selector" value="I" />
-            <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
+            <param name="input_reads" value="read_small.fastq" />
             <param name="sequencing_type_selector" value="not_paired" />
             <param name="strand_search" value="" />
-            <param name="databases_selector" value="cached" />
-            <param name="input_databases" value="rfam-5.8s,rfam-5s" />
-            <param name="outputs_selected" value="accept,other" />
+            <param name="databases_selector" value="history" />
+            <param name="database_name" value="ref_small.fasta" />
+            <param name="other" value="True" />
             <param name="log" value="" />
-            <param name="options_type_selector" value="less" />
-            <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
+            <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" />
             <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
+            <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" />
+        </test>
+        <test>
+            <param name="input_reads" value="read_small.fasta" />
+            <param name="sequencing_type_selector" value="not_paired" />
+            <param name="strand_search" value="" />
+            <param name="databases_selector" value="history" />
+            <param name="database_name" value="ref_small.fasta" />
+            <param name="other" value="True" />
+            <param name="log" value="" />
+            <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" />
+            <output name="output_other" file="sortmerna_wrapper_other2.fasta" />
+            <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" />
         </test>
     </tests>
     <help>
--- a/test-data/merged-paired-reads_output.fastq	Mon Aug 03 08:18:26 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC
-+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__
-@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC
-+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa
-@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT
-+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T
-@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG
-+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__
-@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT
-+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_
-@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC
-+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b
-@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC
-+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b
-@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA
-+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb
-@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC
-+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb
-@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC
-+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
-@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT
-+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q
-
-@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG
-+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read_small.fasta	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,2 @@
+>read1
+GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read_small.fastq	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,4 @@
+@read1
+GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
++read1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref_small.fasta	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,2 @@
+>EncFa169 count=1; cluster_weight=27830; cluster=EncFa169; cluster_score=1.000000; cluster_center=True;
+AGAGTTTGATCCTGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAACGCTTCTTTCCTCCCGAGTGCTTGCACTCAATTGGAAAGAGGAGTGGCGGACGGGTGAGTAACACGTGGGTAACCTACCCATCAGAGGGGGATAACACTTGGAAACAGGTGCTAATACCGCATAACAGTTTATGCCGCATGGCATAAGAGTGAAAGGCGCTTTCGGGTGTCGCTGATGGATGGACCCGCGGTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCGGCAATGGACGAAAGTCTGACCGAGCAACGCCGCGTGAGTGAAGAAGGTTTTCGGATCGTAAAACTCTGTTGTTAGAGAAGAACAAGGACGTTAGTAACTGAACGTCCCCTGACGGTATCTAACCAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGAGGGTTTCCGCCCTTCAGTGCTGCAGCAAACGCATTAAGCACTCCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTTTGACCACTCTAGAGATAGAGCTTTCCCTTCGGGGACAAAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATTGTTAGTTGCCATCATTTAGTTGGGCACTCTAGCGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGGAAGTACAACGAGTCGCTAGACCGCGAGGTCATGCAAATCTCTTAAAGCTTCTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTTGGAGCCAGCCGCCTAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCT
--- a/test-data/sortmerna_wrapper_accept1.fastq	Mon Aug 03 08:18:26 2015 -0400
+++ b/test-data/sortmerna_wrapper_accept1.fastq	Wed Aug 05 02:50:43 2015 -0400
@@ -1,28 +1,4 @@
-@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC
-+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__
-@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT
-+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T
-@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT
-+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_
-@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC
-+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b
-@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC
-+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb
-@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT
-+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q
-@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG
-+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
+@read1
+GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
++read1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sortmerna_wrapper_accept2.fasta	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,2 @@
+>read1
+GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
--- a/test-data/sortmerna_wrapper_in1.fastq	Mon Aug 03 08:18:26 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC
-+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa
-@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG
-+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__
-@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC
-+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1
-___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__
-@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT
-+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1
-bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T
-@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT
-+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1
-___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_
-@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC
-+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b
-@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA
-+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb
-@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC
-+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1
-bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b
-@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC
-+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1
-bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb
-@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT
-+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1
-bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q
-@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC
-+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
-@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG
-+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1
-Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
--- a/test-data/sortmerna_wrapper_other1.fastq	Mon Aug 03 08:18:26 2015 -0400
+++ b/test-data/sortmerna_wrapper_other1.fastq	Wed Aug 05 02:50:43 2015 -0400
@@ -1,20 +0,0 @@
-@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC
-+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1
-__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa
-@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG
-+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1
-bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__
-@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC
-+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1
-_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b
-@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA
-+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1
-bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb
-@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC
-+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1
-baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sortmerna_wrapper_sam1.sam	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,3 @@
+@HD	VN:1.0	SO:unsorted
+@PG	ID:sortmerna	VN:1.0	CL:sortmerna --ref /tmp/tmpY80cK0/files/000/dataset_2.dat,dataset_2 --reads /tmp/tmpY80cK0/files/000/dataset_1.dat --aligned aligned --fastx --other other_file.dat dat -a 1
+read1	0	EncFa169	645	255	2S87M	*	0	0	GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	AS:i:169	NM:i:1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sortmerna_wrapper_sam2.sam	Wed Aug 05 02:50:43 2015 -0400
@@ -0,0 +1,3 @@
+@HD	VN:1.0	SO:unsorted
+@PG	ID:sortmerna	VN:1.0	CL:sortmerna --ref /tmp/tmpY80cK0/files/000/dataset_7.dat,dataset_7 --reads /tmp/tmpY80cK0/files/000/dataset_6.dat --aligned aligned --fastx --other other_file.dat dat -a 1
+read1	0	EncFa169	645	255	2S87M	*	0	0	GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC	*	AS:i:169	NM:i:1
--- a/tool-data/rRNA_databases.loc	Mon Aug 03 08:18:26 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-rfam-5.8s	Database Rfam 5.8s	$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
-rfam-5s	Database Rfam 5s	$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
-silva-arc-16s	Database Silva-Arc 16s	$SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta
-silva-arc-23s	Database Silva-Arc 23s	$SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta
-silva-bac-16s	Database Silva-Bac 16s	$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
-silva-bac-23s	Database Silva-Bac 23s	$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
-silva-euk-18s	Databse Silva-Euk 18s	$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
-silva-euk-28s	Database Silva-Euk 28s	$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
--- a/tool-data/rRNA_databases.loc.sample	Mon Aug 03 08:18:26 2015 -0400
+++ b/tool-data/rRNA_databases.loc.sample	Wed Aug 05 02:50:43 2015 -0400
@@ -1,26 +1,30 @@
 #This is a sample file distributed with Galaxy that is used to define a
-#list of public ribosomal databases, using three columns tab separated
-#(longer whitespace are TAB characters):
+#list of public ribosomal databases for SortMeRNA, using the following format
+#(columns are separated by TAB characters):
 #
-#<unique_id>    <database_caption>      <base_name_path>
+#<unique_id>	<database_caption>	<fasta_file_path>
 #
-#It is important that the actual database name does not have a space in it,
-#and that the first tab that appears in the line is right before the path.
+#So, for example, if your database is rfam-5.8s-id98 and the path to your FASTA
+#file is /data/rRNA_databases/rfam-5.8s-id98.fasta, then the rRNA_databases.loc
+#entry would look like this:
 #
-#So, for example, if your database is rfam-5.8s and the path to your base name
-#is /data/rRNA_databases/rfam-5.8s, then the rRNA_databases.loc entry would look like this:
+#rfam-5.8s-id98	Rfam 5.8S eukarya	/data/rRNA_databases/rfam-5.8s-id98.fasta
 #
-#rfam-5.8s          Rfam 5.8S eukarya            /data/rRNA_databases/rfam-5.8s
+#For each rRNA database, you need to create the index files using the
+#indexdb_rna program provided by SortMeRNA. As the index basename, specify
+#the path of the FASTA file without its extension. For example, for the
+#database above the command is:
 #
-#Since SortMeRNA comes bundled with eight ribosomal databases, which are ready
-#for use after the tool installation, this sample file is in fact an actual file
-#to save the user the trouble of setting it.
+#  indexdb_rna --ref /data/rRNA_databases/rfam-5.8s-id98.fasta,/data/rRNA_databases/rfam-5.8s-id98
 #
-rfam-5.8s	Rfam 5.8S eukarya	$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
-rfam-5s	Rfam 5S archaea/bacteria	$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
-silva-arc-16s	SILVA 16S archaea	$SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta
-silva-arc-23s	SILVA 16S bacteria	$SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta
-silva-bac-16s	SILVA 16S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
-silva-bac-23s	SILVA 23S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
-silva-euk-18s	SILVA 18S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
-silva-euk-28s	SILVA 28S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
+#Since SortMeRNA comes bundled with eight ribosomal databases, you can use them
+#by creating the actual index files as explained above and uncommenting the
+#following lines.
+#rfam-5.8s-id98	Rfam 5.8S eukarya	$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
+#rfam-5s-id98	Rfam 5S archaea/bacteria	$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
+#silva-arc-16s-id95	SILVA v.119 16S archaea	$SORTMERNADIR/rRNA_databases/silva-arc-16s-id95.fasta
+#silva-arc-23s-id98	SILVA v.119 23S archaea	$SORTMERNADIR/rRNA_databases/silva-arc-23s-id98.fasta
+#silva-bac-16s-id90	SILVA v.119 16S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-16s-id90.fasta
+#silva-bac-23s-id98	SILVA v.119 23S bacteria	$SORTMERNADIR/rRNA_databases/silva-bac-23s-id98.fasta
+#silva-euk-18s-id95	SILVA v.119 18S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-18s-id95.fasta
+#silva-euk-28s-id98	SILVA v.119 28S eukarya	$SORTMERNADIR/rRNA_databases/silva-euk-28s-id98.fasta
--- a/tool_dependencies.xml	Mon Aug 03 08:18:26 2015 -0400
+++ b/tool_dependencies.xml	Wed Aug 05 02:50:43 2015 -0400
@@ -1,9 +1,9 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="sortmerna" version="1.9">
+    <package name="sortmerna" version="2.0">
         <install version="1.0">
             <actions>
-                <action type="download_by_url">http://bioinfo.lifl.fr/RNA/sortmerna/code/sortmerna-1.9.tar.gz</action>
+                <action type="download_by_url" target_filename="sortmerna-2.0.tar.gz">https://github.com/biocore/sortmerna/archive/2.0.tar.gz</action>
                 <action type="autoconf"/>
                 <action type="set_environment">
                     <environment_variable name="SORTMERNADIR" action="set_to">$INSTALL_DIR</environment_variable>