Mercurial > repos > rnateam > sortmerna
changeset 1:b482293b2987 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 844d980a7de5f199487ca9591420a23df63c5246-dirty
author | iuc |
---|---|
date | Wed, 05 Aug 2015 02:50:43 -0400 |
parents | a8ac09e937f3 |
children | 3699b6b771e0 |
files | sortmerna.py sortmerna.xml test-data/merged-paired-reads_output.fastq test-data/read_small.fasta test-data/read_small.fastq test-data/ref_small.fasta test-data/sortmerna_wrapper_accept1.fastq test-data/sortmerna_wrapper_accept2.fasta test-data/sortmerna_wrapper_in1.fastq test-data/sortmerna_wrapper_other1.fastq test-data/sortmerna_wrapper_other2.fasta test-data/sortmerna_wrapper_sam1.sam test-data/sortmerna_wrapper_sam2.sam tool-data/rRNA_databases.loc tool-data/rRNA_databases.loc.sample tool_dependencies.xml |
diffstat | 15 files changed, 185 insertions(+), 365 deletions(-) [+] |
line wrap: on
line diff
--- a/sortmerna.py Mon Aug 03 08:18:26 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -#!/usr/bin/env python - -""" -Runs SortMeRNA -""" - -import subprocess -import optparse -import shlex - - -def main(): - """Parse the command line, exectutes SortMeRNA and buildtrie if neeeded.""" - #TODO: Put all SortMeRNA options in the command-line parser - parser = optparse.OptionParser() - parser.add_option('--sortmerna', dest='sortmerna_cmd', help='') - parser.add_option('--buildtrie', dest='buildtrie', - default=False, action='store_true', help='') - (options, args) = parser.parse_args() - if not args: - raise Exception('Please provide at least one database') - - if options.buildtrie: - buildtrie = 'buildtrie' - for database in args: - run_buildtrie([buildtrie, '--db', database]) - - if options.sortmerna_cmd: - sortmerna = 'sortmerna' - run_sortmerna([sortmerna] + - shlex.split(options.sortmerna_cmd) + - ['-m', '262144', '-n', str(len(args)), '--db'] + - args) - - -def run_buildtrie(cmd): - """Run the BuildTrie program.""" - try: - stdout_arg = subprocess.PIPE - stderr_arg = subprocess.PIPE - child_process = subprocess.Popen(args=" ".join(cmd), shell=True, - stdin=None, stdout=stdout_arg, - stderr=stderr_arg) - stdout_str, stderr_str = child_process.communicate() - return_code = child_process.returncode - if return_code is not 0: - raise Exception(stderr_str) - - except Exception, error: - raise Exception('Error while running Buildtrie:\n' + - '\n'.join([str(error), stdout_str, stderr_str])) - - -def run_sortmerna(cmd): - """Run the SortMeRNA program.""" - try: - stdout_arg = subprocess.PIPE - stderr_arg = subprocess.PIPE - child_process = subprocess.Popen(args=" ".join(cmd), shell=True, - stdin=None, stdout=stdout_arg, - stderr=stderr_arg) - stdout_str, stderr_str = child_process.communicate() - return_code = child_process.returncode - if return_code is not 0: - raise Exception(stderr_str) - except Exception, error: - raise Exception('Error while running SortMeRNA:\n' + - '\n'.join([str(error), stdout_str, stderr_str])) - - -if __name__ == "__main__": - main()
--- a/sortmerna.xml Mon Aug 03 08:18:26 2015 -0400 +++ b/sortmerna.xml Wed Aug 05 02:50:43 2015 -0400 @@ -1,7 +1,7 @@ -<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0"> +<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0"> <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> <requirements> - <requirement type='package' version="1.9">sortmerna</requirement> + <requirement type='package' version="2.0">sortmerna</requirement> </requirements> <stdio> <regex match="This program builds a Burst trie on an input rRNA database" @@ -18,145 +18,167 @@ sortmerna --version 2>&1|grep 'SortMeRNA version' ]]> </version_command> - <command interpreter="python"> + <command> <![CDATA[ - sortmerna.py - --sortmerna " - $strand_search - #if str( $read_family.read_family_selector ) == 'other': - --I $input_reads -r $read_family.ratio_parameter - #else: - $read_family.read_family_selector $input_reads - #end if - - #if str( $sequencing_type.sequencing_type_selector ) == 'paired': - $sequencing_type.paired_type + #set $ref = '' + #set $sep='' + #if str( $databases_type.databases_selector ) == 'history': + #for $db in $databases_type.database_name + #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] + #set $sep = ':' + #end for + indexdb_rna --ref $ref + && + #else: + ## databases path is not directly accessible, must match by hand with LOC file contents + #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) + #for $db in $databases_type.input_databases.value + #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] + #set $sep = ':' + #end for + #end if + sortmerna --ref $ref --reads $input_reads --aligned aligned + #if str( $sequencing_type.sequencing_type_selector ) == 'paired' + $sequencing_type.paired_type + #end if + $strand_search + $aligned_fastx.aligned_fastx_selector + #if $aligned_fastx.aligned_fastx_selector == '--fastx' + #if $aligned_fastx.other + --other other_file #end if - - #if $outputs_selected: - #if 'accept' in $outputs_selected.value: - --accept accept_file - #end if - #if 'other' in $outputs_selected.value: - --other other_file - #end if - #end if - - $log - -a \${GALAXY_SLOTS:-4} - " - #if str( $databases_type.databases_selector ) == 'history': - --buildtrie - #for $db in $databases_type.input_databases - $db.database_name - #end for - #else: - ## databases path is not directly accessible, must match by hand with LOC file contents - ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y] - for y in $databases_type.input_databases.value])} - #end if + #end if + $aligned_sam.aligned_sam_selector + #if $aligned_sam.aligned_sam_selector == '--sam' + $aligned_sam.sq + #end if + $aligned_blast + $log + -a \${GALAXY_SLOTS:-1} ]]> </command> <inputs> - <conditional name="read_family"> - <param name="read_family_selector" type="select" format="text" label="Sequencing technology of querying sequences (reads)" - help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> - <option value="--I">Illumina Solexa</option> - <option value="--454">454 Roche</option> - <option value="other">Other</option> + <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/> + <conditional name="sequencing_type"> + <param name="sequencing_type_selector" type="select" label="Sequencing type"> + <option value="not_paired">Reads are not paired</option> + <option value="paired">Reads are paired</option> + </param> + <when value="paired"> + <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not"> + <option value="">leave the reads split between aligned and rejected files</option> + <option value="--paired-in">output both reads to aligned file (--paired-in)</option> + <option value="--paired-out">output both reads to rejected file (--paired-out)</option> + </param> + </when> + </conditional> + + <param name="strand_search" type="select" label="Which strands to search" display="radio"> + <option value="">Search both strands</option> + <option value="-F">Search only the forward strand (-F)</option> + <option value="-R">Search only the reverse-complementary strand (-R)</option> </param> - <when value="other"> - <param name="ratio_parameter" type="float" value="1" min="0" max="1" - label="Ratio parameter (the number of hits on the read / read length)" - help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads. - For other read types, if the sequencing technology produces high quality reads with a low substitution error rate - (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27]. - If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), - then the ratio parameter can be set to r=[0.13,0.17] (-r)."/> - </when> - </conditional> - <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/> - <conditional name="sequencing_type"> - <param name="sequencing_type_selector" type="select" label="Sequencing type"> - <option value="not_paired">Reads are not paired</option> - <option value="paired">Reads are paired</option> - </param> - <when value="paired"> - <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads" - help="SortMeRNA does not use the pairing information for filtering RNA, - however if one read of a pair is accepted and the other is not, - the resulting output may break apart the pair into two separate files. - The purpose of 'Reads are paired' option is to preserve the pairing of the reads."> - <option value="--paired-in">to accepted file (--paired-in)</option> - <option value="--paired-out">to rejected file (--paired-out)</option> - </param> - </when> - </conditional> - <param name="strand_search" type="select" label="Which strands to search" display="radio"> - <option value="">Search both strands</option> - <option value="-F">Search only the forward strand (-F)</option> - <option value="-R">Search only the reverse-complementary strand (-R)</option> - </param> + <conditional name="databases_type"> + <param name="databases_selector" type="select" label="Databases to query" + help="Public rRNA databases provided with SortMeRNA have been indexed. + On the contrary, personal databases must be indexed each time SortMeRNA is launched. + Please be patient, this may take some time depending on the size of the given database."> + <option value="cached" selected="true">Public ribosomal databases</option> + <option value="history">Databases from your history</option> + </param> + <when value="cached"> + <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> + <options from_data_table="rRNA_databases" /> + <validator type="no_options" message="Select at least one database"/> + </param> + </when> + <when value="history"> + <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" + help="Your databases will be indexed first, which may take up to several minutes."/> + </when> + </conditional> - <conditional name="databases_type"> - <param name="databases_selector" type="select" label="Databases to query" - help="Public rRNA databases provided with SortMeRNA have been indexed. - On the contrary, personal databases must be indexed each time SortMeRNA is launched. - Please be patient, this may take some time depending on the size of the given database."> - <option value="cached" selected="true">Public ribosomal databases</option> - <option value="history">Databases from your history</option> - </param> - <when value="cached"> - <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true"> - <options from_data_table="rRNA_databases" /> - <validator type="no_options" message="Select at least one database"/> + <!-- Outputs --> + <conditional name="aligned_fastx"> + <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format"> + <option value="--fastx">Yes (--fastx)</option> + <option value="">No</option> </param> - </when> - <when value="history"> - <repeat name="input_databases" title="Database" min="1"> - <param name="database_name" type="data" format="fasta" label="rRNA database" - help="Your database will be indexed first, which may take up to several minutes."/> - </repeat> - </when> - </conditional> - - <!-- Outputs --> - <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"> - <option value="accept" selected="True">Reads matching to at least one database</option> - <option value="other">Reads not found in any database</option> - </param> - <param name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file" - help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> - </param> - + <when value="--fastx"> + <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" /> + </when> + <when value="" /> + </conditional> + <conditional name="aligned_sam"> + <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format"> + <option value="--sam">Yes (--sam)</option> + <option value="">No</option> + </param> + <when value="--sam"> + <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" /> + </when> + <when value="" /> + </conditional> + <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format"> + <option value="--blast 0">pairwise (--blast 0)</option> + <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option> + <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option> + <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option> + <option value="" selected="true">No</option> + </param> + <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file" + help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> + </param> </inputs> <outputs> - <data format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat" - label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})"> - <filter>outputs_selected and 'accept' in outputs_selected</filter> + <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat" + label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})"> + <filter>aligned_fastx['aligned_fastx_selector']</filter> </data> <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" - label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})"> - <filter>outputs_selected and 'other' in outputs_selected</filter> + label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})"> + <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter> + </data> + <data format="sam" name="output_sam" from_work_dir="aligned.sam" + label="Alignments on ${on_string} (SAM)"> + <filter>aligned_sam['aligned_sam_selector']</filter> </data> - <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log"> + <data format="tabular" name="output_blast" from_work_dir="aligned.blast" + label="Alignments on ${on_string} (BLAST)"> + <filter>aligned_blast</filter> + <change_format> + <when input="aligned_blast" value="--blast 0" format="txt" /> + </change_format> + </data> + <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log"> <filter>log</filter> </data> </outputs> <tests> <test> - <param name="read_family_selector" value="I" /> - <param name="input_reads" value="sortmerna_wrapper_in1.fastq" /> + <param name="input_reads" value="read_small.fastq" /> <param name="sequencing_type_selector" value="not_paired" /> <param name="strand_search" value="" /> - <param name="databases_selector" value="cached" /> - <param name="input_databases" value="rfam-5.8s,rfam-5s" /> - <param name="outputs_selected" value="accept,other" /> + <param name="databases_selector" value="history" /> + <param name="database_name" value="ref_small.fasta" /> + <param name="other" value="True" /> <param name="log" value="" /> - <param name="options_type_selector" value="less" /> - <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" /> + <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" /> <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> + <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" /> + </test> + <test> + <param name="input_reads" value="read_small.fasta" /> + <param name="sequencing_type_selector" value="not_paired" /> + <param name="strand_search" value="" /> + <param name="databases_selector" value="history" /> + <param name="database_name" value="ref_small.fasta" /> + <param name="other" value="True" /> + <param name="log" value="" /> + <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" /> + <output name="output_other" file="sortmerna_wrapper_other2.fasta" /> + <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" /> </test> </tests> <help>
--- a/test-data/merged-paired-reads_output.fastq Mon Aug 03 08:18:26 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC -+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ -@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC -+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa -@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT -+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T -@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG -+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ -@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT -+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ -@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC -+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b -@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC -+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b -@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA -+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb -@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC -+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb -@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC -+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged -@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT -+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q - -@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG -+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/read_small.fasta Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,2 @@ +>read1 +GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/read_small.fastq Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,4 @@ +@read1 +GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC ++read1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref_small.fasta Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,2 @@ +>EncFa169 count=1; cluster_weight=27830; cluster=EncFa169; cluster_score=1.000000; cluster_center=True; +AGAGTTTGATCCTGGCTCAGGACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAACGCTTCTTTCCTCCCGAGTGCTTGCACTCAATTGGAAAGAGGAGTGGCGGACGGGTGAGTAACACGTGGGTAACCTACCCATCAGAGGGGGATAACACTTGGAAACAGGTGCTAATACCGCATAACAGTTTATGCCGCATGGCATAAGAGTGAAAGGCGCTTTCGGGTGTCGCTGATGGATGGACCCGCGGTGCATTAGCTAGTTGGTGAGGTAACGGCTCACCAAGGCCACGATGCATAGCCGACCTGAGAGGGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCGGCAATGGACGAAAGTCTGACCGAGCAACGCCGCGTGAGTGAAGAAGGTTTTCGGATCGTAAAACTCTGTTGTTAGAGAAGAACAAGGACGTTAGTAACTGAACGTCCCCTGACGGTATCTAACCAGAAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTTCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATTCCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACCAGTGGCGAAGGCGGCTCTCTGGTCTGTAACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAAGTGTTGGAGGGTTTCCGCCCTTCAGTGCTGCAGCAAACGCATTAAGCACTCCGCCTGGGGAGTACGACCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCTTTGACCACTCTAGAGATAGAGCTTTCCCTTCGGGGACAAAGTGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATTGTTAGTTGCCATCATTTAGTTGGGCACTCTAGCGAGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTGCTACAATGGGAAGTACAACGAGTCGCTAGACCGCGAGGTCATGCAAATCTCTTAAAGCTTCTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGCCGGAATCGCTAGTAATCGCGGATCAGCACGCCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGTTTGTAACACCCGAAGTCGGTGAGGTAACCTTTTTGGAGCCAGCCGCCTAAGGTGGGATAGATGATTGGGGTGAAGTCGTAACAAGGTAGCCGTATCGGAAGGTGCGGCTGGATCACCT
--- a/test-data/sortmerna_wrapper_accept1.fastq Mon Aug 03 08:18:26 2015 -0400 +++ b/test-data/sortmerna_wrapper_accept1.fastq Wed Aug 05 02:50:43 2015 -0400 @@ -1,28 +1,4 @@ -@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC -+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ -@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT -+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T -@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT -+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ -@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC -+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b -@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC -+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb -@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT -+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q -@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG -+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` +@read1 +GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC ++read1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_accept2.fasta Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,2 @@ +>read1 +GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC
--- a/test-data/sortmerna_wrapper_in1.fastq Mon Aug 03 08:18:26 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC -+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa -@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG -+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ -@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC -+PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 -___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ -@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT -+PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 -bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T -@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT -+PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 -___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ -@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC -+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b -@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA -+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb -@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC -+PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 -bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b -@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC -+PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 -bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb -@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT -+PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 -bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q -@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC -+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged -@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG -+PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 -Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
--- a/test-data/sortmerna_wrapper_other1.fastq Mon Aug 03 08:18:26 2015 -0400 +++ b/test-data/sortmerna_wrapper_other1.fastq Wed Aug 05 02:50:43 2015 -0400 @@ -1,20 +0,0 @@ -@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC -+PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 -__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa -@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG -+PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 -bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ -@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC -+PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 -_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b -@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA -+PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 -bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb -@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC -+PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 -baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_sam1.sam Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,3 @@ +@HD VN:1.0 SO:unsorted +@PG ID:sortmerna VN:1.0 CL:sortmerna --ref /tmp/tmpY80cK0/files/000/dataset_2.dat,dataset_2 --reads /tmp/tmpY80cK0/files/000/dataset_1.dat --aligned aligned --fastx --other other_file.dat dat -a 1 +read1 0 EncFa169 645 255 2S87M * 0 0 GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII AS:i:169 NM:i:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_sam2.sam Wed Aug 05 02:50:43 2015 -0400 @@ -0,0 +1,3 @@ +@HD VN:1.0 SO:unsorted +@PG ID:sortmerna VN:1.0 CL:sortmerna --ref /tmp/tmpY80cK0/files/000/dataset_7.dat,dataset_7 --reads /tmp/tmpY80cK0/files/000/dataset_6.dat --aligned aligned --fastx --other other_file.dat dat -a 1 +read1 0 EncFa169 645 255 2S87M * 0 0 GCCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAGAGTGGAATACCATGTGTAGCGGTGAAATGCGTAGATATATGGAGGAACACC * AS:i:169 NM:i:1
--- a/tool-data/rRNA_databases.loc Mon Aug 03 08:18:26 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -rfam-5.8s Database Rfam 5.8s $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta -rfam-5s Database Rfam 5s $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta -silva-arc-16s Database Silva-Arc 16s $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta -silva-arc-23s Database Silva-Arc 23s $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta -silva-bac-16s Database Silva-Bac 16s $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta -silva-bac-23s Database Silva-Bac 23s $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta -silva-euk-18s Databse Silva-Euk 18s $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta -silva-euk-28s Database Silva-Euk 28s $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
--- a/tool-data/rRNA_databases.loc.sample Mon Aug 03 08:18:26 2015 -0400 +++ b/tool-data/rRNA_databases.loc.sample Wed Aug 05 02:50:43 2015 -0400 @@ -1,26 +1,30 @@ #This is a sample file distributed with Galaxy that is used to define a -#list of public ribosomal databases, using three columns tab separated -#(longer whitespace are TAB characters): +#list of public ribosomal databases for SortMeRNA, using the following format +#(white space characters are TAB characters): # -#<unique_id> <database_caption> <base_name_path> +#<unique_id> <database_caption> <fasta_file_path> # -#It is important that the actual database name does not have a space in it, -#and that the first tab that appears in the line is right before the path. +#So, for example, if your database is rfam-5.8s-id98 and the path to your FASTA +#file is /data/rRNA_databases/rfam-5.8s-id98.fasta, then the rRNA_databases.loc +#entry would look like this: # -#So, for example, if your database is rfam-5.8s and the path to your base name -#is /data/rRNA_databases/rfam-5.8s, then the rRNA_databases.loc entry would look like this: +#rfam-5.8s-id98 Rfam 5.8S eukarya /data/rRNA_databases/rfam-5.8s-id98.fasta # -#rfam-5.8s Rfam 5.8S eukarya /data/rRNA_databases/rfam-5.8s +#For each rRNA database, you need to create the index files using the +#indexdb_rna program provided by SortMeRNA. You need to specify as index +#basename the path of the FASTA file without extension. For example, for the +#previous database the command is: # -#Since SortMeRNA comes bundled with eight ribosomal databases, which are ready -#for use after the tool installation, this sample file is in fact an actual file -#to save the user the trouble of setting it. +# indexdb_rna --ref /data/rRNA_databases/rfam-5.8s-id98.fasta,/data/rRNA_databases/rfam-5.8s-id98 # -rfam-5.8s Rfam 5.8S eukarya $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta -rfam-5s Rfam 5S archaea/bacteria $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta -silva-arc-16s SILVA 16S archaea $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta -silva-arc-23s SILVA 16S bacteria $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta -silva-bac-16s SILVA 16S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta -silva-bac-23s SILVA 23S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta -silva-euk-18s SILVA 18S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta -silva-euk-28s SILVA 28S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta +#Since SortMeRNA comes bundled with eight ribosomal databases, you can use them +#by creating the actual index files as explained above and uncommenting the +#following lines. +#rfam-5.8s-id98 Rfam 5.8S eukarya $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta +#rfam-5s-id98 Rfam 5S archaea/bacteria $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta +#silva-arc-16s-id95 SILVA v.119 16S archaea $SORTMERNADIR/rRNA_databases/silva-arc-16s-id95.fasta +#silva-arc-23s-id98 SILVA v.119 23S archaea $SORTMERNADIR/rRNA_databases/silva-arc-23s-id98.fasta +#silva-bac-16s-id90 SILVA v.119 16S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-16s-id90.fasta +#silva-bac-23s-id98 SILVA v.119 23S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-23s-id98.fasta +#silva-euk-18s-id95 SILVA v.119 18S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-18s-id95.fasta +#silva-euk-28s-id98 SILVA v.119 28S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-28s-id98.fasta
--- a/tool_dependencies.xml Mon Aug 03 08:18:26 2015 -0400 +++ b/tool_dependencies.xml Wed Aug 05 02:50:43 2015 -0400 @@ -1,9 +1,9 @@ <?xml version="1.0"?> <tool_dependency> - <package name="sortmerna" version="1.9"> + <package name="sortmerna" version="2.0"> <install version="1.0"> <actions> - <action type="download_by_url">http://bioinfo.lifl.fr/RNA/sortmerna/code/sortmerna-1.9.tar.gz</action> + <action type="download_by_url" target_filename="sortmerna-2.0.tar.gz">https://github.com/biocore/sortmerna/archive/2.0.tar.gz</action> <action type="autoconf"/> <action type="set_environment"> <environment_variable name="SORTMERNADIR" action="set_to">$INSTALL_DIR</environment_variable>