Mercurial > repos > rnateam > sortmerna
changeset 0:a8ac09e937f3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 04cfb5475292e4fd1f7c0ca86d8d0d5e5f886c3d-dirty
author | rnateam |
---|---|
date | Mon, 03 Aug 2015 08:18:26 -0400 |
parents | |
children | b482293b2987 |
files | readme.md sortmerna.py sortmerna.xml test-data/merged-paired-reads_output.fastq test-data/sortmerna_wrapper_accept1.fastq test-data/sortmerna_wrapper_in1.fastq test-data/sortmerna_wrapper_other1.fastq tool-data/rRNA_databases.loc tool-data/rRNA_databases.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 11 files changed, 561 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.md Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,46 @@ +========= +SortMeRNA +========= + +SortMeRNA, a fast and accurate filtering of ribosomal RNAs in metatranscriptomic data. + +For more information, please see http://bioinfo.lifl.fr/RNA/sortmerna/. + + +============ +Installation +============ + +It is recommended to install this wrapper via the `Galaxy Tool Shed`_. + +.. _`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/bgruening/sortmerna + + +======= +History +======= +- 0.1: First version of the wrapper from Jean-Frédéric +- 1.9.0: First version with data tables, new dependency definition, general restructuring + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE.
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sortmerna.py Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,72 @@
#!/usr/bin/env python

"""
Runs SortMeRNA
"""

import subprocess
import optparse
import shlex


def main(argv=None):
    """Parse the command line, execute buildtrie if needed, then SortMeRNA.

    Positional arguments are the database paths. With ``--buildtrie`` each
    database is first passed to the ``buildtrie`` program. With
    ``--sortmerna "<options>"`` the ``sortmerna`` program is then run with
    those options followed by ``-m 262144 -n <number of databases> --db``
    and the database paths.

    :param argv: optional list of command-line arguments; when ``None``
        optparse falls back to ``sys.argv[1:]``.
    :raises Exception: if no database is given, or if one of the external
        programs fails.
    """
    # TODO: Put all SortMeRNA options in the command-line parser
    parser = optparse.OptionParser()
    parser.add_option('--sortmerna', dest='sortmerna_cmd', help='')
    parser.add_option('--buildtrie', dest='buildtrie',
                      default=False, action='store_true', help='')
    (options, args) = parser.parse_args(argv)
    if not args:
        raise Exception('Please provide at least one database')

    if options.buildtrie:
        for database in args:
            run_buildtrie(['buildtrie', '--db', database])

    if options.sortmerna_cmd:
        run_sortmerna(['sortmerna'] +
                      shlex.split(options.sortmerna_cmd) +
                      ['-m', '262144', '-n', str(len(args)), '--db'] +
                      args)


def _run_command(cmd, program_name):
    """Run *cmd* through the shell and raise if it does not exit with 0.

    :param cmd: list of command words; they are joined with single spaces.
    :param program_name: name used in the error message.
    :raises Exception: 'Error while running <program_name>:' followed by the
        underlying error, the captured stdout and the captured stderr.
    """
    # Pre-bind the outputs so the handler below can always build its message,
    # even if Popen or communicate() itself raises.
    stdout_str = ''
    stderr_str = ''
    try:
        # NOTE(review): the words are joined unquoted and interpreted by the
        # shell, so arguments containing spaces or shell metacharacters are
        # not passed through literally. Kept as-is to preserve behaviour.
        child_process = subprocess.Popen(args=" ".join(cmd), shell=True,
                                         stdin=None,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE,
                                         # text output on Python 2 and 3
                                         universal_newlines=True)
        stdout_str, stderr_str = child_process.communicate()
        if child_process.returncode != 0:
            raise Exception(stderr_str)
    except Exception as error:
        raise Exception('Error while running %s:\n' % program_name +
                        '\n'.join([str(error), stdout_str, stderr_str]))


def run_buildtrie(cmd):
    """Run the BuildTrie program; raise Exception if it fails."""
    _run_command(cmd, 'Buildtrie')


def run_sortmerna(cmd):
    """Run the SortMeRNA program; raise Exception if it fails."""
    _run_command(cmd, 'SortMeRNA')


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sortmerna.xml Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,238 @@ +<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0"> + <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> + <requirements> + <requirement type='package' version="1.9">sortmerna</requirement> + </requirements> + <stdio> + <regex match="This program builds a Burst trie on an input rRNA database" + source="both" + level="fatal" + description="Buildtrie program failed to execute." /> + <regex match="The database name" + source="both" + level="fatal" + description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." /> + </stdio> + <version_command> +<![CDATA[ +sortmerna --version 2>&1|grep 'SortMeRNA version' +]]> + </version_command> + <command interpreter="python"> +<![CDATA[ + sortmerna.py + --sortmerna " + $strand_search + #if str( $read_family.read_family_selector ) == 'other': + --I $input_reads -r $read_family.ratio_parameter + #else: + $read_family.read_family_selector $input_reads + #end if + + #if str( $sequencing_type.sequencing_type_selector ) == 'paired': + $sequencing_type.paired_type + #end if + + #if $outputs_selected: + #if 'accept' in $outputs_selected.value: + --accept accept_file + #end if + #if 'other' in $outputs_selected.value: + --other other_file + #end if + #end if + + $log + -a \${GALAXY_SLOTS:-4} + " + #if str( $databases_type.databases_selector ) == 'history': + --buildtrie + #for $db in $databases_type.input_databases + $db.database_name + #end for + #else: + ## databases path is not directly accessible, must match by hand with LOC file contents + ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y] + for y in $databases_type.input_databases.value])} + #end if +]]> + </command> + <inputs> + <conditional name="read_family"> + <param name="read_family_selector" 
type="select" format="text" label="Sequencing technology of querying sequences (reads)" + help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> + <option value="--I">Illumina Solexa</option> + <option value="--454">454 Roche</option> + <option value="other">Other</option> + </param> + <when value="other"> + <param name="ratio_parameter" type="float" value="1" min="0" max="1" + label="Ratio parameter (the number of hits on the read / read length)" + help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads. + For other read types, if the sequencing technology produces high quality reads with a low substitution error rate + (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27]. + If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), + then the ratio parameter can be set to r=[0.13,0.17] (-r)."/> + </when> + </conditional> + <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/> + <conditional name="sequencing_type"> + <param name="sequencing_type_selector" type="select" label="Sequencing type"> + <option value="not_paired">Reads are not paired</option> + <option value="paired">Reads are paired</option> + </param> + <when value="paired"> + <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads" + help="SortMeRNA does not use the pairing information for filtering RNA, + however if one read of a pair is accepted and the other is not, + the resulting output may break apart the pair into two separate files. 
+ The purpose of 'Reads are paired' option is to preserve the pairing of the reads."> + <option value="--paired-in">to accepted file (--paired-in)</option> + <option value="--paired-out">to rejected file (--paired-out)</option> + </param> + </when> + </conditional> + + <param name="strand_search" type="select" label="Which strands to search" display="radio"> + <option value="">Search both strands</option> + <option value="-F">Search only the forward strand (-F)</option> + <option value="-R">Search only the reverse-complementary strand (-R)</option> + </param> + + <conditional name="databases_type"> + <param name="databases_selector" type="select" label="Databases to query" + help="Public rRNA databases provided with SortMeRNA have been indexed. + On the contrary, personal databases must be indexed each time SortMeRNA is launched. + Please be patient, this may take some time depending on the size of the given database."> + <option value="cached" selected="true">Public ribosomal databases</option> + <option value="history">Databases from your history</option> + </param> + <when value="cached"> + <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true"> + <options from_data_table="rRNA_databases" /> + <validator type="no_options" message="Select at least one database"/> + </param> + </when> + <when value="history"> + <repeat name="input_databases" title="Database" min="1"> + <param name="database_name" type="data" format="fasta" label="rRNA database" + help="Your database will be indexed first, which may take up to several minutes."/> + </repeat> + </when> + </conditional> + + <!-- Outputs --> + <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options"> + <option value="accept" selected="True">Reads matching to at least one database</option> + <option value="other">Reads not found in any database</option> + </param> + <param name="log" type="boolean" checked="False" 
truevalue="--log log_file" falsevalue="" label="Statistics file" + help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> + </param> + + </inputs> + <outputs> + <data format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat" + label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})"> + <filter>outputs_selected and 'accept' in outputs_selected</filter> + </data> + <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" + label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})"> + <filter>outputs_selected and 'other' in outputs_selected</filter> + </data> + <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log"> + <filter>log</filter> + </data> + </outputs> + <tests> + <test> + <param name="read_family_selector" value="I" /> + <param name="input_reads" value="sortmerna_wrapper_in1.fastq" /> + <param name="sequencing_type_selector" value="not_paired" /> + <param name="strand_search" value="" /> + <param name="databases_selector" value="cached" /> + <param name="input_databases" value="rfam-5.8s,rfam-5s" /> + <param name="outputs_selected" value="accept,other" /> + <param name="log" value="" /> + <param name="options_type_selector" value="less" /> + <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" /> + <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +SortMeRNA_ is a software designed to rapidly filter ribosomal RNA fragments +from metatransriptomic data produced by next-generation sequencers. +It is capable of handling large RNA databases and sorting out all fragments +matching to the database with high accuracy and specificity. + +.. 
_SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ + + +**Input** + +The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. +If the user has two forward-reverse paired-sequencing reads files, they may use +the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order. + +If the sequencing type for the reads is paired-ended, the user has two options under +"Sequencing type" to filter the reads and preserve their order in the file. +For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. + +.. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf + + +**Output** + +The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file for the rRNA content of reads, as well as rRNA subunit distribution can be generated. + + +**rRNA databases** + +SortMeRNA is distributed with 8 representative rRNA databases, which were +all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S +(version 11.0) databases using the tool UCLUST. 
+ ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) | ++==========================+======+=============+===================+========================+===================+ +| SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 23s archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ + +id %: members of the cluster must have identity at least 'id %' identity with the representative sequence + +average id %: average identity of a cluster member to the 
representative sequence + +The user may also choose to use their own rRNA databases. + +.. class:: warningmark + +Note that your personal databases are indexed each time, and that +this may take some time depending on the size of the given database. +]]> + </help> + + <citations> + <citation type="doi">10.1093/bioinformatics/bts611</citation> + <citation type="doi">10.1093/nar/gks1219</citation> + <citation type="doi">10.1093/nar/gks1005</citation> + <citation type="doi">10.1093/bioinformatics/btq461</citation> + <citation type="doi">10.1038/nbt.2198</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/merged-paired-reads_output.fastq Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,50 @@ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 
+bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q + +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_accept1.fastq Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,28 @@ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_in1.fastq Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,48 @@ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 
+bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b`
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_other1.fastq Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,20 @@ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rRNA_databases.loc Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,8 @@ +rfam-5.8s Database Rfam 5.8s $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta +rfam-5s Database Rfam 5s $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta +silva-arc-16s Database Silva-Arc 16s $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta +silva-arc-23s Database Silva-Arc 23s $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta +silva-bac-16s Database Silva-Bac 16s $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta +silva-bac-23s Database Silva-Bac 23s $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta +silva-euk-18s Database Silva-Euk 18s $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta +silva-euk-28s Database Silva-Euk 28s $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rRNA_databases.loc.sample Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of public ribosomal databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +#<unique_id> <database_caption> <base_name_path> +# +#It is important that the actual database name does not have a space in it, +#and that the first tab that appears in the line is right before the path. +# +#So, for example, if your database is rfam-5.8s and the path to your base name +#is /data/rRNA_databases/rfam-5.8s, then the rRNA_databases.loc entry would look like this: +# +#rfam-5.8s Rfam 5.8S eukarya /data/rRNA_databases/rfam-5.8s +# +#Since SortMeRNA comes bundled with eight ribosomal databases, which are ready +#for use after the tool installation, this sample file is in fact an actual file +#to save the user the trouble of setting it. +# +rfam-5.8s Rfam 5.8S eukarya $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta +rfam-5s Rfam 5S archaea/bacteria $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta +silva-arc-16s SILVA 16S archaea $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta +silva-arc-23s SILVA 23S archaea $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta +silva-bac-16s SILVA 16S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta +silva-bac-23s SILVA 23S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta +silva-euk-18s SILVA 18S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta +silva-euk-28s SILVA 28S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of public ribosomal databases --> + <table name="rRNA_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/rRNA_databases.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Aug 03 08:18:26 2015 -0400 @@ -0,0 +1,18 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="sortmerna" version="1.9"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://bioinfo.lifl.fr/RNA/sortmerna/code/sortmerna-1.9.tar.gz</action> + <action type="autoconf"/> + <action type="set_environment"> + <environment_variable name="SORTMERNADIR" action="set_to">$INSTALL_DIR</environment_variable> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme> + SortMeRNA requires g++ 4.3 or later. Installation may take a moment since ribosomal databases have to be indexed. + </readme> + </package> +</tool_dependency>