# HG changeset patch # User bonsai # Date 1367341955 14400 # Node ID 2e7f0da431e319ca1db7e92523108bf8e29a082f Uploaded version 1.0 diff -r 000000000000 -r 2e7f0da431e3 merge_paired_reads.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_paired_reads.xml Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,38 @@ + + + sortmerna + + Merges two fastq paired-reads files into one file. + +merge-paired-reads.sh $input_reads1 $input_reads2 output_file.txt + + + + + + + + + + + + + + + + + + + + +Merges two fastq paired-reads files into one file. + +To run merge-paired-reads:: + + bash merge-paired-reads.sh file1.fastq file2.fastq outputfile.fastq + + \ No newline at end of file diff -r 000000000000 -r 2e7f0da431e3 sortmerna.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sortmerna.txt Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,15 @@ +Galaxy tool to execute SortMeRNA +================================ + +This tool is a wrapper for SortMeRNA, a fast and accurate filtering of +ribosomal RNAs in metatranscriptomic data. + +See http://bioinfo.lifl.fr/RNA/sortmerna/ for more information. + + +Automated Installation +====================== + +Galaxy should automatically download and install SortMeRNA. 
+ + diff -r 000000000000 -r 2e7f0da431e3 sortmerna_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sortmerna_wrapper.py Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +""" +Runs SortMeRNA +""" + +import subprocess +import optparse +import shlex + + +def main(): + """Parse the command line, exectutes SortMeRNA and buildtrie if neeeded.""" + #TODO: Put all SortMeRNA options in the command-line parser + parser = optparse.OptionParser() + parser.add_option('--sortmerna', dest='sortmerna_cmd', help='') + parser.add_option('--buildtrie', dest='buildtrie', + default=False, action='store_true', help='') + (options, args) = parser.parse_args() + if not args: + raise Exception('Please provide at least one database') + + if options.buildtrie: + buildtrie = 'buildtrie' + for database in args: + run_buildtrie([buildtrie, '--db', database]) + + if options.sortmerna_cmd: + sortmerna = 'sortmerna' + run_sortmerna([sortmerna] + + shlex.split(options.sortmerna_cmd) + + ['-m', '262144', '-n', str(len(args)), '--db'] + + args) + + +def run_buildtrie(cmd): + """Run the BuildTrie program.""" + try: + stdout_arg = subprocess.PIPE + stderr_arg = subprocess.PIPE + child_process = subprocess.Popen(args=" ".join(cmd), shell=True, + stdin=None, stdout=stdout_arg, + stderr=stderr_arg) + stdout_str, stderr_str = child_process.communicate() + return_code = child_process.returncode + if return_code is not 0: + raise Exception(stderr_str) + + except Exception, error: + raise Exception('Error while running Buildtrie:\n' + + '\n'.join([str(error), stdout_str, stderr_str])) + + +def run_sortmerna(cmd): + """Run the SortMeRNA program.""" + try: + stdout_arg = subprocess.PIPE + stderr_arg = subprocess.PIPE + child_process = subprocess.Popen(args=" ".join(cmd), shell=True, + stdin=None, stdout=stdout_arg, + stderr=stderr_arg) + stdout_str, stderr_str = child_process.communicate() + return_code = child_process.returncode + if return_code is not 0: + raise 
Exception(stderr_str) + except Exception, error: + raise Exception('Error while running SortMeRNA:\n' + + '\n'.join([str(error), stdout_str, stderr_str])) + + +if __name__ == "__main__": + main() diff -r 000000000000 -r 2e7f0da431e3 sortmerna_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sortmerna_wrapper.xml Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,354 @@ + + + + sortmerna + + Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data + +sortmerna_wrapper.py +--sortmerna " +$strand_search +#if str( $read_family.read_family_selector ) == 'other': + --I $input_reads -r $read_family.ratio_parameter +#else: + $read_family.read_family_selector $input_reads +#end if +#if str( $sequencing_type.sequencing_type_selector ) == 'paired': + $sequencing_type.paired_type +#end if + +#if $outputs_selected: + #if 'accept' in $outputs_selected.value: + --accept accept_file + #end if + #if 'other' in $outputs_selected.value: + --other other_file + #end if +#end if +$log +#if str( $options.options_type_selector ) == 'more': + -a $options.number_of_threads +#end if +" +#if str( $databases_type.databases_selector ) == 'history': + --buildtrie + #for $db in $databases_type.input_databases + $db.database_name + #end for +#else: + ## databases path is not directly accessible, must match by hand with LOC file contents + ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y] + for y in $databases_type.input_databases.value])} +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + outputs_selected and 'accept' in outputs_selected + + + outputs_selected and 'other' in outputs_selected + + + log + + + + + + + + + + + + + + + + + + + + + + +**Overview** + +SortMeRNA_ is a software designed to rapidly filter ribosomal RNA fragments +from metatransriptomic data produced by next-generation sequencers. 
+It is capable of handling large RNA databases and sorting out all fragments +matching to the database with high accuracy and specificity. + +.. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ + +If you use this tool, please cite Kopylova E., Noé L. and Touzet H., +`"SortMeRNA: Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data"`__, +Bioinformatics (2012), doi: 10.1093/bioinformatics/bts611. + +.. __: http://bioinformatics.oxfordjournals.org/content/28/24/3211 + +------ + +**Input** + +The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. +If the user has two forward-reverse paired-sequencing reads files, they may use +the script "merge-paired-reads.sh" to interleave the reads into one file, preserving their order. + +If the sequencing type for the reads is paired-end, the user has two options under +"Sequencing type" to filter the reads and preserve their order in the file. +For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. + +.. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf + +------ + +**Output** + +The output will follow the same format (FASTA or FASTQ) as the reads. + +In the standalone version of SortMeRNA, the user may output the matching reads in a separate file per database (--bydbs option). This option will be made available in a future version of Galaxy. + +------ + +**rRNA databases** + +SortMeRNA is distributed with 8 representative rRNA databases, which were +all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S +(version 11.0) databases using the tool UCLUST. 
+ ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| Representative database | id % | average id % | # seq | Origin | # seq | filtered to remove | ++==========================+======+==============+=======+========================+========+====================+ +| SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | 23s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | 23s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | 26s,28s,23s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | 16s,26s,28s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| SILVA 23S archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | 16s,26s,28s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | 18s | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ +| Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 
| | ++--------------------------+------+--------------+-------+------------------------+--------+--------------------+ + + +id % : + members of the cluster must have at least 'id %' identity with the representative sequence + +average id % : + average identity of a cluster member to the representative sequence + +The user may also choose to use their own rRNA databases. + +.. class:: warningmark + +Note that your personal databases are indexed each time, and that +this may take some time depending on the size of the given database. + +------ + +**SortMeRNA parameter list** + +The standalone, command-line version of SortMeRNA uses the following parameters. + +For indexing (buildtrie): + +This program builds a Burst trie on an input rRNA database file in fasta format +and stores the material in binary files under the folder '/automata':: + + ./buildtrie --db [path to rrnas database file name {.fasta}] {OPTIONS} + +The list of OPTIONS can be left blank, the default values will be used:: + + -L length of the sliding window (the seed) + (default: 18) + + -F search only the forward strand + -R search only the reverse-complementary strand + (default: both strands are searched) + + -h help + + + + +For sorting (sortmerna): + +To run SortMeRNA, type in any order after 'sortmerna':: + + --I [illumina reads file name {fasta/fastq}] + + --454 [roche 454 reads file name {fasta/fastq}] + + -n number of databases to use (must precede --db) + + --db [rrnas database name(s)] + + One database, + ex 1. -n 1 --db /path1/database1.fasta + + Multiple databases, + ex 2. 
-n 2 --db /path2/database2.fasta /path3/database3.fasta + + {OPTIONS} + +The list of OPTIONS can be left blank, the default values will be used:: + + --accept [accepted reads file name] + --other [rejected reads file name] + (default: no output file is created) + + --bydbs output the accepted reads by database + (default: concatenated file of reads) + + --log [overall statistics file name] + (default: no statistics file created) + + --paired-in put both paired-end reads into --accept file + --paired-out put both paired-end reads into --other file + (default: if one read is accepted and the other is not, + separate the reads into --accept and --other files) + + -r ratio of the number of hits on the read / read length + (default Illumina: 0.25, Roche 454: 0.15) + + -F search only the forward strand + -R search only the reverse-complementary strand + (default: both strands are searched) + + -a number of threads to use + (default: 1) + + -m (m x 4096 bytes) for loading the reads into memory + ex. '-m 4' means 4*4096 = 16384 bytes will be allocated for the reads + note: maximum -m is 1020039 + (default: m = 262144 = 1GB) + + -v verbose + (default: deactivated) + + -h help + + --version version number + +------ + +**Bibliography** + +[1] Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO (2013) The SILVA ribosomal RNA gene database project: improved data processing and web-based tools, Nucleic Acids Research, 41 (D1): D590-D596. + +[2] Rfam 11.0: 10 years of RNA families. S.W. Burge, J. Daub, R. Eberhardt, J. Tate, L. Barquist, E.P. Nawrocki, S.R. Eddy, P.P. Gardner, A. Bateman. Nucleic Acids Research (2012), doi: 10.1093/nar/gks1005 + +[3] Edgar, R.C. (2010) Search and clustering orders of magnitude faster than BLAST, Bioinformatics 26(19), 2460-2461, doi: 10.1093/bioinformatics/btq461 + +[4] Loman, N. J. 
and Misra, Raju V and Dallman, Timothy J and Constantinidou, Chrystala and Gharbia, Saheer E and Wain, John and Pallen, Mark J., Performance comparison of benchtop high-throughput sequencing platforms (2012), Nature Biotechnology, 30 (5). pp. 434-439 + + diff -r 000000000000 -r 2e7f0da431e3 test-data/merged-paired-reads_output.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/merged-paired-reads_output.fastq Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,50 @@ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 
+CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q + +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` + diff -r 000000000000 -r 2e7f0da431e3 test-data/sortmerna_wrapper_accept1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_accept1.fastq Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,28 @@ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 
+GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` diff -r 000000000000 -r 2e7f0da431e3 test-data/sortmerna_wrapper_in1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_in1.fastq Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,48 @@ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 
+ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +CTCGCGCCACTGGCTGGCTCTCCCCTCTCTCTCTCTCTCTCCCCTCTCTCTCCCCGAGTACACCCGCGCGGGC ++PHOSPHORE_0118:4:1101:5073:2473#ATCACG/1 +___cccccggggghhhhf_YbcgecZYa^Y^ceXccacXIXaafX`_]VMHVbdhSTFKU_Z]b]WFW_aZ__ +@PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +GCGGTCTGGAACCACCTGATCCCATCCCGAACTCAGCCGTGAAACAGACCAGCGCCGATGGTAGTGTGGCTTCTGCCCGT ++PHOSPHORE_0118:4:1101:17780:2432#ATCACG/1 +bbbeeeeegggggiiiiiiihiiihiiiiiiihiidhhigfhihiihgiiiiiggeccaccbbb_b`baccab`]_ba[T +@PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +GCATCGCCGCCCTCGCCCTCGCCCCCGCCGCCCTCGCCCT ++PHOSPHORE_0118:4:1101:7859:3729#ATCACG/1 +___c`cc`Yeee[ePY_`UY^_eGL_F_`dUU\F\^Q\a_ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +CCCCACTCACCACGCACGGCACCGCACGGCACTCTCACGGTTTCTCTCTCTCTCTCCCATCCTTCTCACTCCTTTCTCTCTCTCCCTATCTCTCTCTCTC ++PHOSPHORE_0118:4:1101:7007:4197#ATCACG/1 +bbbeeeeeggggghiiiiihfhifhhiihiagfhiiiiig^dceeeedcddddcccccRZ``bbcbcbbR_bb`b`bb`bbb`bbcbR]b_]]`bb_b_b +@PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +AGCCTGACGATAACCTACTTTCACACTGGTTGCAGCACTATCATCGGCGCAAAGTCGTTTCACGGTCCTGTTCGGGATGGGAAGGGGTGGGACCGACTTGC ++PHOSPHORE_0118:4:1101:7113:4916#ATCACG/1 +bbbeeeeegggggiiiiiiiiiihiiiiigiiiiiiiiiiiiiiiiiiiihiiggggeeeeddccccccccccccccccccbbccccHXaacccc]acacb +@PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 
+GCCGAAGAAGGACGTGGGAATCTGCGATAAGCCTGGTGGAGTCGATAACCGGACGTTGAGACCAGGATTTCCGAATGGGGAAACCCCGCACGACGTGTCGT ++PHOSPHORE_0118:4:1101:11597:5204#ATCACG/1 +bbbeeeeegggggifgiiiiiiiiiihiihiiiiiicfhgheghiiihihggcecccccccccccccccccccccccccccaacccaccc__[_a[a^[_Q +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged +@PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +GCGCTCGAGGAGGACGTGGCGCCCGAGGCTGCCGAGGCTG ++PHOSPHORE_0118:4:1101:13427:6518#ATCACG/1 +Z__cccc`ecg^_fe^[^^^Y_ffc]_`[eZ_c_W\]]b` diff -r 000000000000 -r 2e7f0da431e3 test-data/sortmerna_wrapper_other1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sortmerna_wrapper_other1.fastq Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,20 @@ +@PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +CGCGCGCACCATGCCCGTGCTCGACTGCTCGCCGCCCGCGCCGCGCCACGCCTCGCCGTTCACCTTGACCTCCACGCGCAGGCC ++PHOSPHORE_0118:4:1101:1143:2236#ATCACG/1 +__beeeecggcgfhhhhhhhhhhfhhhhhhhhhhhgececcccccccacaccccaccccac`bbbcbbbcbbc[`_a[aLT[aa +@PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +ATGCGCTTGAGGTAGCCAAGGGCACGCAGTTCACACCGTATGGCGGCGATGCGAGGCGGCGGCAGGAGG ++PHOSPHORE_0118:4:1101:1397:2156#ATCACG/1 +bbbeeeeegggggiiiiiiiiihfhiheghihgihiiihiiiiihifc^acac^_cEHVZaccaac^__ +@PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +TGACCAATGTTCTTCATAACGTCCCCGATAGACATGCCAACATTCGGTCCGTTGCGAAGATAGTCATCGCCGTCATAGTC ++PHOSPHORE_0118:4:1101:1633:2146#ATCACG/1 +_bbecdecgggggihhiiiihghhhiihiichdfghiffghfghiifiifhhhiiggaaaddddbdd`bccaQ\a`bc_b +@PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +GCATGCCAAACGTGGAGGCGGTGGGCGCGACGCCGGTGGGGAACTTGGACTTGTACATGGCCGGCCGCTGCGTCACCATCGA ++PHOSPHORE_0118:4:1101:1719:2154#ATCACG/1 +bbbeeeeeggggegifhiiii^fgh\edgffeeccc\acccWV_ccccbccccbbdccccccccaaacaZ]acaa^bccccb +@PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 +GTTCGATTAGTCTTTCGCCCCTATACCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC ++PHOSPHORE_0118:4:1101:1730:2193#ATCACG/1 
+baaeeeeeggggghhifhiiihfgbghhhiihihfhihfhihdfghhiiifhiihdbfgddged diff -r 000000000000 -r 2e7f0da431e3 tool-data/rRNA_databases.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rRNA_databases.loc Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,8 @@ +rfam-5.8s Database Rfam 5.8s $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta +rfam-5s Database Rfam 5s $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta +silva-arc-16s Database Silva-Arc 16s $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta +silva-arc-23s Database Silva-Arc 23s $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta +silva-bac-16s Database Silva-Bac 16s $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta +silva-bac-23s Database Silva-Bac 23s $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta +silva-euk-18s Database Silva-Euk 18s $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta +silva-euk-28s Database Silva-Euk 28s $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta diff -r 000000000000 -r 2e7f0da431e3 tool-data/rRNA_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/rRNA_databases.loc.sample Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that is used to define a +#list of public ribosomal databases, using three columns tab separated +#(longer whitespace are TAB characters): +# +# +# +#It is important that the actual database name does not have a space in it, +#and that the first tab that appears in the line is right before the path. 
+# +#So, for example, if your database is rfam-5.8s and the path to your base name +#is /data/rRNA_databases/rfam-5.8s, then the rRNA_databases.loc entry would look like this: +# +#rfam-5.8s Rfam 5.8S eukarya /data/rRNA_databases/rfam-5.8s +# +#Since SortMeRNA comes bundled with eight ribosomal databases, which are ready +#for use after the tool installation, this sample file is in fact an actual file +#to save the user the trouble of setting it up. +# +rfam-5.8s Rfam 5.8S eukarya $SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta +rfam-5s Rfam 5S archaea/bacteria $SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta +silva-arc-16s SILVA 16S archaea $SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta +silva-arc-23s SILVA 23S archaea $SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta +silva-bac-16s SILVA 16S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta +silva-bac-23s SILVA 23S bacteria $SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta +silva-euk-18s SILVA 18S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta +silva-euk-28s SILVA 28S eukarya $SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta diff -r 000000000000 -r 2e7f0da431e3 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,7 @@ + + + + value, name, path + +
+
diff -r 000000000000 -r 2e7f0da431e3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Apr 30 13:12:35 2013 -0400 @@ -0,0 +1,42 @@ + + + + + + http://bioinfo.lifl.fr/RNA/sortmerna/code/sortmerna-1.7.tar.gz + ./configure + make + make check + + sortmerna + $INSTALL_DIR/bin + + + buildtrie + $INSTALL_DIR/bin + + + scripts/merge-paired-reads.sh + $INSTALL_DIR/bin + + + rRNA_databases + $INSTALL_DIR/rRNA_databases + + + automata + $INSTALL_DIR/automata + + + $INSTALL_DIR + + + $INSTALL_DIR/bin + + + + +SortMeRNA requires g++ 4.3 or later. Installation may take a moment since ribosomal databases have to be indexed. + + +