Mercurial > repos > rnateam > segemehl
changeset 4:db367d012fa3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit b193689f9f30ce65a77be2d2c00929e3335a7d82
author | bgruening |
---|---|
date | Wed, 26 Jul 2017 15:32:09 -0400 |
parents | 039547ad8fb8 |
children | 9c0d4ec99ba9 |
files | segemehl.xml test-data/testmap.sam test-data/testmap2.sam tool_dependencies.xml |
diffstat | 4 files changed, 154 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/segemehl.xml Fri Dec 16 07:37:24 2016 -0500 +++ b/segemehl.xml Wed Jul 26 15:32:09 2017 -0400 @@ -1,61 +1,59 @@ -<tool id="segemehl" name="segemehl" version="0.2.0"> - <description>based short read aligner</description> - <requirements> - <requirement type="package" version="0.2.0">segemehl</requirement> - </requirements> - <stdio> - <regex match="Exit forced" +<tool id="segemehl" name="segemehl" version="0.2.0.3"> + <description>short read mapping with gaps</description> + <requirements> + <requirement type="package" version="0.2.0">segemehl</requirement> + </requirements> + <stdio> + <regex match="Exit forced" source="both" level="fatal" description="Execution halted." /> - </stdio> - <command> -<![CDATA[ + </stdio> + <command> + <![CDATA[ ## prepare segemehl index if no reference genome is supplied #if $refGenomeSource.genomeSource == "history": mkdir ./temp_index/ && - #set $temp_index = './temp_index/temp.idx' - segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome && + #set $temp_index = './temp_index/temp.idx' + segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome && #else: #set $temp_index = $refGenomeSource.index.fields.index_path #end if - - + ## execute segemehl - segemehl.x - + segemehl.x + ## number of threads - -t "\${GALAXY_SLOTS:-12}" - + -t "\${GALAXY_SLOTS:-12}" + #if $refGenomeSource.genomeSource == "history": - -d $refGenomeSource.own_reference_genome + -d $refGenomeSource.own_reference_genome #else: -d ${refGenomeSource.index.fields.db_path} #end if - + -i $temp_index - + ## check for single/pair-end #if str( $library.type ) == "single": #set $query_list = list() - ## prepare inputs - #for $fastq in $library.input_query: - $query_list.append('%s' % $fastq ) - #end for - -q "#echo ' '.join( $query_list )#" + ## prepare inputs + #for $fastq in $library.input_query: + $query_list.append('%s' % $fastq ) + #end for + -q "#echo ' '.join( $query_list )#" #else - ## prepare inputs - + ## prepare inputs #set $mate1 = list() #set $mate2 = list() #for $mate_pair in $library.mate_list: $mate1.append( str($mate_pair.first_strand_query) ) $mate2.append( str($mate_pair.second_strand_query) ) #end for - + -q #echo ','.join($mate1) -p #echo ','.join($mate2) - + -I $library.maxinsertsize #end if -m $minsize @@ -67,105 +65,128 @@ #if str( $prime3 ).strip(): -Q "$prime3" #end if - $polyA - $autoclip - $hardclip - $order - $splits + $polyA + $autoclip + $hardclip + $order #if $maxout: --maxout $maxout #end if + #if str( $splitreads.splits ) == "splits": + --splits + --minsplicecover $splitreads.minsplicecover + --minfragscore $splitreads.minfragscore + --minfraglen $splitreads.minfraglen + --splicescorescale $splitreads.splicescorescale + #end if + -M $maxinterval + -E $evalue + -D $differences -s - - --minsplicecover $minsplicecover - --minfragscore $minfragscore - --minfraglen $minfraglen - --splicescorescale $splicescorescale - -o '$segemehl_out' -]]> - </command> - <inputs> - <conditional name="refGenomeSource"> - <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> - <option value="indexed">Use a built-in index</option> - <option value="history">Use one from the history</option> - </param> - <when value="indexed"> - <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin"> - <options from_data_table="segemehl_indexes"> - <column name="value" index="0"/> - <column name="dbkey" index="1"/> - <column name="name" index="2"/> - <column name="db_path" index="3"/> - <column name="index_path" index="4"/> - <filter type="sort_by" column="2"/> - <validator type="no_options" message="No indexes are available for the selected input dataset"/> - </options> - </param> - </when> <!-- build-in --> - <when value="history"> - <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" /> - </when> <!-- history --> - </conditional> <!-- refGenomeSource --> + ]]> + </command> + <inputs> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin"> + <options from_data_table="segemehl_indexes"> + <column name="value" index="0"/> + <column name="dbkey" index="1"/> + <column name="name" index="2"/> + <column name="db_path" index="3"/> + <column name="index_path" index="4"/> + <filter type="sort_by" column="2"/> + <validator type="no_options" message="No indexes are available for the selected input dataset"/> + </options> + </param> + </when> <!-- build-in --> + <when value="history"> + <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" /> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> - <conditional name="library"> - <param name="type" type="select" label="Is this library paired-end?"> - <option value="single">Single-end</option> - <option value="paired">Paired-end</option> - </param> - <when value="single"> - <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" /> - </when> - <when value="paired"> - <!-- ToDo paired coolections --> - <repeat name="mate_list" title="Paired End Pairs" min="1"> - <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" /> - <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" /> - </repeat> - <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" /> - </when> - </conditional> + <conditional name="library"> + <param name="type" type="select" label="Is this library paired-end?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" /> + </when> + <when value="paired"> + <!-- ToDo paired coolections --> + <repeat name="mate_list" title="Paired End Pairs" min="1"> + <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" /> + <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" /> + </repeat> + <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" /> + </when> + </conditional> + <conditional name="splitreads"> + <param name="splits" type="select" label="Detect split/spliced reads" help="(--splits)"> + <option value="nosplit">No splits</option> + <option value="splits">Split reads</option> + </param> + <when value="splits"> <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" /> <param name="minfragscore" type="integer" value="18" label="Min coverage for spliced transcripts" help="(--minfragscore)" /> <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" /> <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score" - help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" /> - - <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" /> - - <param name="maxout" type="integer" min="0" value="0" optional="True" - label="Maximum number of alignments that will be reported" help="(--maxout)" /> - <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" /> - - <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)"> - <option value="1">report only best scoring hits</option> - <option value="0">report all scoring hits</option> - </param> - <param name="prime5" type="text" label="add 5' adapter" help="default: none (-Q)" /> - <param name="prime3" type="text" label="add 3' adapter" help="default: none (-P)"/> - <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/> - <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/> - <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/> - <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromsome and position" help="(-O)"/> - <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/> - </inputs> - <outputs> - <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/> - </outputs> - <tests> - <test> - <param name="genomeSource" value="history" /> - <param name="own_reference_genome" value="chr1.fa" /> - <param name="library" value="single" /> - <param name="input_query" value="test.fastq" /> - <param name="splits" value="true" /> - <output name="segemehl_out" file="testmap.sam" lines_diff="2" /> - </test> - </tests> - <help> -<![CDATA[ + help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" /> + <param name="sevalue" type="float" min="0" value="50.000000" label="max split evalue" help="(--maxsplitevalue)"/> + </when> + <when value="nosplit"> + </when> + </conditional> + + <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" /> + <param name="maxout" type="integer" min="0" value="0" optional="True" + label="Maximum number of alignments that will be reported" help="(--maxout)" /> + <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" /> + <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)"> + <option value="1">report only best scoring hits</option> + <option value="0">report all scoring hits</option> + </param> + <param name="prime5" type="text" label="add 5' adapter" help="default: none (-Q)" /> + <param name="prime3" type="text" label="add 3' adapter" help="default: none (-P)"/> + <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/> + <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/> + <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/> + <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromsome and position" help="(-O)"/> + <param name="differences" type="integer" min="0" value="1" label="search seeds initially with n differences" help="(--differences)"/> + <param name="evalue" type="float" min="0" value="5.000000" label="max evalue" help="(--evalue)"/> + <param name="maxinterval" type="integer" min="1" value="100" label="maximum width of a suffix array interval, i.e. a query seed will be omitted if it matches more than n times" help="(--maxinterval)"/> + </inputs> + <outputs> + <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/> + </outputs> + <tests> + <test> + <param name="genomeSource" value="history" /> + <param name="own_reference_genome" value="chr1.fa" /> + <param name="library" value="single" /> + <param name="input_query" value="test.fastq" /> + <param name="splits" value="nosplit" /> + <output name="segemehl_out" file="testmap.sam" lines_diff="2" /> + </test> + <test> + <param name="genomeSource" value="history" /> + <param name="own_reference_genome" value="chr1.fa" /> + <param name="library" value="single" /> + <param name="input_query" value="test.fastq" /> + <param name="splits" value="splits" /> + <param name="minsplicecover" value="40" /> + <output name="segemehl_out" file="testmap2.sam" lines_diff="2" /> + </test> + </tests> + <help> + <![CDATA[ .. class:: infomark @@ -181,9 +202,9 @@ .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/ -]]> - </help> - <citations> - <citation type="doi">10.1371/journal.pcbi.1000502</citation> - </citations> + ]]> + </help> + <citations> + <citation type="doi">10.1371/journal.pcbi.1000502</citation> + </citations> </tool>
--- a/test-data/testmap.sam Fri Dec 16 07:37:24 2016 -0500 +++ b/test-data/testmap.sam Wed Jul 26 15:32:09 2017 -0400 @@ -1,9 +1,7 @@ @HD VN:1.0 @SQ SN:TestChromosomeForGalaxy LN:3459 -@PG ID:segemehl VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $) CL:segemehl.x -i chr1.idx -d chr1.fa -q test.fastq -S -m 12 -A 85 -H 1 --minsplicecover 80 --minfragscore 18 --minfraglen 20 --splicescorescale 1.0 +@PG ID:segemehl VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $) CL:segemehl.x -t 2 -d test-data/chr1.fa -i test-data/chr1.idx -q test-data/test.fastq -m 12 -A 85 -H 1 -M 100 -E 5.0 -D 1 -s -o testout.sam 10.516 HWI-EAS100R:1:1:550:1622/1 0 TestChromosomeForGalaxy 182 255 70M * 0 0 CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG 3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q 10.2869 HWI-EAS100R:1:1:1698:585/1 0 TestChromosomeForGalaxy 661 255 70M * 0 0 AACCATGCATAAAAGGGGTTCGCCGTTCTCGGAGAGCCACAGAGCCCGGGCCACAGGCAGCTCCTTGCCA Q-a;@)*!F]Za^4!P*B?&!!No!^76b+X[6eOgr1$3:-Ywg;!Vzj!`=+e>YV|ok_z!D<2+jx NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q 10.2085 HWI-EAS100R:1:1:32:109/2 0 TestChromosomeForGalaxy 1021 255 70M * 0 0 GGGAATTCACCTCAAGAACATCCAAAGTGTGAAGGTGAAGTCCCCCGGACCCCACTGCGCCCAAACCGAA V:e@~!I\GQ>>]?)-qpe!nVI4IJ+4!wE{YoSsVrr~P;PnY/.!a;~!S"n+J#St-g!lQdGA9; NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q -10.2869 HWI-EAS100R:1:1:1698:585/2 0 TestChromosomeForGalaxy 1321 255 43M * 0 0 CGACTGGAGCTGTTGGTCAGAAATACTGGCGTCTGCCCCCTAA btOb!D1"=hSm"'G_#I{b!!l#6JQ&iq4A`F%Uug!x!'h NM:i:0 MD:Z:43 NH:i:1 XI:i:0 XL:i:2 XA:Z:Q XX:i:1 XY:i:43 XQ:i:0 XC:Z:TestChromosomeForGalaxy XV:i:2123 XT:i:32 -10.2869 HWI-EAS100R:1:1:1698:585/2 0 TestChromosomeForGalaxy 2123 255 27M * 0 0 TGGCAAATCCAACTGACCAGAAGGAAG 7o<%qCKQEtM)!bP>!."DvsX9T}= NM:i:0 MD:Z:27 NH:i:1 XI:i:0 XL:i:2 XA:Z:Q XX:i:44 XY:i:70 XQ:i:1 XP:Z:TestChromosomeForGalaxy XU:i:1363 XS:i:64 10.516 HWI-EAS100R:1:1:550:1623/1 0 TestChromosomeForGalaxy 182 255 70M * 0 0 CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG 3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testmap2.sam Wed Jul 26 15:32:09 2017 -0400 @@ -0,0 +1,9 @@ +@HD VN:1.0 +@SQ SN:TestChromosomeForGalaxy LN:3459 +@PG ID:segemehl VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $) CL:segemehl.x -t 2 -d test-data/chr1.fa -i test-data/chr1.idx -q test-data/test.fastq -m 12 -A 85 -H 1 -M 100 -E 5.0 -D 1 -s --splits --minsplicecover 40 -o testout2.sam +10.516 HWI-EAS100R:1:1:550:1622/1 0 TestChromosomeForGalaxy 182 255 70M * 0 0 CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG 3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q +10.2869 HWI-EAS100R:1:1:1698:585/1 0 TestChromosomeForGalaxy 661 255 70M * 0 0 AACCATGCATAAAAGGGGTTCGCCGTTCTCGGAGAGCCACAGAGCCCGGGCCACAGGCAGCTCCTTGCCA Q-a;@)*!F]Za^4!P*B?&!!No!^76b+X[6eOgr1$3:-Ywg;!Vzj!`=+e>YV|ok_z!D<2+jx NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q +10.2085 HWI-EAS100R:1:1:32:109/2 0 TestChromosomeForGalaxy 1021 255 70M * 0 0 GGGAATTCACCTCAAGAACATCCAAAGTGTGAAGGTGAAGTCCCCCGGACCCCACTGCGCCCAAACCGAA V:e@~!I\GQ>>]?)-qpe!nVI4IJ+4!wE{YoSsVrr~P;PnY/.!a;~!S"n+J#St-g!lQdGA9; NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q +10.2869 HWI-EAS100R:1:1:1698:585/2 0 TestChromosomeForGalaxy 1321 255 43M * 0 0 CGACTGGAGCTGTTGGTCAGAAATACTGGCGTCTGCCCCCTAA btOb!D1"=hSm"'G_#I{b!!l#6JQ&iq4A`F%Uug!x!'h NM:i:0 MD:Z:43 NH:i:1 XI:i:0 XL:i:2 XA:Z:Q XX:i:1 XY:i:43 XQ:i:0 XC:Z:TestChromosomeForGalaxy XV:i:2123 XT:i:32 +10.2869 HWI-EAS100R:1:1:1698:585/2 0 TestChromosomeForGalaxy 2123 255 27M * 0 0 TGGCAAATCCAACTGACCAGAAGGAAG 7o<%qCKQEtM)!bP>!."DvsX9T}= NM:i:0 MD:Z:27 NH:i:1 XI:i:0 XL:i:2 XA:Z:Q XX:i:44 XY:i:70 XQ:i:1 XP:Z:TestChromosomeForGalaxy XU:i:1363 XS:i:64 +10.516 HWI-EAS100R:1:1:550:1623/1 0 TestChromosomeForGalaxy 182 255 70M * 0 0 CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG 3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z NM:i:0 MD:Z:70 NH:i:1 XI:i:0 XA:Z:Q
--- a/tool_dependencies.xml Fri Dec 16 07:37:24 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="segemehl" version="0.2.0"> - <repository changeset_revision="11cce9ae6cc6" name="package_segemehl_0_2_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>