changeset 4:db367d012fa3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit b193689f9f30ce65a77be2d2c00929e3335a7d82
author bgruening
date Wed, 26 Jul 2017 15:32:09 -0400
parents 039547ad8fb8
children 9c0d4ec99ba9
files segemehl.xml test-data/testmap.sam test-data/testmap2.sam tool_dependencies.xml
diffstat 4 files changed, 154 insertions(+), 132 deletions(-) [+]
line wrap: on
line diff
--- a/segemehl.xml	Fri Dec 16 07:37:24 2016 -0500
+++ b/segemehl.xml	Wed Jul 26 15:32:09 2017 -0400
@@ -1,61 +1,59 @@
-<tool id="segemehl" name="segemehl" version="0.2.0">
-    <description>based short read aligner</description>
-    <requirements>
-        <requirement type="package" version="0.2.0">segemehl</requirement>
-    </requirements>
-    <stdio>
-        <regex match="Exit forced"
+<tool id="segemehl" name="segemehl" version="0.2.0.3">
+  <description>short read mapping with gaps</description>
+  <requirements>
+    <requirement type="package" version="0.2.0">segemehl</requirement>
+  </requirements>
+  <stdio>
+    <regex match="Exit forced"
            source="both"
            level="fatal"
            description="Execution halted." />
-    </stdio>
-    <command>
-<![CDATA[
+  </stdio>
+  <command>
+    <![CDATA[
         ## prepare segemehl index if no reference genome is supplied
         #if $refGenomeSource.genomeSource == "history":
             mkdir ./temp_index/ &&
-	    #set $temp_index = './temp_index/temp.idx'
-	    segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome &&
+            #set $temp_index = './temp_index/temp.idx'
+            segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome &&
         #else:
             #set $temp_index = $refGenomeSource.index.fields.index_path
         #end if
-
-
+        
         ## execute segemehl
-        segemehl.x
-
+            segemehl.x
+        
         ## number of threads
-        -t "\${GALAXY_SLOTS:-12}"
-
+            -t "\${GALAXY_SLOTS:-12}"
+        
         #if $refGenomeSource.genomeSource == "history":
-	    -d $refGenomeSource.own_reference_genome
+            -d $refGenomeSource.own_reference_genome
         #else:
             -d ${refGenomeSource.index.fields.db_path}
         #end if
-
+        
         -i $temp_index
-
+        
         ## check for single/pair-end
         #if str( $library.type ) == "single":
             #set $query_list = list()
-            ## prepare inputs
-            #for $fastq in $library.input_query:
-                $query_list.append('%s' % $fastq )
-            #end for
-            -q "#echo ' '.join( $query_list )#"
+        ## prepare inputs
+        #for $fastq in $library.input_query:
+            $query_list.append('%s' % $fastq )
+        #end for
+        -q "#echo ' '.join( $query_list )#"
         #else
-            ## prepare inputs
-
+            ## prepare inputs        
             #set $mate1 = list()
             #set $mate2 = list()
             #for $mate_pair in $library.mate_list:
                 $mate1.append( str($mate_pair.first_strand_query) )
                 $mate2.append( str($mate_pair.second_strand_query) )
             #end for
-
+        
             -q #echo ','.join($mate1)
             -p #echo ','.join($mate2)
-
+        
             -I $library.maxinsertsize
         #end if
         -m $minsize
@@ -67,105 +65,128 @@
         #if str( $prime3 ).strip():
             -Q "$prime3"
         #end if
-        $polyA
-        $autoclip
-        $hardclip
-        $order
-	$splits
+            $polyA
+            $autoclip
+            $hardclip
+            $order
         #if $maxout:
             --maxout $maxout
         #end if
+        #if str( $splitreads.splits ) == "splits":
+            --splits
+            --minsplicecover $splitreads.minsplicecover
+            --minfragscore $splitreads.minfragscore
+            --minfraglen $splitreads.minfraglen
+            --splicescorescale $splitreads.splicescorescale
+        #end if
+        -M $maxinterval
+        -E $evalue
+        -D $differences
         -s
-
-        --minsplicecover $minsplicecover
-        --minfragscore $minfragscore
-        --minfraglen $minfraglen
-        --splicescorescale $splicescorescale
-
         -o '$segemehl_out'
-]]>
-    </command>
-    <inputs>
-        <conditional name="refGenomeSource">
-          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-            <option value="indexed">Use a built-in index</option>
-            <option value="history">Use one from the history</option>
-          </param>
-          <when value="indexed">
-            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
-              <options from_data_table="segemehl_indexes">
-                <column name="value" index="0"/>
-                <column name="dbkey" index="1"/>
-                <column name="name" index="2"/>
-                <column name="db_path" index="3"/>
-                <column name="index_path" index="4"/>
-                <filter type="sort_by" column="2"/>
-                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-              </options>
-            </param>
-          </when>  <!-- build-in -->
-          <when value="history">
-              <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
-          </when>  <!-- history -->
-        </conditional>  <!-- refGenomeSource -->
+    ]]>
+  </command>
+  <inputs>
+    <conditional name="refGenomeSource">
+      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+        <option value="indexed">Use a built-in index</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
+          <options from_data_table="segemehl_indexes">
+            <column name="value" index="0"/>
+            <column name="dbkey" index="1"/>
+            <column name="name" index="2"/>
+            <column name="db_path" index="3"/>
+            <column name="index_path" index="4"/>
+            <filter type="sort_by" column="2"/>
+            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+          </options>
+        </param>
+      </when>  <!-- built-in -->
+      <when value="history">
+        <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
+      </when>  <!-- history -->
+    </conditional>  <!-- refGenomeSource -->
 
-        <conditional name="library">
-            <param name="type" type="select" label="Is this library paired-end?">
-                <option value="single">Single-end</option>
-                <option value="paired">Paired-end</option>
-            </param>
-            <when value="single">
-                <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" />
-            </when>
-            <when value="paired">
-                <!-- ToDo paired coolections -->
-                <repeat name="mate_list" title="Paired End Pairs" min="1">
-                    <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
-                    <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
-                </repeat>
-                <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
-            </when>
-        </conditional>
+    <conditional name="library">
+      <param name="type" type="select" label="Is this library paired-end?">
+        <option value="single">Single-end</option>
+        <option value="paired">Paired-end</option>
+      </param>
+      <when value="single">
+        <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" />
+      </when>
+      <when value="paired">
+        <!-- ToDo paired collections -->
+        <repeat name="mate_list" title="Paired End Pairs" min="1">
+          <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
+          <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
+        </repeat>
+        <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
+      </when>
+    </conditional>
 
+    <conditional name="splitreads">
+      <param name="splits" type="select" label="Detect split/spliced reads" help="(--splits)">
+        <option value="nosplit">No splits</option>
+        <option value="splits">Split reads</option>
+      </param>
+      <when value="splits">
         <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" />
         <param name="minfragscore" type="integer" value="18" label="Min coverage for spliced transcripts" help="(--minfragscore)" />
         <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" />
         <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score"
-            help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" />
-
-        <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" />
-
-        <param name="maxout" type="integer" min="0" value="0" optional="True" 
-            label="Maximum number of alignments that will be reported" help="(--maxout)" />
-        <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" />
-
-        <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
-            <option value="1">report only best scoring hits</option>
-            <option value="0">report all scoring hits</option>
-        </param>
-        <param name="prime5" type="text" label="add 5' adapter" help="default: none (-Q)" />
-        <param name="prime3" type="text" label="add 3' adapter" help="default: none (-P)"/>
-        <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
-        <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
-        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
-        <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromsome and position" help="(-O)"/>
-        <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/>
-    </inputs>
-    <outputs>
-        <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
-    </outputs>
-    <tests>
-      <test>
-	<param name="genomeSource" value="history" />
-        <param name="own_reference_genome" value="chr1.fa" />
-	<param name="library" value="single" />
-	<param name="input_query" value="test.fastq" />	
-	<param name="splits" value="true" />
-        <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
-      </test>
-    </tests>
-    <help>
-<![CDATA[
+               help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" />
+        <param name="sevalue" type="float" min="0" value="50.000000" label="max split evalue" help="(--maxsplitevalue)"/>
+      </when>
+      <when value="nosplit">
+      </when>     
+    </conditional>
+    
+    <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" />
+    <param name="maxout" type="integer" min="0" value="0" optional="True" 
+           label="Maximum number of alignments that will be reported" help="(--maxout)" />
+    <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" />
+    <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
+      <option value="1">report only best scoring hits</option>
+      <option value="0">report all scoring hits</option>
+    </param>
+    <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" />
+    <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/>
+    <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
+    <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
+    <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
+    <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
+    <param name="differences" type="integer" min="0" value="1" label="search seeds initially with n differences" help="(--differences)"/>
+    <param name="evalue" type="float" min="0" value="5.000000" label="max evalue" help="(--evalue)"/>
+    <param name="maxinterval" type="integer" min="1" value="100" label="maximum width of a suffix array interval, i.e. a query seed will be omitted if it matches more than n times" help="(--maxinterval)"/>
+  </inputs>
+  <outputs>
+    <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="genomeSource" value="history" />
+      <param name="own_reference_genome" value="chr1.fa" />
+      <param name="library" value="single" />
+      <param name="input_query" value="test.fastq" />   
+      <param name="splits" value="nosplit" />
+      <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
+    </test>
+    <test>
+      <param name="genomeSource" value="history" />
+      <param name="own_reference_genome" value="chr1.fa" />
+      <param name="library" value="single" />
+      <param name="input_query" value="test.fastq" />   
+      <param name="splits" value="splits" />
+	  <param name="minsplicecover" value="40" />
+      <output name="segemehl_out" file="testmap2.sam" lines_diff="2" />
+    </test>
+  </tests>
+  <help>
+    <![CDATA[
 
 .. class:: infomark
 
@@ -181,9 +202,9 @@
 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
 
 
-]]>
-    </help>
-    <citations>
-        <citation type="doi">10.1371/journal.pcbi.1000502</citation>
-    </citations>
+    ]]>
+  </help>
+  <citations>
+    <citation type="doi">10.1371/journal.pcbi.1000502</citation>
+  </citations>
 </tool>
--- a/test-data/testmap.sam	Fri Dec 16 07:37:24 2016 -0500
+++ b/test-data/testmap.sam	Wed Jul 26 15:32:09 2017 -0400
@@ -1,9 +1,7 @@
 @HD	VN:1.0
 @SQ	SN:TestChromosomeForGalaxy	LN:3459
-@PG	ID:segemehl	VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $)	CL:segemehl.x -i chr1.idx -d chr1.fa -q test.fastq -S -m 12 -A 85 -H 1 --minsplicecover 80 --minfragscore 18 --minfraglen 20 --splicescorescale 1.0
+@PG	ID:segemehl	VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $)	CL:segemehl.x -t 2 -d test-data/chr1.fa -i test-data/chr1.idx -q test-data/test.fastq -m 12 -A 85 -H 1 -M 100 -E 5.0 -D 1 -s -o testout.sam
 10.516 HWI-EAS100R:1:1:550:1622/1	0	TestChromosomeForGalaxy	182	255	70M	*	0	0	CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG	3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
 10.2869 HWI-EAS100R:1:1:1698:585/1	0	TestChromosomeForGalaxy	661	255	70M	*	0	0	AACCATGCATAAAAGGGGTTCGCCGTTCTCGGAGAGCCACAGAGCCCGGGCCACAGGCAGCTCCTTGCCA	Q-a;@)*!F]Za^4!P*B?&!!No!^76b+X[6eOgr1$3:-Ywg;!Vzj!`=+e>YV|ok_z!D<2+jx	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
 10.2085 HWI-EAS100R:1:1:32:109/2	0	TestChromosomeForGalaxy	1021	255	70M	*	0	0	GGGAATTCACCTCAAGAACATCCAAAGTGTGAAGGTGAAGTCCCCCGGACCCCACTGCGCCCAAACCGAA	V:e@~!I\GQ>>]?)-qpe!nVI4IJ+4!wE{YoSsVrr~P;PnY/.!a;~!S"n+J#St-g!lQdGA9;	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
-10.2869 HWI-EAS100R:1:1:1698:585/2	0	TestChromosomeForGalaxy	1321	255	43M	*	0	0	CGACTGGAGCTGTTGGTCAGAAATACTGGCGTCTGCCCCCTAA	btOb!D1"=hSm"'G_#I{b!!l#6JQ&iq4A`F%Uug!x!'h	NM:i:0	MD:Z:43	NH:i:1	XI:i:0	XL:i:2	XA:Z:Q	XX:i:1	XY:i:43	XQ:i:0	XC:Z:TestChromosomeForGalaxy	XV:i:2123	XT:i:32
-10.2869 HWI-EAS100R:1:1:1698:585/2	0	TestChromosomeForGalaxy	2123	255	27M	*	0	0	TGGCAAATCCAACTGACCAGAAGGAAG	7o<%qCKQEtM)!bP>!."DvsX9T}=	NM:i:0	MD:Z:27	NH:i:1	XI:i:0	XL:i:2	XA:Z:Q	XX:i:44	XY:i:70	XQ:i:1	XP:Z:TestChromosomeForGalaxy	XU:i:1363	XS:i:64
 10.516 HWI-EAS100R:1:1:550:1623/1	0	TestChromosomeForGalaxy	182	255	70M	*	0	0	CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG	3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testmap2.sam	Wed Jul 26 15:32:09 2017 -0400
@@ -0,0 +1,9 @@
+@HD	VN:1.0
+@SQ	SN:TestChromosomeForGalaxy	LN:3459
+@PG	ID:segemehl	VN:0.2.0-$Rev: 418 $ ($Date: 2015-01-05 05:17:35 -0500 (Mon, 05 Jan 2015) $)	CL:segemehl.x -t 2 -d test-data/chr1.fa -i test-data/chr1.idx -q test-data/test.fastq -m 12 -A 85 -H 1 -M 100 -E 5.0 -D 1 -s --splits --minsplicecover 40 -o testout2.sam
+10.516 HWI-EAS100R:1:1:550:1622/1	0	TestChromosomeForGalaxy	182	255	70M	*	0	0	CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG	3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
+10.2869 HWI-EAS100R:1:1:1698:585/1	0	TestChromosomeForGalaxy	661	255	70M	*	0	0	AACCATGCATAAAAGGGGTTCGCCGTTCTCGGAGAGCCACAGAGCCCGGGCCACAGGCAGCTCCTTGCCA	Q-a;@)*!F]Za^4!P*B?&!!No!^76b+X[6eOgr1$3:-Ywg;!Vzj!`=+e>YV|ok_z!D<2+jx	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
+10.2085 HWI-EAS100R:1:1:32:109/2	0	TestChromosomeForGalaxy	1021	255	70M	*	0	0	GGGAATTCACCTCAAGAACATCCAAAGTGTGAAGGTGAAGTCCCCCGGACCCCACTGCGCCCAAACCGAA	V:e@~!I\GQ>>]?)-qpe!nVI4IJ+4!wE{YoSsVrr~P;PnY/.!a;~!S"n+J#St-g!lQdGA9;	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
+10.2869 HWI-EAS100R:1:1:1698:585/2	0	TestChromosomeForGalaxy	1321	255	43M	*	0	0	CGACTGGAGCTGTTGGTCAGAAATACTGGCGTCTGCCCCCTAA	btOb!D1"=hSm"'G_#I{b!!l#6JQ&iq4A`F%Uug!x!'h	NM:i:0	MD:Z:43	NH:i:1	XI:i:0	XL:i:2	XA:Z:Q	XX:i:1	XY:i:43	XQ:i:0	XC:Z:TestChromosomeForGalaxy	XV:i:2123	XT:i:32
+10.2869 HWI-EAS100R:1:1:1698:585/2	0	TestChromosomeForGalaxy	2123	255	27M	*	0	0	TGGCAAATCCAACTGACCAGAAGGAAG	7o<%qCKQEtM)!bP>!."DvsX9T}=	NM:i:0	MD:Z:27	NH:i:1	XI:i:0	XL:i:2	XA:Z:Q	XX:i:44	XY:i:70	XQ:i:1	XP:Z:TestChromosomeForGalaxy	XU:i:1363	XS:i:64
+10.516 HWI-EAS100R:1:1:550:1623/1	0	TestChromosomeForGalaxy	182	255	70M	*	0	0	CATGTACTGTTAAAGCGTGCGTTTATTTCAAACATTAATGAAATTTGCAGAACCCAAACTAAAGAGAGAG	3MIa!,$)8EA)!1>tMJ{:2WrL`s|`gg{]'0+Op!6RxNw;V)XKV#Go5}b!`_V]A?!F>{LM(z	NM:i:0	MD:Z:70	NH:i:1	XI:i:0	XA:Z:Q
--- a/tool_dependencies.xml	Fri Dec 16 07:37:24 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="segemehl" version="0.2.0">
-        <repository changeset_revision="11cce9ae6cc6" name="package_segemehl_0_2_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>