comparison segemehl.xml @ 4:db367d012fa3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit b193689f9f30ce65a77be2d2c00929e3335a7d82
author bgruening
date Wed, 26 Jul 2017 15:32:09 -0400
parents 039547ad8fb8
children 9c0d4ec99ba9
comparison
equal deleted inserted replaced
3:039547ad8fb8 4:db367d012fa3
1 <tool id="segemehl" name="segemehl" version="0.2.0"> 1 <tool id="segemehl" name="segemehl" version="0.2.0.3">
2 <description>based short read aligner</description> 2 <description>short read mapping with gaps</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.2.0">segemehl</requirement> 4 <requirement type="package" version="0.2.0">segemehl</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <regex match="Exit forced" 7 <regex match="Exit forced"
8 source="both" 8 source="both"
9 level="fatal" 9 level="fatal"
10 description="Execution halted." /> 10 description="Execution halted." />
11 </stdio> 11 </stdio>
12 <command> 12 <command>
13 <![CDATA[ 13 <![CDATA[
14 ## prepare segemehl index if no reference genome is supplied 14 ## prepare segemehl index if no reference genome is supplied
15 #if $refGenomeSource.genomeSource == "history": 15 #if $refGenomeSource.genomeSource == "history":
16 mkdir ./temp_index/ && 16 mkdir ./temp_index/ &&
17 #set $temp_index = './temp_index/temp.idx' 17 #set $temp_index = './temp_index/temp.idx'
18 segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome && 18 segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome &&
19 #else: 19 #else:
20 #set $temp_index = $refGenomeSource.index.fields.index_path 20 #set $temp_index = $refGenomeSource.index.fields.index_path
21 #end if 21 #end if
22 22
23
24 ## execute segemehl 23 ## execute segemehl
25 segemehl.x 24 segemehl.x
26 25
27 ## number of threads 26 ## number of threads
28 -t "\${GALAXY_SLOTS:-12}" 27 -t "\${GALAXY_SLOTS:-12}"
29 28
30 #if $refGenomeSource.genomeSource == "history": 29 #if $refGenomeSource.genomeSource == "history":
31 -d $refGenomeSource.own_reference_genome 30 -d $refGenomeSource.own_reference_genome
32 #else: 31 #else:
33 -d ${refGenomeSource.index.fields.db_path} 32 -d ${refGenomeSource.index.fields.db_path}
34 #end if 33 #end if
35 34
36 -i $temp_index 35 -i $temp_index
37 36
38 ## check for single/pair-end 37 ## check for single/pair-end
39 #if str( $library.type ) == "single": 38 #if str( $library.type ) == "single":
40 #set $query_list = list() 39 #set $query_list = list()
41 ## prepare inputs 40 ## prepare inputs
42 #for $fastq in $library.input_query: 41 #for $fastq in $library.input_query:
43 $query_list.append('%s' % $fastq ) 42 $query_list.append('%s' % $fastq )
44 #end for 43 #end for
45 -q "#echo ' '.join( $query_list )#" 44 -q "#echo ' '.join( $query_list )#"
46 #else 45 #else
47 ## prepare inputs 46 ## prepare inputs
48
49 #set $mate1 = list() 47 #set $mate1 = list()
50 #set $mate2 = list() 48 #set $mate2 = list()
51 #for $mate_pair in $library.mate_list: 49 #for $mate_pair in $library.mate_list:
52 $mate1.append( str($mate_pair.first_strand_query) ) 50 $mate1.append( str($mate_pair.first_strand_query) )
53 $mate2.append( str($mate_pair.second_strand_query) ) 51 $mate2.append( str($mate_pair.second_strand_query) )
54 #end for 52 #end for
55 53
56 -q #echo ','.join($mate1) 54 -q #echo ','.join($mate1)
57 -p #echo ','.join($mate2) 55 -p #echo ','.join($mate2)
58 56
59 -I $library.maxinsertsize 57 -I $library.maxinsertsize
60 #end if 58 #end if
61 -m $minsize 59 -m $minsize
62 -A $accuracy 60 -A $accuracy
63 -H $hitstrategy 61 -H $hitstrategy
65 -P "$prime5" 63 -P "$prime5"
66 #end if 64 #end if
67 #if str( $prime3 ).strip(): 65 #if str( $prime3 ).strip():
68 -Q "$prime3" 66 -Q "$prime3"
69 #end if 67 #end if
70 $polyA 68 $polyA
71 $autoclip 69 $autoclip
72 $hardclip 70 $hardclip
73 $order 71 $order
74 $splits
75 #if $maxout: 72 #if $maxout:
76 --maxout $maxout 73 --maxout $maxout
77 #end if 74 #end if
75 #if str( $splitreads.splits ) == "splits":
76 --splits
77 --minsplicecover $splitreads.minsplicecover
78 --minfragscore $splitreads.minfragscore
79 --minfraglen $splitreads.minfraglen
80 --splicescorescale $splitreads.splicescorescale
81 #end if
82 -M $maxinterval
83 -E $evalue
84 -D $differences
78 -s 85 -s
79
80 --minsplicecover $minsplicecover
81 --minfragscore $minfragscore
82 --minfraglen $minfraglen
83 --splicescorescale $splicescorescale
84
85 -o '$segemehl_out' 86 -o '$segemehl_out'
86 ]]> 87 ]]>
87 </command> 88 </command>
88 <inputs> 89 <inputs>
89 <conditional name="refGenomeSource"> 90 <conditional name="refGenomeSource">
90 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 91 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
91 <option value="indexed">Use a built-in index</option> 92 <option value="indexed">Use a built-in index</option>
92 <option value="history">Use one from the history</option> 93 <option value="history">Use one from the history</option>
93 </param> 94 </param>
94 <when value="indexed"> 95 <when value="indexed">
95 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin"> 96 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
96 <options from_data_table="segemehl_indexes"> 97 <options from_data_table="segemehl_indexes">
97 <column name="value" index="0"/> 98 <column name="value" index="0"/>
98 <column name="dbkey" index="1"/> 99 <column name="dbkey" index="1"/>
99 <column name="name" index="2"/> 100 <column name="name" index="2"/>
100 <column name="db_path" index="3"/> 101 <column name="db_path" index="3"/>
101 <column name="index_path" index="4"/> 102 <column name="index_path" index="4"/>
102 <filter type="sort_by" column="2"/> 103 <filter type="sort_by" column="2"/>
103 <validator type="no_options" message="No indexes are available for the selected input dataset"/> 104 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
104 </options> 105 </options>
105 </param> 106 </param>
106 </when> <!-- built-in --> 107 </when> <!-- built-in -->
107 <when value="history"> 108 <when value="history">
108 <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" /> 109 <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
109 </when> <!-- history --> 110 </when> <!-- history -->
110 </conditional> <!-- refGenomeSource --> 111 </conditional> <!-- refGenomeSource -->
111 112
112 <conditional name="library"> 113 <conditional name="library">
113 <param name="type" type="select" label="Is this library paired-end?"> 114 <param name="type" type="select" label="Is this library paired-end?">
114 <option value="single">Single-end</option> 115 <option value="single">Single-end</option>
115 <option value="paired">Paired-end</option> 116 <option value="paired">Paired-end</option>
116 </param> 117 </param>
117 <when value="single"> 118 <when value="single">
118 <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" /> 119 <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" />
119 </when> 120 </when>
120 <when value="paired"> 121 <when value="paired">
121 <!-- ToDo paired collections --> 122 <!-- ToDo paired collections -->
122 <repeat name="mate_list" title="Paired End Pairs" min="1"> 123 <repeat name="mate_list" title="Paired End Pairs" min="1">
123 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" /> 124 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
124 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" /> 125 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
125 </repeat> 126 </repeat>
126 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" /> 127 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
127 </when> 128 </when>
128 </conditional> 129 </conditional>
129 130
131 <conditional name="splitreads">
132 <param name="splits" type="select" label="Detect split/spliced reads" help="(--splits)">
133 <option value="nosplit">No splits</option>
134 <option value="splits">Split reads</option>
135 </param>
136 <when value="splits">
130 <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" /> 137 <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" />
131 <param name="minfragscore" type="integer" value="18" label="Min score of a spliced fragment" help="(--minfragscore)" /> 138 <param name="minfragscore" type="integer" value="18" label="Min score of a spliced fragment" help="(--minfragscore)" />
132 <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" /> 139 <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" />
133 <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score" 140 <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score"
134 help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" /> 141 help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" />
135 142 <param name="sevalue" type="float" min="0" value="50.000000" label="max split evalue" help="(--maxsplitevalue)"/>
136 <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" /> 143 </when>
137 144 <when value="nosplit">
138 <param name="maxout" type="integer" min="0" value="0" optional="True" 145 </when>
139 label="Maximum number of alignments that will be reported" help="(--maxout)" /> 146 </conditional>
140 <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" /> 147
141 148 <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" />
142 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)"> 149 <param name="maxout" type="integer" min="0" value="0" optional="True"
143 <option value="1">report only best scoring hits</option> 150 label="Maximum number of alignments that will be reported" help="(--maxout)" />
144 <option value="0">report all scoring hits</option> 151 <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" />
145 </param> 152 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
146 <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" /> 153 <option value="1">report only best scoring hits</option>
147 <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/> 154 <option value="0">report all scoring hits</option>
148 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/> 155 </param>
149 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/> 156 <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" />
150 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/> 157 <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/>
151 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/> 158 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
152 <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/> 159 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
153 </inputs> 160 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
154 <outputs> 161 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
155 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/> 162 <param name="differences" type="integer" min="0" value="1" label="search seeds initially with n differences" help="(--differences)"/>
156 </outputs> 163 <param name="evalue" type="float" min="0" value="5.000000" label="max evalue" help="(--evalue)"/>
157 <tests> 164 <param name="maxinterval" type="integer" min="1" value="100" label="maximum width of a suffix array interval, i.e. a query seed will be omitted if it matches more than n times" help="(--maxinterval)"/>
158 <test> 165 </inputs>
159 <param name="genomeSource" value="history" /> 166 <outputs>
160 <param name="own_reference_genome" value="chr1.fa" /> 167 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
161 <param name="library" value="single" /> 168 </outputs>
162 <param name="input_query" value="test.fastq" /> 169 <tests>
163 <param name="splits" value="true" /> 170 <test>
164 <output name="segemehl_out" file="testmap.sam" lines_diff="2" /> 171 <param name="genomeSource" value="history" />
165 </test> 172 <param name="own_reference_genome" value="chr1.fa" />
166 </tests> 173 <param name="library" value="single" />
167 <help> 174 <param name="input_query" value="test.fastq" />
168 <![CDATA[ 175 <param name="splits" value="nosplit" />
176 <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
177 </test>
178 <test>
179 <param name="genomeSource" value="history" />
180 <param name="own_reference_genome" value="chr1.fa" />
181 <param name="library" value="single" />
182 <param name="input_query" value="test.fastq" />
183 <param name="splits" value="splits" />
184 <param name="minsplicecover" value="40" />
185 <output name="segemehl_out" file="testmap2.sam" lines_diff="2" />
186 </test>
187 </tests>
188 <help>
189 <![CDATA[
169 190
170 .. class:: infomark 191 .. class:: infomark
171 192
172 **What it does** 193 **What it does**
173 194
179 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping. 200 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
180 201
181 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/ 202 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
182 203
183 204
184 ]]> 205 ]]>
185 </help> 206 </help>
186 <citations> 207 <citations>
187 <citation type="doi">10.1371/journal.pcbi.1000502</citation> 208 <citation type="doi">10.1371/journal.pcbi.1000502</citation>
188 </citations> 209 </citations>
189 </tool> 210 </tool>