comparison netmhc.xml @ 0:bb25a4e5f211 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/netmhc commit 3bf9a39fe11622806ac6b032ba4fc6139a003580"
author jjohnson
date Tue, 18 Feb 2020 14:48:51 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bb25a4e5f211
1 <tool id="netmhc" name="netMHC" version="4.0.0">
2 <description>MHC Binding prediction</description>
3 <requirements>
4 <requirement type="package" version="4.0">netMHC</requirement>
5 </requirements>
6 <stdio>
7 <exit_code range="1:" />
8 </stdio>
9 <configfiles>
10 <configfile name="format_out"><![CDATA[
import sys
import re

## Helper script rendered by Galaxy from this configfile: it rewrites the
## plain-text netMHC report as a tab-separated table.
## NOTE: this text is a Cheetah template, so double-hash lines like these are
## template comments and do not appear in the generated script.

USAGE = "python script.py netMHC_output_tsv output_file"

## Column-header line of the report ('.' stands in for the parentheses).
hpat = r'^\s*(pos)\s+(HLA)\s+(peptide)\s+(Core)\s+(Offset)\s+(I_pos)\s+(I_len)\s+(D_pos)\s+(D_len)\s+(iCore)\s+(Identity)\s+(1-log50k.aff.)\s+(Affinity.nM.)\s+(%Rank)\s+(BindLevel)\s*$'
## One prediction row; the last group captures the optional binding flag.
epat = r'^\s*(\d+)\s+(\S+)\s+([A-Z]+)\s+([-_A-Z]*)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([A-Z]+)\s+(\S+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+).*?([SWB]*)$'


def convert(fh, wh):
    """Copy the prediction table from the report fh to wh as tab-separated text.

    Prediction rows are written with one tab between fields. The column
    header is written once, prefixed with a hash character, and is only
    looked for until the first header or prediction row has been written.
    Every other line (banners, separators, summaries, blanks) is dropped.

    Returns the number of lines written.
    """
    cnt = 0
    for line in fh:
        line = line.rstrip()
        if not line:
            continue
        m = re.match(epat, line)
        if m:
            wh.write("%s\n" % '\t'.join([x if x else '' for x in m.groups()]))
            cnt += 1
        elif cnt == 0:
            m = re.match(hpat, line)
            if m:
                wh.write("#%s\n" % '\t'.join(m.groups()))
                cnt += 1
    return cnt


def main(argv):
    """Run the conversion for argv = [script, netMHC_output, output_file].

    Returns the process exit status: 0 on success, 4 when the argument
    count is wrong, 3 when reading or writing fails.
    """
    if len(argv) != 3:
        sys.stderr.write("%s\n" % USAGE)
        return 4
    try:
        ## The with-blocks close both files even when the conversion fails.
        with open(argv[1], 'r') as fh:
            with open(argv[2], 'w') as wh:
                convert(fh, wh)
    except Exception as e:
        ## Report on stderr; the old print statement lacked the redirect and
        ## wrote the message to stdout instead.
        sys.stderr.write("error: %s\n" % e)
        return 3
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
43 ]]>
44 </configfile>
45 <configfile name="format_tsv"><![CDATA[
#!/usr/bin/env python
import sys

## Helper script rendered by Galaxy from this configfile: it merges the two
## header rows of the netMHC xls summary into one commented header line and
## copies the data rows unchanged.
## NOTE: this text is a Cheetah template, so double-hash lines like these are
## template comments and do not appear in the generated script.

USAGE = "python script.py netMHC_xls output_file"


def merged_header(alleles, hdr):
    """Combine the allele row and the column-name row into one list of labels.

    Column i takes the allele found at index i - i % 3 of the first row, so
    each group of three columns shares one allele cell. A missing or empty
    allele cell leaves the column name on its own.
    """
    labels = []
    for i, name in enumerate(hdr):
        group = i - i % 3
        ## A short allele row used to raise IndexError here.
        allele = alleles[group] if group < len(alleles) else ''
        labels.append(' '.join([allele, name]).strip())
    return labels


def convert(fh, wh):
    """Write the summary read from fh to wh with a single merged header line.

    Row 0 supplies the alleles, row 1 the column names; the merged header is
    written with a leading hash character. All later rows are copied as is.
    """
    alleles = []
    for n, line in enumerate(fh):
        if n == 0:
            alleles = line.rstrip('\n').split('\t')
        elif n == 1:
            hdr = line.rstrip('\n').split('\t')
            wh.write('#%s\n' % '\t'.join(merged_header(alleles, hdr)))
        else:
            wh.write(line)


def main(argv):
    """Run the conversion for argv = [script, netMHC_xls, output_file].

    Returns the process exit status: 0 on success, 4 when the argument
    count is wrong, 3 when reading or writing fails.
    """
    if len(argv) != 3:
        sys.stderr.write("%s\n" % USAGE)
        return 4
    try:
        ## The with-blocks close both files even when the conversion fails.
        with open(argv[1], 'r') as fh:
            with open(argv[2], 'w') as wh:
                convert(fh, wh)
    except Exception as e:
        ## Report on stderr; the old print statement lacked the redirect and
        ## wrote the message to stdout instead.
        sys.stderr.write("error: %s\n" % e)
        return 3
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
67 ]]>
68 </configfile>
69 </configfiles>
70 <command><![CDATA[
71 ### netMHC -tdir tmp -f OS11Fusion.fa -a 'HLA-A3001,HLA-A0301,HLA-B4201,HLA-B5802,HLA-C0602' -l '8,9,10' -xls -xlsfile OS11Fusion.xls > OS11_netMHC.out
## Allele names accepted by this wrapper; anything else is reported as unknown.
## Entries are grouped one species / locus per row.
#set $valid_alleles = [
    'BoLA-AW10', 'BoLA-D18.4', 'BoLA-HD6', 'BoLA-JSP.1', 'BoLA-T2C', 'BoLA-T2a', 'BoLA-T2b',
    'H-2-Db', 'H-2-Dd', 'H-2-Kb', 'H-2-Kd', 'H-2-Kk', 'H-2-Ld',
    'HLA-A0101', 'HLA-A0201', 'HLA-A0202', 'HLA-A0203', 'HLA-A0205', 'HLA-A0206', 'HLA-A0207', 'HLA-A0211', 'HLA-A0212', 'HLA-A0216', 'HLA-A0217', 'HLA-A0219', 'HLA-A0250',
    'HLA-A0301', 'HLA-A0302', 'HLA-A0319', 'HLA-A1101', 'HLA-A2301', 'HLA-A2402', 'HLA-A2403', 'HLA-A2501', 'HLA-A2601', 'HLA-A2602', 'HLA-A2603', 'HLA-A2902',
    'HLA-A3001', 'HLA-A3002', 'HLA-A3101', 'HLA-A3201', 'HLA-A3207', 'HLA-A3215', 'HLA-A3301', 'HLA-A6601', 'HLA-A6801', 'HLA-A6802', 'HLA-A6823', 'HLA-A6901', 'HLA-A8001',
    'HLA-B0702', 'HLA-B0801', 'HLA-B0802', 'HLA-B0803', 'HLA-B1401', 'HLA-B1402', 'HLA-B1501', 'HLA-B1502', 'HLA-B1503', 'HLA-B1509', 'HLA-B1517', 'HLA-B1801', 'HLA-B2705',
    'HLA-B2720', 'HLA-B3501', 'HLA-B3503', 'HLA-B3701', 'HLA-B3801', 'HLA-B3901', 'HLA-B4001', 'HLA-B4002', 'HLA-B4013', 'HLA-B4201', 'HLA-B4402', 'HLA-B4403', 'HLA-B4501',
    'HLA-B4506', 'HLA-B4601', 'HLA-B4801', 'HLA-B5101', 'HLA-B5301', 'HLA-B5401', 'HLA-B5701', 'HLA-B5703', 'HLA-B5801', 'HLA-B5802', 'HLA-B7301', 'HLA-B8101', 'HLA-B8301',
    'HLA-C0303', 'HLA-C0401', 'HLA-C0501', 'HLA-C0602', 'HLA-C0701', 'HLA-C0702', 'HLA-C0802', 'HLA-C1203', 'HLA-C1402', 'HLA-C1502',
    'HLA-E0101', 'HLA-E0103',
    'Mamu-A01', 'Mamu-A02', 'Mamu-A07', 'Mamu-A11', 'Mamu-A20102', 'Mamu-A2201', 'Mamu-A2601', 'Mamu-A70103',
    'Mamu-B01', 'Mamu-B03', 'Mamu-B08', 'Mamu-B1001', 'Mamu-B17', 'Mamu-B3901', 'Mamu-B52', 'Mamu-B6601', 'Mamu-B8301', 'Mamu-B8701',
    'Patr-A0101', 'Patr-A0301', 'Patr-A0401', 'Patr-A0701', 'Patr-A0901', 'Patr-B0101', 'Patr-B1301', 'Patr-B2401',
    'SLA-10401', 'SLA-10701', 'SLA-20401', 'SLA-30401',
]
## Sort the requested alleles into recognised ones (passed to netMHC) and
## unrecognised ones (only reported back to the user).
#import re
#set $allelelist = []
#set $unknown_alleles = []
#set $candidates = []
#if $alleles.allelesrc == 'history':
    ## History dataset: one allele per line; only the first comma-separated
    ## field of each line is used.
    #set $allele_fh = open(str($alleles.allele_file))
    #for $line in $allele_fh:
        #silent $candidates.append($line.strip().split(',')[0].strip())
    #end for
    #silent $allele_fh.close()
#else:
    ## Typed entry: alleles are separated by commas (whitespace is accepted
    ## too). Previously only the first allele of a comma-separated entry was
    ## kept and the rest were silently dropped.
    #set $candidates = str($alleles.allele_text).replace(',', ' ').split()
#end if
#for $allele in $candidates:
    #if $allele in $valid_alleles:
        #silent $allelelist.append($allele)
    #elif $allele:
        ## Unrecognised names are echoed inside a double-quoted shell string
        ## later in this command, so reduce them to harmless characters first.
        ## Blank entries are skipped rather than reported.
        #silent $unknown_alleles.append(re.sub('[^A-Za-z0-9_.:*-]', '_', $allele))
    #end if
#end for
## Build the shell command. With at least one recognised allele, netMHC is run
## and both of its reports are reformatted; otherwise the job only reports the
## rejected names and exits with status 1.
#if $allelelist
    #set $alist = ','.join($allelelist)
    echo "netMHC alleles: $allelelist"
    && echo "unknown alleles: $unknown_alleles"
    && echo "peptide lengths: $lengths"
    && netMHC -tdir tmp -f "$seq_fasta" -a '$alist' -l '$lengths' $sort
    ## Optional %Rank thresholds are only passed when a value was supplied.
    #if $threshold_sec.rth:
        -rth $threshold_sec.rth
    #end if
    #if $threshold_sec.rlt:
        -rlt $threshold_sec.rlt
    #end if
    -xls -xlsfile results.tsv > results.out
    && python $format_out results.out $output
    && python $format_tsv results.tsv $results_tsv
#else
    echo 'No netMHC alleles';
    echo "unknown: $unknown_alleles";
    exit 1;
#end if
249 ]]></command>
250 <inputs>
251 <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
252 <conditional name="alleles">
253 <param name="allelesrc" type="select" label="Alleles">
254 <option value="history">From history</option>
255 <option value="entry">Entered</option>
256 </param>
257 <when value="history">
258 <param name="allele_file" type="data" format="txt" label="Alleles file">
259 <help>The dataset should have one allele per line: HLA-A0201</help></param>
260 </when>
261 <when value="entry">
262 <param name="allele_text" type="text" label="Alleles">
263 <help>Enter alleles separated by commas: HLA-A0201,HLA-B0702</help>
264 <validator type="regex" message="IDs separated by commas">^(\S+)(,\S+)*$</validator>
265 </param>
266 </when>
267 </conditional>
268 <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction">
269 <help>Used for any alleles which don't include specified lengths</help>
270 <option value="8">8</option>
271 <option value="9">9</option>
272 <option value="10">10</option>
273 <option value="11">11</option>
274 <option value="12">12 (unvalidated)</option>
275 <option value="13">13 (unvalidated)</option>
276 <option value="14">14 (unvalidated)</option>
277 </param>
278 <param name="sort" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Sort output on descending affinity"/>
279 <section name="threshold_sec" expanded="false" title="Adjust Thresholds">
280 <param name="rth" type="float" value="0.500000" optional="true" label="Threshold for high binding peptides (%Rank)"/>
281 <param name="rlt" type="float" value="2.000000" optional="true" label="Threshold for low binding peptides (%Rank)"/>
282 </section>
283 </inputs>
284 <outputs>
285 <data name="output" format="tabular" label="${tool.name} on ${on_string} Binding Scores"/>
286 <data name="results_tsv" format="tabular" label="${tool.name} on ${on_string} Peptide Summary"/>
287 </outputs>
288 <help><![CDATA[
289 **NetMHC**
290
291 http://www.cbs.dtu.dk/services/NetMHC/
292
293 NetMHC 4.0 predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs).
294
295 ANNs have been trained for 78 different Human MHC (HLA) alleles representing all 12 HLA A and B Supertypes as defined by Lund et al. (2004). Furthermore 41 animal (Monkey, Cattle, Pig, and Mouse) allele predictions are available.
296
297 Prediction values are given in nM IC50 values.
298
299 Predictions of lengths 8-14: Predictions can be made for lengths between 8 and 14 for all alleles using a novel approximation algorithm using ANNs trained on 9mer peptides. Probably because of the limited amount of available 10mer data this method has a better predictive value than ANNs trained on 10mer data.
300 Predictions of peptides longer than 11 have not been extensively validated!
301 Caution should be taken for 8mer predictions as some alleles might not bind 8mers to any significant extent.
302
303 Strong and weak binding peptides are indicated in the output. In the selection window for HLA alleles, the recommended allele for each HLA supertype is indicated.
304
305 **Inputs**
306
307 A fasta file of peptide sequences in your history
308
309 A list of alleles, either entered as text (separated by commas) or taken from a history dataset with one allele per line
310
311 **Outputs**
312
313 **Binding Scores**
314
315 ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== =========
316 #pos HLA peptide Core Offset I_pos I_len D_pos D_len iCore Identity 1-log50k(aff) Affinity(nM) %Rank BindLevel
317 ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== =========
318 16 HLA-A3001 HGRWDTNCA HGRWDTNCA 0 0 0 0 0 HGRWDTNCA SOGA2_CREB3L1 0.487 257.58 0.90 WB
319 1 HLA-A3001 LQNELERLK LQNELERLK 0 0 0 0 0 LQNELERLK SOGA2_CREB3L1 0.242 3647.96 6.00
320 16 HLA-A3001 HGRWDTNCAP HGRWTNCAP 0 0 0 4 1 HGRWDTNCAP SOGA2_CREB3L1 0.185 6739.05 9.50
321 6 HLA-C0602 ERLKEMQSM ERLKEMQSM 0 0 0 0 0 ERLKEMQSM SOGA2_CREB3L1 0.382 798.43 0.40 SB
322 12 HLA-C0602 QSMEHGRWD QSMEHGRWD 0 0 0 0 0 QSMEHGRWD SOGA2_CREB3L1 0.229 4177.34 1.50 WB
323 3 HLA-C0602 NELERLKEM NELERLKEM 0 0 0 0 0 NELERLKEM SOGA2_CREB3L1 0.209 5224.29 1.80 WB
324 20 HLA-A3001 DTNCAPSW DTNCA-PSW 0 5 1 0 0 DTNCAPSW SOGA2_CREB3L1 0.050 29125.62 60.00
325 20 HLA-C0602 DTNCAPSW DT-NCAPSW 0 2 1 0 0 DTNCAPSW SOGA2_CREB3L1 0.005 47120.04 90.00
326 ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== =========
327
328
329
330 **Peptide Summary**
331
332 ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== =========
333 #Pos Peptide ID HLA-A3001 nM HLA-A3001 Rank HLA-A3001 Core HLA-C0602 nM HLA-C0602 Rank HLA-C0602 Core H_Avg_Ranks N_binders
334 ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== =========
335 16 HGRWDTNCA SOGA2_CREB3L1 257.6 0.900 HGRWDTNCA 35765.3 25.000 HGRWDTNCA 4.124 1
336 20 DTNCAPSW SOGA2_CREB3L1 29125.6 60.000 DTNCA_PSW 47120.0 90.000 DT_NCAPSW 6.909 0
337 ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== =========
338
339
340 ]]></help>
341 <citations>
342 <citation type="doi">10.1093/nar/gkn202</citation>
343 <citation type="doi">10.1093/bioinformatics/btn128</citation>
344 <citation type="doi">10.1093/bioinformatics/btn100</citation>
345 <citation type="doi">10.1110/ps.0239403</citation>
346 </citations>
347 </tool>