Mercurial > repos > jjohnson > netmhc
changeset 0:bb25a4e5f211 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/netmhc commit 3bf9a39fe11622806ac6b032ba4fc6139a003580"
author | jjohnson |
---|---|
date | Tue, 18 Feb 2020 14:48:51 -0500 |
parents | |
children | |
files | README netmhc.xml test-data/test.fsa test-data/test1.fa test-data/test1_allele_scores.tsv test-data/test1_alleles.txt test-data/test1_summary.tsv |
diffstat | 7 files changed, 412 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,26 @@ +NetMHC 4.0 server predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). + +SEE: http://www.cbs.dtu.dk/services/NetMHC/ + +NetMHC is available to academic researchers. +The download webpage requires the user to accept a academic license aggrement, which prevents automatic Galaxy package installation. +http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?netMHC + + +Download NetMHC 4.0 and install as described in: netMHC-4.0.readme +( Be sure to do step 3 which installs the data from: http://www.cbs.dtu.dk/services/NetMHC-4.0/data.tar.gz ) + + +For Galaxy installation : + +Add tool_depedencies/netMHC/4.0/env.sh + +The env.sh must define ENVIROMENT variables: NMHOME and TMPDIR + +For example, if you installed netMHC at: /home/galaxy/src/netMHC-4.0 + +galaxy@galaxy [/home/galaxy] % cat tool_dependencies/netMHC/4.0/env.sh +export NMHOME=/home/galaxy/src/netMHC-4.0 +export TMPDIR=/tmp +export PATH=/home/galaxy/src/netMHC-4.0:$PATH +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netmhc.xml Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,347 @@ +<tool id="netmhc" name="netMHC" version="4.0.0"> + <description>MHC Binding prediction</description> + <requirements> + <requirement type="package" version="4.0">netMHC</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <configfiles> + <configfile name="format_out"><![CDATA[ +import sys +import re +if len(sys.argv) != 3: + print >> sys.stderr, "python script.py netMHC_output_tsv output_file" + exit(4); +hpat = '^\s*(pos)\s+(HLA)\s+(peptide)\s+(Core)\s+(Offset)\s+(I_pos)\s+(I_len)\s+(D_pos)\s+(D_len)\s+(iCore)\s+(Identity)\s+(1-log50k.aff.)\s+(Affinity.nM.)\s+(%Rank)\s+(BindLevel)\s*$' +epat = '^\s*(\d+)\s+(\S+)\s+([A-Z]+)\s+([-_A-Z]*)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([A-Z]+)\s+(\S+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+).*?([SWB]*)$' +cnt = 0 +try: + wh = open(sys.argv[2],'w') + fh = open(sys.argv[1],'r') + for i,line in enumerate(fh): + line = line.rstrip() + if not line: + continue + ## print >> sys.stderr, line + m = re.match(epat,line) + if m: + ## print >> sys.stderr, str(m.groups()) + wh.write("%s\n" % '\t'.join([x if x else '' for x in m.groups()])) + cnt += 1 + elif cnt == 0: + m = re.match(hpat,line) + if m: + ## print >> sys.stderr, str(m.groups()) + wh.write("#%s\n" % '\t'.join(m.groups())) + cnt += 1 + wh.close() + fh.close() +except Exception, e: + print sys.stderr, "error: %s" % e + exit(3) +]]> + </configfile> + <configfile name="format_tsv"><![CDATA[ +#!/usr/bin/env python +import sys +if len(sys.argv) != 3: + print >> sys.stderr, "python script.py netMHC_xls output_file" + exit(4); +try: + wh = open(sys.argv[2],'w') + fh = open(sys.argv[1],'r') + for n,line in enumerate(fh): + if n > 1: + wh.write(line) + if n == 0: + alleles = line.rstrip('\n').split('\t') + if n == 1: + hdr = line.rstrip('\n').split('\t') + wh.write('#%s\n' % '\t'.join([' '.join([alleles[i - i%3],hdr[i]]).strip() for i in range(len(hdr))])) + wh.close() + fh.close() +except Exception, e: + print sys.stderr, "error: %s" % e + exit(3) +]]> + </configfile> + </configfiles> + <command><![CDATA[ +### netMHC -tdir tmp -f OS11Fusion.fa -a 'HLA-A3001,HLA-A0301,HLA-B4201,HLA-B5802,HLA-C0602' -l '8,9,10' -xls -xlsfile OS11Fusion.xls > OS11_netMHC.out +#set $valid_alleles = [ +'BoLA-AW10', +'BoLA-D18.4', +'BoLA-HD6', +'BoLA-JSP.1', +'BoLA-T2C', +'BoLA-T2a', +'BoLA-T2b', +'H-2-Db', +'H-2-Dd', +'H-2-Kb', +'H-2-Kd', +'H-2-Kk', +'H-2-Ld', +'HLA-A0101', +'HLA-A0201', +'HLA-A0202', +'HLA-A0203', +'HLA-A0205', +'HLA-A0206', +'HLA-A0207', +'HLA-A0211', +'HLA-A0212', +'HLA-A0216', +'HLA-A0217', +'HLA-A0219', +'HLA-A0250', +'HLA-A0301', +'HLA-A0302', +'HLA-A0319', +'HLA-A1101', +'HLA-A2301', +'HLA-A2402', +'HLA-A2403', +'HLA-A2501', +'HLA-A2601', +'HLA-A2602', +'HLA-A2603', +'HLA-A2902', +'HLA-A3001', +'HLA-A3002', +'HLA-A3101', +'HLA-A3201', +'HLA-A3207', +'HLA-A3215', +'HLA-A3301', +'HLA-A6601', +'HLA-A6801', +'HLA-A6802', +'HLA-A6823', +'HLA-A6901', +'HLA-A8001', +'HLA-B0702', +'HLA-B0801', +'HLA-B0802', +'HLA-B0803', +'HLA-B1401', +'HLA-B1402', +'HLA-B1501', +'HLA-B1502', +'HLA-B1503', +'HLA-B1509', +'HLA-B1517', +'HLA-B1801', +'HLA-B2705', +'HLA-B2720', +'HLA-B3501', +'HLA-B3503', +'HLA-B3701', +'HLA-B3801', +'HLA-B3901', +'HLA-B4001', +'HLA-B4002', +'HLA-B4013', +'HLA-B4201', +'HLA-B4402', +'HLA-B4403', +'HLA-B4501', +'HLA-B4506', +'HLA-B4601', +'HLA-B4801', +'HLA-B5101', +'HLA-B5301', +'HLA-B5401', +'HLA-B5701', +'HLA-B5703', +'HLA-B5801', +'HLA-B5802', +'HLA-B7301', +'HLA-B8101', +'HLA-B8301', +'HLA-C0303', +'HLA-C0401', +'HLA-C0501', +'HLA-C0602', +'HLA-C0701', +'HLA-C0702', +'HLA-C0802', +'HLA-C1203', +'HLA-C1402', +'HLA-C1502', +'HLA-E0101', +'HLA-E0103', +'Mamu-A01', +'Mamu-A02', +'Mamu-A07', +'Mamu-A11', +'Mamu-A20102', +'Mamu-A2201', +'Mamu-A2601', +'Mamu-A70103', +'Mamu-B01', +'Mamu-B03', +'Mamu-B08', +'Mamu-B1001', +'Mamu-B17', +'Mamu-B3901', +'Mamu-B52', +'Mamu-B6601', +'Mamu-B8301', +'Mamu-B8701', +'Patr-A0101', +'Patr-A0301', +'Patr-A0401', +'Patr-A0701', +'Patr-A0901', +'Patr-B0101', +'Patr-B1301', +'Patr-B2401', +'SLA-10401', +'SLA-10701', +'SLA-20401', +'SLA-30401', +] + #set $allelelist = [] + #set $unknown_alleles = [] + #if $alleles.allelesrc == 'history': + #for $line in open(str($alleles.allele_file)): + #set $fields = $line.strip().split(',') + #set $allele = $fields[0].strip() + #if $allele in $valid_alleles: + $allelelist.append($allele) + #else + $unknown_alleles.append($allele) + #end if + #end for + #else: + #for $word in str($alleles.allele_text).strip().split(): + #set $fields = $word.strip().split(',') + #set $allele = $fields[0].strip() + #if $allele in $valid_alleles: + $allelelist.append($allele) + #else + $unknown_alleles.append($allele) + #end if + #end for + #end if + #if len($allelelist) < 1 + echo 'No netMHC alleles'; + echo "unknown: $unknown_alleles"; + exit 1; + #else + echo "netMHC alleles: $allelelist" + && echo "unknown alleles: $unknown_alleles" + && echo "peptide lengths: $lengths" + #set $alist = ','.join($allelelist) + && netMHC -tdir tmp -f "$seq_fasta" -a '$alist' -l '$lengths' $sort + #if $threshold_sec.rth: + -rth $threshold_sec.rth + #end if + #if $threshold_sec.rlt: + -rlt $threshold_sec.rlt + #end if + -xls -xlsfile results.tsv > results.out + && python $format_out results.out $output + && python $format_tsv results.tsv $results_tsv + #end if + ]]></command> + <inputs> + <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/> + <conditional name="alleles"> + <param name="allelesrc" type="select" label="Alleles"> + <option value="history">From history</option> + <option value="entry">Entered</option> + </param> + <when value="history"> + <param name="allele_file" type="data" format="txt" label="Alleles file"/> + <help>The dataset should have on allele per line: HLA-A0201</help> + </when> + <when value="entry"> + <param name="allele_text" type="text" label="Alleles"> + <help>Enter alleles separated by commas: HLA-A0201,HLA-B0702</help> + <validator type="regex" message="IDs separted by commas">^(\S+)(,\S+)*$</validator> + </param> + </when> + </conditional> + <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction"> + <help>Used for any alleles which don't include specified lengths</help> + <option value="8">8</option> + <option value="9">9</option> + <option value="10">10</option> + <option value="11">11</option> + <option value="12">12 (unvalidated)</option> + <option value="13">13 (unvalidated)</option> + <option value="14">14 (unvalidated)</option> + </param> + <param name="sort" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Sort output on descending affinity"/> + <section name="threshold_sec" expanded="false" title="Adjust Thresholds"> + <param name="rth" type="float" value="0.500000" optional="true" label="Threshold for high binding peptides (%Rank)"/> + <param name="rlt" type="float" value="2.000000" optional="true" label="Threshold for low binding peptides (%Rank)"/> + </section> + </inputs> + <outputs> + <data name="output" format="tabular" label="${tool.name} on ${on_string} Binding Scores"/> + <data name="results_tsv" format="tabular" label="${tool.name} on ${on_string} Peptide Summary"/> + </outputs> + <help><![CDATA[ +**NetMHC** + +http://www.cbs.dtu.dk/services/NetMHC/ + +NetMHC 4.0 predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). + +ANNs have been trained for 78 different Human MHC (HLA) alleles representing all 12 HLA A and B Supertypes as defined by Lund et al. (2004). Furthermore 41 animal (Monkey, Cattle, Pig, and Mouse) allele predictions are available. + +Prediction values are given in nM IC50 values. + +Predictions of lengths 8-14: Predictions can be made for lengths between 8 and 14 for all alleles using an novel approximation algorithm using ANNs trained on 9mer peptides. Probably because of the limited amount of available 10mer data this method has a better predictive value than ANNs trained on 10mer data. +Predictions of peptides longer than 11 have not been extensively validated! +Caution should be taken for 8mer predictions as some alleles might not bind 8mers to any significant extend. + +Strong and weak binding peptides are indicated in the output. In the selection window for HLA alleles, the recommended allele for each HLA supertype is indicated. + +**Inputs** + + A fasta file of peptide sequences in your history + + A list Alleles entered as text or from a history dataset, one allele per line + +**Outputs** + + **Binding Scores** + + ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== ========= + #pos HLA peptide Core Offset I_pos I_len D_pos D_len iCore Identity 1-log50k(aff) Affinity(nM) %Rank BindLevel + ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== ========= + 16 HLA-A3001 HGRWDTNCA HGRWDTNCA 0 0 0 0 0 HGRWDTNCA SOGA2_CREB3L1 0.487 257.58 0.90 WB + 1 HLA-A3001 LQNELERLK LQNELERLK 0 0 0 0 0 LQNELERLK SOGA2_CREB3L1 0.242 3647.96 6.00 + 16 HLA-A3001 HGRWDTNCAP HGRWTNCAP 0 0 0 4 1 HGRWDTNCAP SOGA2_CREB3L1 0.185 6739.05 9.50 + 6 HLA-C0602 ERLKEMQSM ERLKEMQSM 0 0 0 0 0 ERLKEMQSM SOGA2_CREB3L1 0.382 798.43 0.40 SB + 12 HLA-C0602 QSMEHGRWD QSMEHGRWD 0 0 0 0 0 QSMEHGRWD SOGA2_CREB3L1 0.229 4177.34 1.50 WB + 3 HLA-C0602 NELERLKEM NELERLKEM 0 0 0 0 0 NELERLKEM SOGA2_CREB3L1 0.209 5224.29 1.80 WB + 20 HLA-A3001 DTNCAPSW DTNCA-PSW 0 5 1 0 0 DTNCAPSW SOGA2_CREB3L1 0.050 29125.62 60.00 + 20 HLA-C0602 DTNCAPSW DT-NCAPSW 0 2 1 0 0 DTNCAPSW SOGA2_CREB3L1 0.005 47120.04 90.00 + ==== ========= ========== ========= ====== ===== ===== ===== ===== ========== ============= ============= ============ ===== ========= + + + + **Peptide Summary** + + ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== ========= + #Pos Peptide ID HLA-A3001 nM HLA-A3001 Rank HLA-A3001 Core HLA-C0602 nM HLA-C0602 Rank HLA-C0602 Core H_Avg_Ranks N_binders + ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== ========= + 16 HGRWDTNCA SOGA2_CREB3L1 257.6 0.900 HGRWDTNCA 35765.3 25.000 HGRWDTNCA 4.124 1 + 20 DTNCAPSW SOGA2_CREB3L1 29125.6 60.000 DTNCA_PSW 47120.0 90.000 DT_NCAPSW 6.909 0 + ==== ========= ============= ============ ============== ============== ============ ============== ============== =========== ========= + + + ]]></help> + <citations> + <citation type="doi">10.1093/nar/gkn202</citation> + <citation type="doi">10.1093/bioinformatics/btn128</citation> + <citation type="doi">10.1093/bioinformatics/btn100</citation> + <citation type="doi">10.1110/ps.0239403</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.fsa Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,6 @@ +>143B_BOVIN +TMDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSSW +RVISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLQLLDKYLIPNATQPESKVFYL +KMKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFYY +EILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGDA +GEGEN
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fa Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,4 @@ +>PPAP2C +SFGMYCMVFLVK +>ADAMTSL1 +SLDMCISGLCQL
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_allele_scores.tsv Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,9 @@ +#Protein Position Peptide HLA-A02:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore HLA-A23:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore +ADAMTSL1 0 SLDMCISGL 26 27179 +ADAMTSL1 1 LDMCISGLC 23677 33222 +ADAMTSL1 2 DMCISGLCQ 31630 34451 +ADAMTSL1 3 MCISGLCQL 1823 5781 +PPAP2C 0 SFGMYCMVF 24390 67 +PPAP2C 1 FGMYCMVFL 222 4423 +PPAP2C 2 GMYCMVFLV 4 3256 +PPAP2C 3 MYCMVFLVK 23399 146
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_alleles.txt Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,3 @@ +HLA-A02:01 +HLA-A23:01 +HLA-C03:01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_summary.tsv Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,17 @@ +#pos peptide logscore affinity(nM) Bind Level Protein Name Allele +2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01 +1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01 +3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01 +0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01 +0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01 +3 MCISGLCQL 0.306 1823 ADAMTSL1 HLA-A02:01 +1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01 +2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01 +0 SFGMYCMVF 0.611 67 WB PPAP2C HLA-A23:01 +3 MYCMVFLVK 0.539 146 WB PPAP2C HLA-A23:01 +2 GMYCMVFLV 0.252 3256 PPAP2C HLA-A23:01 +1 FGMYCMVFL 0.224 4423 PPAP2C HLA-A23:01 +3 MCISGLCQL 0.199 5781 ADAMTSL1 HLA-A23:01 +0 SLDMCISGL 0.056 27179 ADAMTSL1 HLA-A23:01 +1 LDMCISGLC 0.038 33222 ADAMTSL1 HLA-A23:01 +2 DMCISGLCQ 0.034 34451 ADAMTSL1 HLA-A23:01