changeset 0:bb25a4e5f211 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/netmhc commit 3bf9a39fe11622806ac6b032ba4fc6139a003580"
author jjohnson
date Tue, 18 Feb 2020 14:48:51 -0500
parents
children
files README netmhc.xml test-data/test.fsa test-data/test1.fa test-data/test1_allele_scores.tsv test-data/test1_alleles.txt test-data/test1_summary.tsv
diffstat 7 files changed, 412 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,26 @@
+NetMHC 4.0 server predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). 
+
+SEE:  http://www.cbs.dtu.dk/services/NetMHC/
+
+NetMHC is available to academic researchers.  
+The download webpage requires the user to accept an academic license agreement, which prevents automatic Galaxy package installation.
+http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?netMHC
+
+
+Download NetMHC 4.0 and install as described in:  netMHC-4.0.readme
+( Be sure to do step 3 which installs the data from: http://www.cbs.dtu.dk/services/NetMHC-4.0/data.tar.gz )
+
+
+For Galaxy installation  :  
+
+Add tool_dependencies/netMHC/4.0/env.sh
+
+The env.sh must define the environment variables:  NMHOME and TMPDIR
+
+For example, if you installed netMHC at: /home/galaxy/src/netMHC-4.0
+
+galaxy@galaxy [/home/galaxy] % cat tool_dependencies/netMHC/4.0/env.sh 
+export NMHOME=/home/galaxy/src/netMHC-4.0
+export TMPDIR=/tmp
+export PATH=/home/galaxy/src/netMHC-4.0:$PATH
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/netmhc.xml	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,347 @@
+<tool id="netmhc" name="netMHC" version="4.0.0">
+    <description>MHC Binding prediction</description>
+    <requirements>
+        <requirement type="package" version="4.0">netMHC</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <configfiles>
+        <configfile name="format_out"><![CDATA[
+## Convert the netMHC stdout report (argv[1]) into a tab-separated table (argv[2]).
+import re
+import sys
+if len(sys.argv) != 3:
+  sys.stderr.write("python script.py  netMHC_output_tsv output_file\n")
+  sys.exit(4)
+## hpat matches the column-header line, epat matches one prediction row
+hpat = r'^\s*(pos)\s+(HLA)\s+(peptide)\s+(Core)\s+(Offset)\s+(I_pos)\s+(I_len)\s+(D_pos)\s+(D_len)\s+(iCore)\s+(Identity)\s+(1-log50k.aff.)\s+(Affinity.nM.)\s+(%Rank)\s+(BindLevel)\s*$'
+epat = r'^\s*(\d+)\s+(\S+)\s+([A-Z]+)\s+([-_A-Z]*)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([A-Z]+)\s+(\S+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+).*?([SWB]*)$'
+cnt = 0
+try:
+  ## 'with' closes both files even if an error interrupts the loop
+  with open(sys.argv[2],'w') as wh, open(sys.argv[1],'r') as fh:
+    for line in fh:
+      line = line.rstrip()
+      if not line:
+        continue
+      m = re.match(epat,line)
+      if m:
+        ## prediction row: one output column per captured field ('' for an empty capture)
+        wh.write("%s\n" % '\t'.join([x if x else '' for x in m.groups()]))
+        cnt += 1
+      elif cnt == 0:
+        ## the header is only looked for until the first line has been written
+        m = re.match(hpat,line)
+        if m:
+          wh.write("#%s\n" % '\t'.join(m.groups()))
+          cnt += 1
+except Exception as e:
+  ## report on stderr and exit non-zero so the tool's exit_code rule (range 1:) catches it
+  sys.stderr.write("error: %s\n" % e)
+  sys.exit(3)
+]]>
+        </configfile>
+        <configfile name="format_tsv"><![CDATA[
+#!/usr/bin/env python
+## Merge the two header lines of the netMHC -xls file (argv[1]) into one '#' header line in argv[2].
+import sys
+if len(sys.argv) != 3:
+  sys.stderr.write("python script.py netMHC_xls output_file\n")
+  sys.exit(4)
+try:
+  with open(sys.argv[2],'w') as wh, open(sys.argv[1],'r') as fh:
+    for n,line in enumerate(fh):
+      if n == 0:
+        alleles = line.rstrip('\n').split('\t')
+      elif n == 1:
+        hdr = line.rstrip('\n').split('\t')
+        ## header cells are grouped in threes: prefix each with its group's first allele cell ('' if absent)
+        names = [alleles[i - i%3] if i - i%3 < len(alleles) else '' for i in range(len(hdr))]
+        wh.write('#%s\n' % '\t'.join([' '.join(p).strip() for p in zip(names,hdr)]))
+      else:
+        wh.write(line)
+except Exception as e:
+  sys.stderr.write("error: %s\n" % e)
+  sys.exit(3)
+]]>
+        </configfile>
+    </configfiles>
+    <command><![CDATA[
+### netMHC -tdir tmp -f OS11Fusion.fa -a 'HLA-A3001,HLA-A0301,HLA-B4201,HLA-B5802,HLA-C0602' -l '8,9,10' -xls -xlsfile OS11Fusion.xls > OS11_netMHC.out
+#set $valid_alleles = [
+'BoLA-AW10',
+'BoLA-D18.4',
+'BoLA-HD6',
+'BoLA-JSP.1',
+'BoLA-T2C',
+'BoLA-T2a',
+'BoLA-T2b',
+'H-2-Db',
+'H-2-Dd',
+'H-2-Kb',
+'H-2-Kd',
+'H-2-Kk',
+'H-2-Ld',
+'HLA-A0101',
+'HLA-A0201',
+'HLA-A0202',
+'HLA-A0203',
+'HLA-A0205',
+'HLA-A0206',
+'HLA-A0207',
+'HLA-A0211',
+'HLA-A0212',
+'HLA-A0216',
+'HLA-A0217',
+'HLA-A0219',
+'HLA-A0250',
+'HLA-A0301',
+'HLA-A0302',
+'HLA-A0319',
+'HLA-A1101',
+'HLA-A2301',
+'HLA-A2402',
+'HLA-A2403',
+'HLA-A2501',
+'HLA-A2601',
+'HLA-A2602',
+'HLA-A2603',
+'HLA-A2902',
+'HLA-A3001',
+'HLA-A3002',
+'HLA-A3101',
+'HLA-A3201',
+'HLA-A3207',
+'HLA-A3215',
+'HLA-A3301',
+'HLA-A6601',
+'HLA-A6801',
+'HLA-A6802',
+'HLA-A6823',
+'HLA-A6901',
+'HLA-A8001',
+'HLA-B0702',
+'HLA-B0801',
+'HLA-B0802',
+'HLA-B0803',
+'HLA-B1401',
+'HLA-B1402',
+'HLA-B1501',
+'HLA-B1502',
+'HLA-B1503',
+'HLA-B1509',
+'HLA-B1517',
+'HLA-B1801',
+'HLA-B2705',
+'HLA-B2720',
+'HLA-B3501',
+'HLA-B3503',
+'HLA-B3701',
+'HLA-B3801',
+'HLA-B3901',
+'HLA-B4001',
+'HLA-B4002',
+'HLA-B4013',
+'HLA-B4201',
+'HLA-B4402',
+'HLA-B4403',
+'HLA-B4501',
+'HLA-B4506',
+'HLA-B4601',
+'HLA-B4801',
+'HLA-B5101',
+'HLA-B5301',
+'HLA-B5401',
+'HLA-B5701',
+'HLA-B5703',
+'HLA-B5801',
+'HLA-B5802',
+'HLA-B7301',
+'HLA-B8101',
+'HLA-B8301',
+'HLA-C0303',
+'HLA-C0401',
+'HLA-C0501',
+'HLA-C0602',
+'HLA-C0701',
+'HLA-C0702',
+'HLA-C0802',
+'HLA-C1203',
+'HLA-C1402',
+'HLA-C1502',
+'HLA-E0101',
+'HLA-E0103',
+'Mamu-A01',
+'Mamu-A02',
+'Mamu-A07',
+'Mamu-A11',
+'Mamu-A20102',
+'Mamu-A2201',
+'Mamu-A2601',
+'Mamu-A70103',
+'Mamu-B01',
+'Mamu-B03',
+'Mamu-B08',
+'Mamu-B1001',
+'Mamu-B17',
+'Mamu-B3901',
+'Mamu-B52',
+'Mamu-B6601',
+'Mamu-B8301',
+'Mamu-B8701',
+'Patr-A0101',
+'Patr-A0301',
+'Patr-A0401',
+'Patr-A0701',
+'Patr-A0901',
+'Patr-B0101',
+'Patr-B1301',
+'Patr-B2401',
+'SLA-10401',
+'SLA-10701',
+'SLA-20401',
+'SLA-30401',
+]
+        ## Split the requested alleles into ones listed in valid_alleles and unknown ones, then build the command.
+        #set $allelelist = []
+        #set $unknown_alleles = []
+        #if $alleles.allelesrc == 'history':
+          ## one allele per line; only the first comma-separated field of each line is used
+          #for $line in open(str($alleles.allele_file)):
+            #set $allele = $line.split(',')[0].strip()
+            #if $allele in $valid_alleles:
+              $allelelist.append($allele)
+            #else
+              $unknown_alleles.append($allele)
+            #end if
+          #end for
+        #else:
+          ## the text box holds comma-separated alleles: check every entry, not just the first one
+          #for $allele in str($alleles.allele_text).replace(',', ' ').split():
+            #if $allele in $valid_alleles:
+              $allelelist.append($allele)
+            #else
+              $unknown_alleles.append($allele)
+            #end if
+          #end for
+        #end if
+        #if len($allelelist) < 1
+            echo 'No netMHC alleles'; 
+            echo "unknown: $unknown_alleles";  
+            exit 1;
+        #else
+            echo "netMHC alleles: $allelelist"  
+            && echo "unknown alleles: $unknown_alleles"  
+            && echo "peptide lengths: $lengths"  
+            #set $alist = ','.join($allelelist)
+            && netMHC -tdir tmp -f "$seq_fasta" -a '$alist' -l '$lengths' $sort 
+            #if $threshold_sec.rth:
+              -rth $threshold_sec.rth
+            #end if
+            #if $threshold_sec.rlt:
+              -rlt $threshold_sec.rlt
+            #end if
+            -xls -xlsfile results.tsv > results.out
+            && python $format_out results.out  $output
+            && python $format_tsv results.tsv $results_tsv
+        #end if
+    ]]></command>
+    <inputs>
+        <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
+        <conditional name="alleles">
+           <param name="allelesrc" type="select" label="Alleles">
+               <option value="history">From history</option>
+               <option value="entry">Entered</option>
+           </param>
+           <when value="history">
+               <param name="allele_file" type="data" format="txt" label="Alleles file"
+                      help="The dataset should have one allele per line: HLA-A0201"/>
+           </when>
+           <when value="entry">
+               <param name="allele_text" type="text" label="Alleles">
+                   <help>Enter alleles separated by commas: HLA-A0201,HLA-B0702</help>
+                   <validator type="regex" message="IDs separated by commas">^(\S+)(,\S+)*$</validator>
+               </param>
+           </when>
+        </conditional>
+        <param name="lengths" type="select" multiple="true" label="peptide lengths for prediction">
+            <help>Used for any alleles which don't include specified lengths</help>
+            <option value="8">8</option>
+            <option value="9">9</option>
+            <option value="10">10</option>
+            <option value="11">11</option>
+            <option value="12">12 (unvalidated)</option>
+            <option value="13">13 (unvalidated)</option>
+            <option value="14">14 (unvalidated)</option>
+        </param>
+        <param name="sort" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Sort output on descending affinity"/>
+        <section name="threshold_sec" expanded="false" title="Adjust Thresholds">
+            <param name="rth" type="float" value="0.500000" optional="true" label="Threshold for high binding peptides (%Rank)"/>
+            <param name="rlt" type="float" value="2.000000" optional="true" label="Threshold for low binding peptides (%Rank)"/>
+        </section>
+    </inputs>
+    <outputs>
+       <data name="output" format="tabular" label="${tool.name} on ${on_string} Binding Scores"/>
+       <data name="results_tsv" format="tabular" label="${tool.name} on ${on_string} Peptide Summary"/>
+    </outputs>
+    <help><![CDATA[
+**NetMHC**
+
+http://www.cbs.dtu.dk/services/NetMHC/
+
+NetMHC 4.0 predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). 
+
+ANNs have been trained for 78 different Human MHC (HLA) alleles representing all 12 HLA A and B Supertypes as defined by Lund et al. (2004). Furthermore 41 animal (Monkey, Cattle, Pig, and Mouse) allele predictions are available.
+
+Prediction values are given in nM IC50 values.
+
+Predictions of lengths 8-14:       Predictions can be made for lengths between 8 and 14 for all alleles using a novel approximation algorithm using ANNs trained on 9mer peptides. Probably because of the limited amount of available 10mer data this method has a better predictive value than ANNs trained on 10mer data.
+Predictions of peptides longer than 11 have not been extensively validated!
+Caution should be taken for 8mer predictions as some alleles might not bind 8mers to any significant extent.
+
+Strong and weak binding peptides are indicated in the output. In the selection window for HLA alleles, the recommended allele for each HLA supertype is indicated. 
+
+**Inputs**
+
+  A fasta file of peptide sequences in your history
+  
+  A list of alleles entered as text or from a history dataset, one allele per line 
+
+**Outputs**
+
+  **Binding Scores**
+
+  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========
+  #pos   HLA         peptide       Core      Offset  I_pos  I_len  D_pos  D_len  iCore        Identity       1-log50k(aff)  Affinity(nM)  %Rank  BindLevel
+  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========
+   16    HLA-A3001   HGRWDTNCA     HGRWDTNCA      0      0      0      0      0  HGRWDTNCA    SOGA2_CREB3L1          0.487       257.58    0.90  WB
+    1    HLA-A3001   LQNELERLK     LQNELERLK      0      0      0      0      0  LQNELERLK    SOGA2_CREB3L1          0.242      3647.96    6.00
+   16    HLA-A3001   HGRWDTNCAP    HGRWTNCAP      0      0      0      4      1  HGRWDTNCAP   SOGA2_CREB3L1          0.185      6739.05    9.50
+    6    HLA-C0602   ERLKEMQSM     ERLKEMQSM      0      0      0      0      0  ERLKEMQSM    SOGA2_CREB3L1          0.382       798.43    0.40  SB
+   12    HLA-C0602   QSMEHGRWD     QSMEHGRWD      0      0      0      0      0  QSMEHGRWD    SOGA2_CREB3L1          0.229      4177.34    1.50  WB
+    3    HLA-C0602   NELERLKEM     NELERLKEM      0      0      0      0      0  NELERLKEM    SOGA2_CREB3L1          0.209      5224.29    1.80  WB
+   20    HLA-A3001   DTNCAPSW      DTNCA-PSW      0      5      1      0      0  DTNCAPSW     SOGA2_CREB3L1          0.050     29125.62   60.00
+   20    HLA-C0602   DTNCAPSW      DT-NCAPSW      0      2      1      0      0  DTNCAPSW     SOGA2_CREB3L1          0.005     47120.04   90.00
+  ====   =========   ==========    ========= ======  =====  =====  =====  =====  ==========   =============  =============  ============  =====  =========
+
+
+
+  **Peptide Summary**
+
+  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========
+  #Pos  Peptide    ID             HLA-A3001 nM  HLA-A3001 Rank  HLA-A3001 Core  HLA-C0602 nM  HLA-C0602 Rank  HLA-C0602 Core  H_Avg_Ranks  N_binders
+  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========
+  16    HGRWDTNCA  SOGA2_CREB3L1         257.6           0.900  HGRWDTNCA            35765.3          25.000  HGRWDTNCA             4.124          1
+  20    DTNCAPSW   SOGA2_CREB3L1       29125.6          60.000  DTNCA_PSW            47120.0          90.000  DT_NCAPSW             6.909          0
+  ====  =========  =============  ============  ==============  ==============  ============  ==============  ==============  ===========  =========
+
+
+    ]]></help>
+    <citations>
+       <citation type="doi">10.1093/nar/gkn202</citation>
+       <citation type="doi">10.1093/bioinformatics/btn128</citation>
+       <citation type="doi">10.1093/bioinformatics/btn100</citation>
+       <citation type="doi">10.1110/ps.0239403</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fsa	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,6 @@
+>143B_BOVIN
+TMDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSSW
+RVISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLQLLDKYLIPNATQPESKVFYL
+KMKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFYY
+EILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGDA
+GEGEN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fa	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,4 @@
+>PPAP2C
+SFGMYCMVFLVK
+>ADAMTSL1
+SLDMCISGLCQL
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_allele_scores.tsv	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,9 @@
+#Protein	Position	Peptide	HLA-A02:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore	HLA-A23:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore
+ADAMTSL1	0	SLDMCISGL	26	27179
+ADAMTSL1	1	LDMCISGLC	23677	33222
+ADAMTSL1	2	DMCISGLCQ	31630	34451
+ADAMTSL1	3	MCISGLCQL	1823	5781
+PPAP2C	0	SFGMYCMVF	24390	67
+PPAP2C	1	FGMYCMVFL	222	4423
+PPAP2C	2	GMYCMVFLV	4	3256
+PPAP2C	3	MYCMVFLVK	23399	146
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_alleles.txt	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,3 @@
+HLA-A02:01
+HLA-A23:01
+HLA-C03:01
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_summary.tsv	Tue Feb 18 14:48:51 2020 -0500
@@ -0,0 +1,17 @@
+#pos	peptide	logscore	affinity(nM)	Bind Level	Protein Name	Allele
+2	GMYCMVFLV	0.858	4	SB	PPAP2C	HLA-A02:01
+1	FGMYCMVFL	0.501	222	WB	PPAP2C	HLA-A02:01
+3	MYCMVFLVK	0.070	23399		PPAP2C	HLA-A02:01
+0	SFGMYCMVF	0.066	24390		PPAP2C	HLA-A02:01
+0	SLDMCISGL	0.698	26	SB	ADAMTSL1	HLA-A02:01
+3	MCISGLCQL	0.306	1823		ADAMTSL1	HLA-A02:01
+1	LDMCISGLC	0.069	23677		ADAMTSL1	HLA-A02:01
+2	DMCISGLCQ	0.042	31630		ADAMTSL1	HLA-A02:01
+0	SFGMYCMVF	0.611	67	WB	PPAP2C	HLA-A23:01
+3	MYCMVFLVK	0.539	146	WB	PPAP2C	HLA-A23:01
+2	GMYCMVFLV	0.252	3256		PPAP2C	HLA-A23:01
+1	FGMYCMVFL	0.224	4423		PPAP2C	HLA-A23:01
+3	MCISGLCQL	0.199	5781		ADAMTSL1	HLA-A23:01
+0	SLDMCISGL	0.056	27179		ADAMTSL1	HLA-A23:01
+1	LDMCISGLC	0.038	33222		ADAMTSL1	HLA-A23:01
+2	DMCISGLCQ	0.034	34451		ADAMTSL1	HLA-A23:01