# HG changeset patch # User jjohnson # Date 1582055331 18000 # Node ID bb25a4e5f2117fd676db7951f053b071167c3725 "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/netmhc commit 3bf9a39fe11622806ac6b032ba4fc6139a003580" diff -r 000000000000 -r bb25a4e5f211 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,26 @@ +NetMHC 4.0 server predicts binding of peptides to a number of different HLA alleles using artificial neural networks (ANNs). + +SEE: http://www.cbs.dtu.dk/services/NetMHC/ + +NetMHC is available to academic researchers. +The download webpage requires the user to accept an academic license agreement, which prevents automatic Galaxy package installation. +http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?netMHC + + +Download NetMHC 4.0 and install as described in: netMHC-4.0.readme +( Be sure to do step 3 which installs the data from: http://www.cbs.dtu.dk/services/NetMHC-4.0/data.tar.gz ) + + +For Galaxy installation : + +Add tool_dependencies/netMHC/4.0/env.sh + +The env.sh must define ENVIRONMENT variables: NMHOME and TMPDIR + +For example, if you installed netMHC at: /home/galaxy/src/netMHC-4.0 + +galaxy@galaxy [/home/galaxy] % cat tool_dependencies/netMHC/4.0/env.sh +export NMHOME=/home/galaxy/src/netMHC-4.0 +export TMPDIR=/tmp +export PATH=/home/galaxy/src/netMHC-4.0:$PATH + diff -r 000000000000 -r bb25a4e5f211 netmhc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/netmhc.xml Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,347 @@ + + MHC Binding prediction + + netMHC + + + + + + > sys.stderr, "python script.py netMHC_output_tsv output_file" + exit(4); +hpat = '^\s*(pos)\s+(HLA)\s+(peptide)\s+(Core)\s+(Offset)\s+(I_pos)\s+(I_len)\s+(D_pos)\s+(D_len)\s+(iCore)\s+(Identity)\s+(1-log50k.aff.)\s+(Affinity.nM.)\s+(%Rank)\s+(BindLevel)\s*$' +epat = 
'^\s*(\d+)\s+(\S+)\s+([A-Z]+)\s+([-_A-Z]*)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([0-9]+)\s+([A-Z]+)\s+(\S+)\s+([0-9.]+)\s+([0-9.]+)\s+([0-9.]+).*?([SWB]*)$' +cnt = 0 +try: + wh = open(sys.argv[2],'w') + fh = open(sys.argv[1],'r') + for i,line in enumerate(fh): + line = line.rstrip() + if not line: + continue + ## print >> sys.stderr, line + m = re.match(epat,line) + if m: + ## print >> sys.stderr, str(m.groups()) + wh.write("%s\n" % '\t'.join([x if x else '' for x in m.groups()])) + cnt += 1 + elif cnt == 0: + m = re.match(hpat,line) + if m: + ## print >> sys.stderr, str(m.groups()) + wh.write("#%s\n" % '\t'.join(m.groups())) + cnt += 1 + wh.close() + fh.close() +except Exception, e: + print >> sys.stderr, "error: %s" % e ## redirect to stderr; a bare 'print sys.stderr, ...' prints the file object's repr to stdout + exit(3) +]]> + + > sys.stderr, "python script.py netMHC_xls output_file" + exit(4); +try: + wh = open(sys.argv[2],'w') + fh = open(sys.argv[1],'r') + for n,line in enumerate(fh): + if n > 1: + wh.write(line) + if n == 0: + alleles = line.rstrip('\n').split('\t') + if n == 1: + hdr = line.rstrip('\n').split('\t') + wh.write('#%s\n' % '\t'.join([' '.join([alleles[i - i%3],hdr[i]]).strip() for i in range(len(hdr))])) + wh.close() + fh.close() +except Exception, e: + print >> sys.stderr, "error: %s" % e ## redirect to stderr; a bare 'print sys.stderr, ...' prints the file object's repr to stdout + exit(3) +]]> + + + OS11_netMHC.out +#set $valid_alleles = [ +'BoLA-AW10', +'BoLA-D18.4', +'BoLA-HD6', +'BoLA-JSP.1', +'BoLA-T2C', +'BoLA-T2a', +'BoLA-T2b', +'H-2-Db', +'H-2-Dd', +'H-2-Kb', +'H-2-Kd', +'H-2-Kk', +'H-2-Ld', +'HLA-A0101', +'HLA-A0201', +'HLA-A0202', +'HLA-A0203', +'HLA-A0205', +'HLA-A0206', +'HLA-A0207', +'HLA-A0211', +'HLA-A0212', +'HLA-A0216', +'HLA-A0217', +'HLA-A0219', +'HLA-A0250', +'HLA-A0301', +'HLA-A0302', +'HLA-A0319', +'HLA-A1101', +'HLA-A2301', +'HLA-A2402', +'HLA-A2403', +'HLA-A2501', +'HLA-A2601', +'HLA-A2602', +'HLA-A2603', +'HLA-A2902', +'HLA-A3001', +'HLA-A3002', +'HLA-A3101', +'HLA-A3201', +'HLA-A3207', +'HLA-A3215', +'HLA-A3301', +'HLA-A6601', +'HLA-A6801', +'HLA-A6802', +'HLA-A6823', +'HLA-A6901', +'HLA-A8001', 
+'HLA-B0702', +'HLA-B0801', +'HLA-B0802', +'HLA-B0803', +'HLA-B1401', +'HLA-B1402', +'HLA-B1501', +'HLA-B1502', +'HLA-B1503', +'HLA-B1509', +'HLA-B1517', +'HLA-B1801', +'HLA-B2705', +'HLA-B2720', +'HLA-B3501', +'HLA-B3503', +'HLA-B3701', +'HLA-B3801', +'HLA-B3901', +'HLA-B4001', +'HLA-B4002', +'HLA-B4013', +'HLA-B4201', +'HLA-B4402', +'HLA-B4403', +'HLA-B4501', +'HLA-B4506', +'HLA-B4601', +'HLA-B4801', +'HLA-B5101', +'HLA-B5301', +'HLA-B5401', +'HLA-B5701', +'HLA-B5703', +'HLA-B5801', +'HLA-B5802', +'HLA-B7301', +'HLA-B8101', +'HLA-B8301', +'HLA-C0303', +'HLA-C0401', +'HLA-C0501', +'HLA-C0602', +'HLA-C0701', +'HLA-C0702', +'HLA-C0802', +'HLA-C1203', +'HLA-C1402', +'HLA-C1502', +'HLA-E0101', +'HLA-E0103', +'Mamu-A01', +'Mamu-A02', +'Mamu-A07', +'Mamu-A11', +'Mamu-A20102', +'Mamu-A2201', +'Mamu-A2601', +'Mamu-A70103', +'Mamu-B01', +'Mamu-B03', +'Mamu-B08', +'Mamu-B1001', +'Mamu-B17', +'Mamu-B3901', +'Mamu-B52', +'Mamu-B6601', +'Mamu-B8301', +'Mamu-B8701', +'Patr-A0101', +'Patr-A0301', +'Patr-A0401', +'Patr-A0701', +'Patr-A0901', +'Patr-B0101', +'Patr-B1301', +'Patr-B2401', +'SLA-10401', +'SLA-10701', +'SLA-20401', +'SLA-30401', +] + #set $allelelist = [] + #set $unknown_alleles = [] + #if $alleles.allelesrc == 'history': + #for $line in open(str($alleles.allele_file)): + #set $fields = $line.strip().split(',') + #set $allele = $fields[0].strip() + #if $allele in $valid_alleles: + $allelelist.append($allele) + #else + $unknown_alleles.append($allele) + #end if + #end for + #else: + #for $word in str($alleles.allele_text).strip().replace(',', ' ').split(): + #set $fields = $word.strip().split(',') + #set $allele = $fields[0].strip() + #if $allele in $valid_alleles: + $allelelist.append($allele) ## commas in the typed list were turned into separators above, so every entered allele is checked, not only the first + #else + $unknown_alleles.append($allele) + #end if + #end for + #end if + #if len($allelelist) < 1 + echo 'No netMHC alleles'; + echo "unknown: $unknown_alleles"; + exit 1; + #else + echo "netMHC alleles: $allelelist" + && echo "unknown alleles: $unknown_alleles" + && echo "peptide lengths: 
$lengths" + #set $alist = ','.join($allelelist) + && netMHC -tdir tmp -f "$seq_fasta" -a '$alist' -l '$lengths' $sort + #if $threshold_sec.rth: + -rth $threshold_sec.rth + #end if + #if $threshold_sec.rlt: + -rlt $threshold_sec.rlt + #end if + -xls -xlsfile results.tsv > results.out + && python $format_out results.out $output + && python $format_tsv results.tsv $results_tsv + #end if + ]]> + + + + + + + + + + The dataset should have one allele per line: HLA-A0201 + + + + Enter alleles separated by commas: HLA-A0201,HLA-B0702 + ^(\S+)(,\S+)*$ + + + + + Used for any alleles which don't include specified lengths + + + + + + + + + +
+ + +
+
+ + + + + + + 10.1093/nar/gkn202 + 10.1093/bioinformatics/btn128 + 10.1093/bioinformatics/btn100 + 10.1110/ps.0239403 + +
diff -r 000000000000 -r bb25a4e5f211 test-data/test.fsa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.fsa Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,6 @@ +>143B_BOVIN +TMDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSSW +RVISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLQLLDKYLIPNATQPESKVFYL +KMKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFYY +EILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGDA +GEGEN diff -r 000000000000 -r bb25a4e5f211 test-data/test1.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fa Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,4 @@ +>PPAP2C +SFGMYCMVFLVK +>ADAMTSL1 +SLDMCISGLCQL diff -r 000000000000 -r bb25a4e5f211 test-data/test1_allele_scores.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_allele_scores.tsv Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,9 @@ +#Protein Position Peptide HLA-A02:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore HLA-A23:01 ANN/Mat Direct predicted affinity (Kd, nM)/Matscore +ADAMTSL1 0 SLDMCISGL 26 27179 +ADAMTSL1 1 LDMCISGLC 23677 33222 +ADAMTSL1 2 DMCISGLCQ 31630 34451 +ADAMTSL1 3 MCISGLCQL 1823 5781 +PPAP2C 0 SFGMYCMVF 24390 67 +PPAP2C 1 FGMYCMVFL 222 4423 +PPAP2C 2 GMYCMVFLV 4 3256 +PPAP2C 3 MYCMVFLVK 23399 146 diff -r 000000000000 -r bb25a4e5f211 test-data/test1_alleles.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_alleles.txt Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,3 @@ +HLA-A02:01 +HLA-A23:01 +HLA-C03:01 diff -r 000000000000 -r bb25a4e5f211 test-data/test1_summary.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_summary.tsv Tue Feb 18 14:48:51 2020 -0500 @@ -0,0 +1,17 @@ +#pos peptide logscore affinity(nM) Bind Level Protein Name Allele +2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01 +1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01 +3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01 +0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01 +0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01 +3 MCISGLCQL 0.306 1823 ADAMTSL1 
HLA-A02:01 +1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01 +2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01 +0 SFGMYCMVF 0.611 67 WB PPAP2C HLA-A23:01 +3 MYCMVFLVK 0.539 146 WB PPAP2C HLA-A23:01 +2 GMYCMVFLV 0.252 3256 PPAP2C HLA-A23:01 +1 FGMYCMVFL 0.224 4423 PPAP2C HLA-A23:01 +3 MCISGLCQL 0.199 5781 ADAMTSL1 HLA-A23:01 +0 SLDMCISGL 0.056 27179 ADAMTSL1 HLA-A23:01 +1 LDMCISGLC 0.038 33222 ADAMTSL1 HLA-A23:01 +2 DMCISGLCQ 0.034 34451 ADAMTSL1 HLA-A23:01