annotate unipept.py @ 4:4953dcd7dd39 draft

planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
author galaxyp
date Wed, 23 Jan 2019 09:16:38 -0500
parents 34758ab8aaa4
children 917fd3ebc223
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
2 """
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
3 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
4 #------------------------------------------------------------------------------
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
5 # University of Minnesota
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
6 # Copyright 2015, Regents of the University of Minnesota
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
7 #------------------------------------------------------------------------------
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
8 # Author:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
9 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
10 # James E Johnson
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
11 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
12 #------------------------------------------------------------------------------
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
13 """
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
14
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
15 import json
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
16 import logging
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
17 import optparse
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
18 from optparse import OptionParser
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
19 import os
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
20 import sys
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
21 import re
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
22 import urllib
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
23 import urllib2
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
24
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
25 """
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
26 pept2taxa json
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
27 pept2lca json
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
28 pept2prot
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
29 pept2ec ecjson ec
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
30 pept2go go
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
31 pept2funct go ec
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
32 peptinfo json ecjson ec go
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
33
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
34 """
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
35
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
36 try:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
37 import xml.etree.cElementTree as ET
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
38 except ImportError:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
39 import xml.etree.ElementTree as ET
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
40
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
def warn_err(msg, exit_code=1):
    """Write ``msg`` to standard error and optionally terminate the program.

    :param msg: text written to ``sys.stderr`` exactly as given (no newline
        is added)
    :param exit_code: when truthy, ``sys.exit`` is called with this value;
        when falsy (``0``, ``None``) the function simply returns
    """
    sys.stderr.write(msg)
    if not exit_code:
        return
    sys.exit(exit_code)
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
45
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
# Category labels kept under the name go_types (presumably the three
# Gene Ontology namespaces -- confirm against the code that consumes them).
go_types = [
    'biological process',
    'molecular function',
    'cellular component',
]

# Descriptive names keyed by one- and two-part number strings ('1', '1.1', ...).
# Going by the variable name these are EC classes and subclasses.
ec_name_dict = {
    # 1.x
    '1': 'Oxidoreductase',
    '1.1': 'act on the CH-OH group of donors',
    '1.2': 'act on the aldehyde or oxo group of donors',
    '1.3': 'act on the CH-CH group of donors',
    '1.4': 'act on the CH-NH2 group of donors',
    '1.5': 'act on CH-NH group of donors',
    '1.6': 'act on NADH or NADPH',
    '1.7': 'act on other nitrogenous compounds as donors',
    '1.8': 'act on a sulfur group of donors',
    '1.9': 'act on a heme group of donors',
    '1.10': 'act on diphenols and related substances as donors',
    '1.11': 'act on peroxide as an acceptor -- peroxidases',
    '1.12': 'act on hydrogen as a donor',
    '1.13': 'act on single donors with incorporation of molecular oxygen',
    '1.14': 'act on paired donors with incorporation of molecular oxygen',
    '1.15': 'act on superoxide radicals as acceptors',
    '1.16': 'oxidize metal ions',
    '1.17': 'act on CH or CH2 groups',
    '1.18': 'act on iron-sulfur proteins as donors',
    '1.19': 'act on reduced flavodoxin as donor',
    '1.20': 'act on phosphorus or arsenic as donors',
    '1.21': 'act on X-H and Y-H to form an X-Y bond',
    '1.97': 'other oxidoreductases',
    # 2.x
    '2': 'Transferase',
    '2.1': 'transfer one-carbon groups, Methylase',
    '2.2': 'transfer aldehyde or ketone groups',
    '2.3': 'acyltransferases',
    '2.4': 'glycosyltransferases',
    '2.5': 'transfer alkyl or aryl groups, other than methyl groups',
    '2.6': 'transfer nitrogenous groups',
    '2.7': 'transfer phosphorus-containing groups',
    '2.8': 'transfer sulfur-containing groups',
    '2.9': 'transfer selenium-containing groups',
    # 3.x
    '3': 'Hydrolase',
    '3.1': 'act on ester bonds',
    '3.2': 'act on sugars - glycosylases',
    '3.3': 'act on ether bonds',
    '3.4': 'act on peptide bonds - Peptidase',
    '3.5': 'act on carbon-nitrogen bonds, other than peptide bonds',
    '3.6': 'act on acid anhydrides',
    '3.7': 'act on carbon-carbon bonds',
    '3.8': 'act on halide bonds',
    '3.9': 'act on phosphorus-nitrogen bonds',
    '3.10': 'act on sulfur-nitrogen bonds',
    '3.11': 'act on carbon-phosphorus bonds',
    '3.12': 'act on sulfur-sulfur bonds',
    '3.13': 'act on carbon-sulfur bonds',
    # 4.x
    '4': 'Lyase',
    '4.1': 'carbon-carbon lyases',
    '4.2': 'carbon-oxygen lyases',
    '4.3': 'carbon-nitrogen lyases',
    '4.4': 'carbon-sulfur lyases',
    '4.5': 'carbon-halide lyases',
    '4.6': 'phosphorus-oxygen lyases',
    # 5.x
    '5': 'Isomerase',
    '5.1': 'racemases and epimerases',
    '5.2': 'cis-trans-isomerases',
    '5.3': 'intramolecular oxidoreductases',
    '5.4': 'intramolecular transferases -- mutases',
    '5.5': 'intramolecular lyases',
    '5.99': 'other isomerases',
    # 6.x
    '6': 'Ligase',
    '6.1': 'form carbon-oxygen bonds',
    '6.2': 'form carbon-sulfur bonds',
    '6.3': 'form carbon-nitrogen bonds',
    '6.4': 'form carbon-carbon bonds',
    '6.5': 'form phosphoric ester bonds',
    '6.6': 'form nitrogen-metal bonds',
}
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
# Column name lists, grouped by the pept2* prefix of each variable name.
# 'peptide' is the first entry of every flat list.
pept2lca_column_order = ['peptide', 'taxon_rank', 'taxon_id', 'taxon_name']
pept2lca_extra_column_order = [
    'peptide',
    'superkingdom', 'kingdom', 'subkingdom',
    'superphylum', 'phylum', 'subphylum',
    'superclass', 'class', 'subclass', 'infraclass',
    'superorder', 'order', 'suborder', 'infraorder', 'parvorder',
    'superfamily', 'family', 'subfamily',
    'tribe', 'subtribe',
    'genus', 'subgenus',
    'species_group', 'species_subgroup', 'species', 'subspecies',
    'varietas', 'forma',
]
# Basic columns followed by every extra column except the repeated 'peptide'.
pept2lca_all_column_order = list(pept2lca_column_order)
pept2lca_all_column_order.extend(pept2lca_extra_column_order[1:])

pept2prot_column_order = ['peptide', 'uniprot_id', 'taxon_id']
pept2prot_extra_column_order = list(pept2prot_column_order)
pept2prot_extra_column_order.extend([
    'taxon_name',
    'ec_references',
    'go_references',
    'refseq_ids',
    'refseq_protein_ids',
    'insdc_ids',
    'insdc_protein_ids',
])

# The pept2ec / pept2go orders are two-part: the first inner list and the
# second inner list are kept separate (each variable owns its own lists).
pept2ec_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count'],
]
pept2ec_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count', 'name'],
]
pept2go_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count'],
]
pept2go_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count', 'name'],
]
pept2funct_column_order = ['peptide', 'total_protein_count', 'ec', 'go']
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
127
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
128 def __main__():
3
34758ab8aaa4 Uploaded
galaxyp
parents: 1
diff changeset
129 version = '2.0'
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
130 pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$'
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
131
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def read_tabular(filepath, col):
    """Collect the value found in column ``col`` of every data row of a tab-separated file.

    Blank lines and lines starting with '#' are skipped.  A value that does
    not match ``pep_pat`` is reported through ``warn_err`` with
    ``exit_code=invalid_ec``; if that call returns, the value is still
    appended.  A row that has no field at index ``col`` is reported the same
    way and skipped instead of raising ``IndexError``.

    ``pep_pat``, ``invalid_ec`` and ``warn_err`` come from the surrounding
    scope.  The line number in the messages is the index produced by
    ``enumerate`` (it starts at 0).

    :param filepath: path of the tabular file to read
    :param col: index into the tab-split fields of each line
    :return: list of the collected values, in file order
    """
    peptides = []
    with open(filepath) as fp:
        for i, line in enumerate(fp):
            if line.strip() == '' or line.startswith('#'):
                continue
            # Strip '\r' as well as '\n': on a CRLF file the carriage return
            # would otherwise stay glued to the last field and make a valid
            # peptide fail the '$'-anchored pattern.
            fields = line.rstrip('\r\n').split('\t')
            try:
                peptide = fields[col]
            except IndexError:
                warn_err('no column %d on line %d of tabular file: %s\n' % (col, i, filepath), exit_code=invalid_ec)
                continue
            if not re.match(pep_pat, peptide):
                warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide, i, col, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
144
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def get_fasta_entries(fp):
    """Generate ``(header, sequence)`` pairs from an iterable of FASTA lines.

    A line starting with '>' opens a new entry; the header is that whole
    line with trailing whitespace removed (the '>' is kept).  All following
    lines up to the next header are right-stripped and concatenated to form
    the sequence.  Lines that appear before the first header are discarded.

    :param fp: iterable of text lines, e.g. an open file
    :return: generator of ``(header, sequence)`` tuples
    """
    header = None
    chunks = []
    for raw in fp:
        text = raw.rstrip()
        if not text.startswith(">"):
            chunks.append(text)
            continue
        # A new header closes the entry collected so far, if there is one.
        if header:
            yield (header, ''.join(chunks))
        header = text
        chunks = []
    if header:
        yield (header, ''.join(chunks))
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
155
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def read_fasta(filepath):
    """Return the sequence of every entry in a FASTA file.

    Entries are produced by ``get_fasta_entries``.  A sequence that does not
    match ``pep_pat`` is reported through ``warn_err`` with
    ``exit_code=invalid_ec``; if that call returns, the sequence is still
    appended.  ``pep_pat``, ``invalid_ec``, ``warn_err`` and
    ``get_fasta_entries`` come from the surrounding scope.

    :param filepath: path of the FASTA file to read
    :return: list of sequences, in file order
    """
    collected = []
    with open(filepath) as handle:
        for header, sequence in get_fasta_entries(handle):
            looks_like_peptide = re.match(pep_pat, sequence)
            if not looks_like_peptide:
                warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (sequence, header, filepath), exit_code=invalid_ec)
            collected.append(sequence)
    return collected
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
164
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def read_mzid(fp):
    """Return the text of every element whose tag contains 'PeptideSequence'.

    The document is parsed incrementally with ``ET.iterparse``.  Elements
    without text (e.g. ``<PeptideSequence/>``) are skipped rather than
    contributing ``None`` to the result.

    :param fp: file name or file object accepted by ``ET.iterparse``
    :return: list of element texts, in document order
    """
    peptides = []
    for event, elem in ET.iterparse(fp):
        if event != 'end':
            continue
        # Substring test so namespace-qualified tags ('{uri}PeptideSequence')
        # match as well.
        if 'PeptideSequence' in elem.tag and elem.text is not None:
            peptides.append(elem.text)
        # The element is complete and its text has been read; drop its
        # content so the parsed tree does not keep growing with the file.
        elem.clear()
    return peptides
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
172
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def read_pepxml(fp):
    """Return the ``peptide`` attribute of every element whose tag contains 'search_hit'.

    The document is parsed incrementally with ``ET.iterparse``.  Matching
    elements that carry no ``peptide`` attribute are skipped rather than
    contributing ``None`` to the result.

    :param fp: file name or file object accepted by ``ET.iterparse``
    :return: list of attribute values, in document order
    """
    peptides = []
    for event, elem in ET.iterparse(fp):
        if event != 'end':
            continue
        # Substring test so namespace-qualified tags match as well.
        if 'search_hit' in elem.tag:
            peptide = elem.get('peptide')
            if peptide is not None:
                peptides.append(peptide)
        # The element is complete and its attribute has been read; drop its
        # content so the parsed tree does not keep growing with the file.
        elem.clear()
    return peptides
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
180
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
def best_match(peptide, matches):
    """Pick one entry from ``matches`` and return a shallow copy of it.

    * no matches            -> ``None``
    * exactly one match     -> that match
    * first match has a 'taxon_rank' key -> ranks from
      ``pept2lca_extra_column_order`` are tried from the last one back to the
      one after 'peptide'; the first match whose 'taxon_rank' equals the rank,
      or that has a truthy value under the rank key (``rank + "_id"`` when
      ``options.extra`` is set, else ``rank``), wins; ``None`` if nothing fits
    * otherwise             -> the match with the longest 'peptide' value
      (the last such match when several tie)

    ``options`` and ``pept2lca_extra_column_order`` come from the surrounding
    scope.

    :param peptide: not used by the selection logic
    :param matches: list of dicts, or a falsy value
    :return: a copy of the chosen dict, or ``None``
    """
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].copy()
    if 'taxon_rank' not in matches[0]:
        # Stable sort keeps input order among equal lengths, so taking the
        # last item selects the last of the longest peptides.
        by_length = sorted(matches, key=lambda m: len(m['peptide']))
        return by_length[-1].copy()
    # Walk the rank columns backwards, stopping before index 0 ('peptide').
    for rank in pept2lca_extra_column_order[:0:-1]:
        if options.extra:
            rank_key = rank + "_id"
        else:
            rank_key = rank
        for candidate in matches:
            if candidate.get('taxon_rank') == rank or candidate.get(rank_key):
                return candidate.copy()
    return None
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
198
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
199 def get_taxon_json(resp):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
200 found_keys = set()
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
201 for i,pdict in enumerate(resp):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
202 found_keys |= set(pdict.keys())
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
203 taxa_cols = []
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
204 for col in pept2lca_extra_column_order[-1:0:-1]:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
205 if col+'_id' in found_keys:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
206 taxa_cols.append(col)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
207 id_to_node = dict()
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
208 def get_node(id,name,rank,child,seq):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
209 if id not in id_to_node:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
210 data = {'count' : 0, 'self_count' : 0, 'valid_taxon' : 1, 'rank' : rank, 'sequences' : [] }
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
211 node = {'id' : id, 'name' : name, 'children' : [], 'kids': [],'data' : data }
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
212 id_to_node[id] = node
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
213 else:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
214 node = id_to_node[id]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
215 node['data']['count'] += 1
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
216 if seq is not None and seq not in node['data']['sequences']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
217 node['data']['sequences'].append(seq)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
218 if child is None:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
219 node['data']['self_count'] += 1
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
220 elif child['id'] not in node['kids']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
221 node['kids'].append(child['id'])
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
222 node['children'].append(child)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
223 return node
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
224 root = get_node(1,'root','no rank',None,None)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
225 for i,pdict in enumerate(resp):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
226 sequence = pdict.get('peptide',pdict.get('tryptic_peptide',None))
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
227 seq = sequence
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
228 child = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
229 for col in taxa_cols:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
230 col_id = col+'_id'
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
231 if col_id in pdict and pdict.get(col_id):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
232 col_name = col if col in found_keys else col+'_name'
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
233 child = get_node(pdict.get(col_id,None),pdict.get(col_name,''),col,child,seq)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
234 seq = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
235 if child:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
236 get_node(1,'root','no rank',child,None)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
237 return root
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
238
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def get_ec_json(resp):
    """Build a nested EC-number tree from a list of per-peptide results.

    resp is an iterable of dicts.  A dict may carry an 'ec' list whose
    entries each have an 'ec_number' string such as '1.2.3.4'.  The peptide
    sequence is read from 'peptide' or, failing that, 'tryptic_peptide'.

    Returns the root node (id 0, name '-.-.-.-').  Every node is a dict with
    the keys 'id', 'name', 'children' (child node dicts), 'kids' (the ids of
    those children) and 'data' ('count', 'self_count', 'sequences').

    The names of the top-level classes '1' .. '6' are looked up in
    ec_name_dict, which must be defined in the enclosing module; those six
    nodes are attached to the root even when resp is empty.
    """
    root_id = 0
    root_name = '-.-.-.-'
    id_to_node = dict()

    def get_ids(ec):
        # '1.2.3.4' -> ['1.2.3.4', '1.2.3', '1.2', '1'], most specific first.
        ids = []
        i = len(ec)
        while True:
            ids.append(ec[:i])
            if i <= 0:
                # Stop at the empty prefix.  Calling rfind with a negative
                # end index would wrap around, find the same leading '.'
                # again and never terminate.
                break
            i = ec.rfind('.', 0, i - 1)
            if i < 0:
                break
        return ids

    def get_node(node_id, name, child, seq):
        # Fetch or create the node, bump its counters and link the child.
        node = id_to_node.get(node_id)
        if node is None:
            node = {
                'id': node_id,
                'name': name,
                'children': [],
                'kids': [],
                'data': {'count': 0, 'self_count': 0, 'sequences': []},
            }
            id_to_node[node_id] = node
        data = node['data']
        data['count'] += 1
        if seq is not None and seq not in data['sequences']:
            data['sequences'].append(seq)
        if child is None:
            # Reached directly, not through a more specific descendant.
            data['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(root_id, root_name, None, None)
    for i in range(1, 7):
        class_id = str(i)
        class_name = '%s\n%s' % (class_id, ec_name_dict[class_id])
        child = get_node(class_id, class_name, None, None)
        get_node(root_id, root_name, child, None)

    for pdict in resp:
        seq = pdict.get('peptide', pdict.get('tryptic_peptide', None))
        if 'ec' not in pdict:
            continue
        for ec in pdict['ec']:
            child = None
            # Walk from the full EC number up to its top-level class,
            # chaining each node as the child of the next, broader one.
            for ec_id in get_ids(ec['ec_number']):
                child = get_node(ec_id, str(ec_id), child, seq)
                # Only the first node visited for this peptide records the
                # sequence.
                # NOTE(review): seq is not restored for the peptide's
                # second and later EC numbers, so those leaves never list
                # it -- confirm this is intended.
                seq = None
            if child:
                get_node(root_id, root_name, child, None)
    return root
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
293
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def get_taxon_dict(resp, column_order, extra=False, names=False):
    """Tabulate per-peptide values for the requested columns.

    resp is an iterable of dicts; column_order lists the wanted columns.
    For each entry of column_order the key to read is chosen as follows:

    * the entry itself, if any record has that key;
    * otherwise, when names is true: '<col>_id' (only if extra is true and
      the key occurs, labelled '<col>_id') followed by '<col>_name' (if the
      key occurs, labelled '<col>');
    * otherwise '<col>_name' if it occurs, else '<col>_id' if it occurs,
      labelled '<col>' in both cases.

    Columns for which no key occurs in any record are left out.

    Returns (taxa, column_names).  taxa maps each peptide to a list of
    strings, one per selected column, with '' for missing or falsy values.
    Records without a truthy 'peptide' are skipped and the first record
    seen for a peptide wins.
    """
    results = list(resp)
    found_keys = set()
    for pdict in results:
        found_keys.update(pdict)

    column_names = []  # header labels handed back to the caller
    column_keys = []   # record key read for each label, same order
    for col in column_order:
        if col in found_keys:
            column_names.append(col)
            column_keys.append(col)
            continue
        col_id = col + '_id'
        col_name = col + '_name'
        if names:
            if extra and col_id in found_keys:
                column_names.append(col_id)
                column_keys.append(col_id)
            if col_name in found_keys:
                column_names.append(col)
                column_keys.append(col_name)
        elif col_name in found_keys:
            column_names.append(col)
            column_keys.append(col_name)
        elif col_id in found_keys:
            column_names.append(col)
            column_keys.append(col_id)

    taxa = dict()  # peptide : [taxonomy]
    for pdict in results:
        peptide = pdict.get('peptide')
        if not peptide or peptide in taxa:
            continue
        taxa[peptide] = [str(pdict[key]) if pdict.get(key) else ''
                         for key in column_keys]
    return (taxa, column_names)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
333
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def get_ec_dict(resp, extra=False):
    """Collect comma-joined EC annotations per peptide.

    resp is an iterable of dicts, each with a 'peptide' key and optionally
    an 'ec' list whose entries have 'ec_number' and 'protein_count' (and
    possibly 'name').

    Returns (ec_dict, ec_cols).  ec_cols is ['ec_numbers',
    'ec_protein_counts'], plus 'ec_names' when extra is true.  ec_dict maps
    each peptide to one comma-joined string per column; a peptide without
    EC entries gets empty strings.  A later record for the same peptide
    overwrites an earlier one.

    Raises KeyError if a record has no 'peptide', or an EC entry lacks
    'ec_number' or 'protein_count'.
    """
    ec_cols = ['ec_numbers', 'ec_protein_counts']
    if extra:
        ec_cols.append('ec_names')
    ec_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        # A missing or null 'ec' value is treated as "no annotations".
        entries = list(pdict.get('ec') or [])
        vals = [
            ','.join(ec['ec_number'] for ec in entries),
            ','.join(str(ec['protein_count']) for ec in entries),
        ]
        if extra:
            # A missing or null name becomes '' so the join cannot fail.
            vals.append(','.join(ec.get('name') or '' for ec in entries))
        ec_dict[peptide] = vals
    return (ec_dict, ec_cols)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
355
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def get_go_dict(resp, extra=False):
    """Collect comma-joined GO annotations per peptide.

    resp is an iterable of dicts, each with a 'peptide' key and optionally
    a 'go' list.  An entry of that list is either a term dict (it has a
    'go_term' key) or a grouping dict whose keys are looked up from
    go_types, a name that must be defined in the enclosing module, each
    mapping to a list of term dicts.  Term dicts carry 'go_term' and
    'protein_count' (and possibly 'name').

    Returns (go_dict, go_cols).  go_cols is ['go_terms',
    'go_protein_counts'], plus 'go_names' when extra is true.  go_dict maps
    each peptide to one comma-joined string per column; a peptide without
    GO entries gets empty strings.  A later record for the same peptide
    overwrites an earlier one.

    Raises KeyError if a record has no 'peptide', or a term dict lacks
    'go_term' or 'protein_count'.
    """
    def iter_terms(go_entries):
        # Yield term dicts in order, unpacking grouped entries.
        for go in go_entries:
            if 'go_term' in go:
                yield go
                continue
            for go_type in go_types:
                for term in go.get(go_type) or []:
                    yield term

    go_cols = ['go_terms', 'go_protein_counts']
    if extra:
        go_cols.append('go_names')
    go_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        # A missing or null 'go' value is treated as "no annotations".
        terms = list(iter_terms(pdict.get('go') or []))
        vals = [
            ','.join(term['go_term'] for term in terms),
            ','.join(str(term['protein_count']) for term in terms),
        ]
        if extra:
            # A missing or null name becomes '' so the join cannot fail.
            vals.append(','.join(term.get('name') or '' for term in terms))
        go_dict[peptide] = vals
    return (go_dict, go_cols)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
386
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def write_ec_table(outfile, resp, column_order):
    """
    Write the EC entries found in resp to outfile as tab-separated rows.

    Records without an 'ec' key are skipped.  For every entry in a record's
    'ec' list one row is written: the record's values for the keys in
    column_order[0] followed by the entry's values for the keys in
    column_order[-1].  A key that is missing, or whose value is falsy, is
    written as an empty cell.
    """
    def cells(source, keys):
        # missing keys and falsy values both become an empty cell
        return [str(source[key]) if key in source and source[key] else '' for key in keys]

    with open(outfile, 'w') as table:
        for record in resp:
            if 'ec' not in record:
                continue
            leading = cells(record, column_order[0])
            for entry in record['ec']:
                row = leading + cells(entry, column_order[-1])
                table.write('\t'.join(row) + '\n')
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
395
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
def write_go_table(outfile, resp, column_order):
    """
    Write the GO entries found in resp to outfile as tab-separated rows.

    Records without a 'go' key are skipped.  Every row starts with the
    record's values for the keys in column_order[0], followed by a GO
    term's values for the keys in column_order[-1].  An entry of the 'go'
    list that has a 'go_term' key is written directly; any other entry is
    looked up by each name in go_types, and each term listed under a name
    gets a row with that name appended as the last column.  A key that is
    missing, or whose value is falsy, is written as an empty cell.
    """
    def cells(source, keys):
        # missing keys and falsy values both become an empty cell
        return [str(source[key]) if key in source and source[key] else '' for key in keys]

    with open(outfile, 'w') as table:
        for record in resp:
            if 'go' not in record:
                continue
            leading = cells(record, column_order[0])
            for entry in record['go']:
                if 'go_term' in entry:
                    table.write('\t'.join(leading + cells(entry, column_order[-1])) + '\n')
                    continue
                # entry holds lists of terms keyed by the names in go_types
                for namespace in go_types:
                    if namespace not in entry:
                        continue
                    for term in entry[namespace]:
                        row = leading + cells(term, column_order[-1]) + [namespace]
                        table.write('\t'.join(row) + '\n')
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
412
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
# Parse Command Line
# Options are registered in the order they should appear in --help.
parser = optparse.OptionParser()
# unipept API choice
parser.add_option(
    '-a', '--api', dest='unipept', default='pept2lca',
    choices=['pept2lca', 'pept2taxa', 'pept2prot', 'pept2ec', 'pept2go', 'pept2funct', 'peptinfo'],
    help='The unipept application: pept2lca, pept2taxa, pept2prot, pept2ec, pept2go, pept2funct, or peptinfo')
# input files
parser.add_option(
    '-t', '--tabular', dest='tabular', default=None,
    help='A tabular file that contains a peptide column')
parser.add_option(
    '-c', '--column', dest='column', type='int', default=0,
    help='The column (zero-based) in the tabular file that contains peptide sequences')
parser.add_option(
    '-f', '--fasta', dest='fasta', default=None,
    help='A fasta file containing peptide sequences')
parser.add_option(
    '-m', '--mzid', dest='mzid', default=None,
    help='A mzIdentML file containing peptide sequences')
parser.add_option(
    '-p', '--pepxml', dest='pepxml', default=None,
    help='A pepxml file containing peptide sequences')
# Unipept Flags
parser.add_option(
    '-e', '--equate_il', dest='equate_il', action='store_true', default=False,
    help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records')
parser.add_option(
    '-x', '--extra', dest='extra', action='store_true', default=False,
    help='return the complete lineage of the taxonomic lowest common ancestor')
parser.add_option(
    '-n', '--names', dest='names', action='store_true', default=False,
    help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor')
parser.add_option(
    '-D', '--domains', dest='domains', action='store_true', default=False,
    help='group response by GO namespace: biological process, molecular function, cellular component')
parser.add_option(
    '-M', '--max_request', dest='max_request', type='int', default=200,
    help='The maximum number of entries per unipept request')

# output fields
parser.add_option(
    '-A', '--allfields', dest='allfields', action='store_true', default=False,
    help='include fields: taxon_rank,taxon_id,taxon_name in csv and tsv outputs')
# Warn vs Error Flag
parser.add_option(
    '-S', '--strict', dest='strict', action='store_true', default=False,
    help='Exit on invalid peptide')
# output files
parser.add_option(
    '-J', '--json', dest='json', default=None,
    help='Output file path for json formatted results')
parser.add_option(
    '-j', '--ec_json', dest='ec_json', default=None,
    help='Output file path for EC json formatted results')
parser.add_option(
    '-E', '--ec_tsv', dest='ec_tsv', default=None,
    help='Output file path for EC TAB-separated-values (.tsv) formatted results')
parser.add_option(
    '-G', '--go_tsv', dest='go_tsv', default=None,
    help='Output file path for GO TAB-separated-values (.tsv) formatted results')
parser.add_option(
    '-L', '--lineage_tsv', dest='lineage_tsv', default=None,
    help='Output file path for Lineage TAB-separated-values (.tsv) formatted results')
parser.add_option(
    '-T', '--tsv', dest='tsv', default=None,
    help='Output file path for TAB-separated-values (.tsv) formatted results')
parser.add_option(
    '-C', '--csv', dest='csv', default=None,
    help='Output file path for Comma-separated-values (.csv) formatted results')
parser.add_option(
    '-U', '--unmatched', dest='unmatched', default=None,
    help='Output file path for peptide with no matches')
parser.add_option(
    '-u', '--url', dest='url', default='http://api.unipept.ugent.be/api/v1/',
    help='unipept url http://api.unipept.ugent.be/api/v1/')
# debug
parser.add_option(
    '-g', '--get', dest='get', action='store_true', default=False,
    help='Use GET instead of POST')
parser.add_option(
    '-d', '--debug', dest='debug', action='store_true', default=False,
    help='Turning on debugging')
parser.add_option(
    '-v', '--version', dest='version', action='store_true', default=False,
    help='print version and exit')
# positional arguments left in args are treated as peptide sequences further down
(options, args) = parser.parse_args()
if options.version:
    # version is defined outside this section
    print >> sys.stdout,"%s" % version
    sys.exit(0)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
## exit code handed to warn_err for an invalid peptide argument; None unless --strict
if options.strict:
    invalid_ec = 2
else:
    invalid_ec = None
## Get peptide sequences
## every input source that was given contributes to one combined list
peptides = []
if options.mzid:
    peptides.extend(read_mzid(options.mzid))
if options.pepxml:
    peptides.extend(read_pepxml(options.pepxml))
if options.tabular:
    peptides.extend(read_tabular(options.tabular,options.column))
if options.fasta:
    peptides.extend(read_fasta(options.fasta))
## positional command line arguments are peptides too; each one is checked
## against pep_pat, but it is added to the list whether or not it matches
if args:
    for arg_index,candidate in enumerate(args):
        if re.match(pep_pat,candidate) is None:
            warn_err('"%s" is not a peptide (arg %d)\n' % (candidate,arg_index),exit_code=invalid_ec)
        peptides.append(candidate)
if not peptides:
    warn_err("No peptides input!",exit_code=1)
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
## choose the output column layout from the selected API and flags
if options.unipept == 'pept2prot':
    if options.extra:
        column_order = pept2prot_extra_column_order
    else:
        column_order = pept2prot_column_order
elif options.extra or options.names:
    ## --allfields only has an effect together with --extra or --names
    if options.allfields:
        column_order = pept2lca_all_column_order
    else:
        column_order = pept2lca_extra_column_order
else:
    column_order = pept2lca_column_order
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
## map to tryptic peptides
## an input peptide is cut after every R or K that is followed by a residue other than P
tryptic_site = re.compile(r'(?<=[RK])(?=[^P])')
pepToParts = {p: tryptic_site.sub('\n', p).split("\n") for p in peptides}
## reverse index: tryptic fragment -> input peptides that contain it
partToPeps = {}
for peptide in pepToParts:
    parts = pepToParts[peptide]
    if options.debug:
        print >> sys.stdout, "peptide: %s\ttryptic: %s\n" % (peptide, parts)
    for part in parts:
        part_len = len(part)
        if part_len > 50:
            warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide,part_len,part),exit_code=None)
        elif part_len >= 5:
            ## only fragments of 5 to 50 residues are kept for querying
            if part not in partToPeps:
                partToPeps[part] = []
            partToPeps[part].append(peptide)
trypticPeptides = list(partToPeps)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
## unipept
## Query the unipept API in batches of at most options.max_request tryptic
## peptides; the decoded JSON entries of every successful batch are
## accumulated in unipept_resp.  A failing batch is reported through
## warn_err with exit_code=None and the remaining batches are still sent.
unipept_resp = []
## the request flags are identical for every batch, so build them once
request_flags = []
if options.equate_il:
    request_flags.append(("equate_il","true"))
if options.names or options.json:
    ## --json also requests extra and names
    request_flags.append(("extra","true"))
    request_flags.append(("names","true"))
elif options.extra:
    request_flags.append(("extra","true"))
if options.domains:
    request_flags.append(("domains","true"))
headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'}
base_url = '%s/%s' % (options.url.rstrip('/'),options.unipept)
for start in range(0, len(trypticPeptides), options.max_request):
    batch = trypticPeptides[start:start + options.max_request]
    post_data = request_flags + [('input[]', x) for x in batch]
    if options.debug: print >> sys.stdout, "post_data: %s\n" % (str(post_data))
    if options.get:
        ## GET: parameters go into the query string exactly as given
        params = '&'.join(['%s=%s' % (name,value) for name,value in post_data])
        url = '%s.json?%s' % (base_url,params)
        req = urllib2.Request( url )
    else:
        url = '%s.json' % (base_url)
        req = urllib2.Request( url, headers = headers, data = urllib.urlencode(post_data) )
    if options.debug: print >> sys.stdout, "url: %s\n" % (str(url))
    try:
        resp = urllib2.urlopen( req )
        try:
            status = resp.getcode()
            if options.debug: print >> sys.stdout,"%s %s\n" % (url,str(status))
            if status == 200:
                ## read the response already in hand; opening req a second
                ## time would send the whole batch to the server again
                unipept_resp += json.loads( resp.read() )
        finally:
            resp.close()
    except Exception as e:
        warn_err('HTTP Error %s\n' % (str(e)),exit_code=None)
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
## Map the per-tryptic-fragment responses back onto the input peptides.
## peptideMatches gets one record per match with 'peptide' set to the input
## peptide and 'tryptic_peptide' set to the fragment the match came from;
## input peptides without a usable match are collected in unmatched_peptides.
## Records are copied before they are relabelled: one response entry can
## serve several input peptides (or a repeated input peptide), and editing
## it in place would make those results share and overwrite one dict.
unmatched_peptides = []
peptideMatches = []
if options.debug: print >> sys.stdout,"unipept response: %s\n" % str(unipept_resp)
if options.unipept in ['pept2prot', 'pept2taxa']:
    dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' ## should only keep one of these per input peptide
    ## multiple entries per trypticPeptide for pep2prot or pep2taxa
    mapping = {}
    for match in unipept_resp:
        mapping.setdefault(match['peptide'],[]).append(match)
    for peptide in peptides:
        # Get the intersection of matches to the tryptic parts
        keyToMatch = None
        for part in pepToParts[peptide]:
            if part not in mapping:
                continue
            temp = {match[dupkey] : match for match in mapping[part]}
            if keyToMatch is None:
                ## first fragment with matches seeds the intersection
                keyToMatch = temp
            else:
                ## an intersection that has become empty must stay empty,
                ## hence the 'is None' test above rather than truthiness
                for k in set(keyToMatch) - set(temp):
                    del keyToMatch[k]
        if not keyToMatch:
            unmatched_peptides.append(peptide)
        else:
            for match in keyToMatch.values():
                match = dict(match)
                match['tryptic_peptide'] = match['peptide']
                match['peptide'] = peptide
                peptideMatches.append(match)
else:
    ## should be one response per trypticPeptide
    respMap = {v['peptide']:v for v in unipept_resp}
    ## pept2lca and peptinfo accept every response; the remaining APIs only
    ## accept responses that carry a 'total_protein_count'
    need_protein_count = options.unipept not in ['pept2lca', 'peptinfo']
    ## map resp back to peptides
    for peptide in peptides:
        matches = []
        for part in pepToParts[peptide]:
            if part not in respMap:
                continue
            if need_protein_count and 'total_protein_count' not in respMap[part]:
                continue
            matches.append(respMap[part])
        match = best_match(peptide,matches)
        if not match:
            ## unmatched peptides still get a placeholder record that names
            ## their longest tryptic fragment (last one when lengths tie)
            unmatched_peptides.append(peptide)
            longest_tryptic_peptide = sorted(pepToParts[peptide], key=len)[-1]
            match = {'peptide' : longest_tryptic_peptide}
        else:
            match = dict(match)
        match['tryptic_peptide'] = match['peptide']
        match['peptide'] = peptide
        peptideMatches.append(match)
resp = peptideMatches
if options.debug: print >> sys.stdout,"\nmapped response: %s\n" % str(resp)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
## output results
## with none of these output files requested, dump the mapped response to stdout
if not (options.unmatched or options.json or options.tsv or options.csv):
    print >> sys.stdout, str(resp)
if options.unmatched:
    ## written in input order; a set keeps the membership test cheap
    unmatched_lookup = set(unmatched_peptides)
    with open(options.unmatched,'w') as outputFile:
        for peptide in peptides:
            if peptide in unmatched_lookup:
                outputFile.write("%s\n" % peptide)
if options.json:
    if options.unipept in ['pept2lca', 'pept2taxa', 'peptinfo']:
        ## these APIs write the structure built by get_taxon_json
        root = get_taxon_json(resp)
        with open(options.json,'w') as outputFile:
            outputFile.write(json.dumps(root))
    elif options.unipept in ['pept2prot', 'pept2ec', 'pept2go', 'pept2funct']:
        ## serialise with json.dumps: str(resp) yields a Python repr
        ## (single quotes, u'' prefixes, None/True), which is not valid JSON
        with open(options.json,'w') as outputFile:
            outputFile.write(json.dumps(resp))
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
605 if options.ec_json:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
606 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
607 root = get_ec_json(resp)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
608 with open(options.ec_json,'w') as outputFile:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
609 outputFile.write(json.dumps(root))
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
610 if options.tsv or options.csv:
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
611 rows = []
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
612 column_names = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
613 if options.unipept in ['pept2ec', 'pept2go', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
614 taxa = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
615 ec_dict = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
616 go_dict = None
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
617 if options.unipept in ['peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
618 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
619 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
620 (ec_dict,ec_cols) = get_ec_dict(resp, extra=options.extra)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
621 if options.unipept in ['pept2go', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
622 (go_dict,go_cols) = get_go_dict(resp, extra=options.extra)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
623 for i,pdict in enumerate(resp):
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
624 peptide = pdict['peptide']
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
625 total_protein_count = str(pdict['total_protein_count']) if 'total_protein_count' in pdict else '0'
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
626 column_names = ['peptide', 'total_protein_count']
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
627 vals = [peptide,total_protein_count]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
628 if ec_dict:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
629 vals += ec_dict[peptide]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
630 column_names += ec_cols
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
631 if go_dict:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
632 vals += go_dict[peptide]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
633 column_names += go_cols
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
634 if taxa:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
635 vals += taxa[peptide][1:]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
636 column_names += taxon_cols[1:]
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
637 rows.append(vals)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
638 elif options.unipept in ['pept2lca', 'pept2taxa', 'pept2prot']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
639 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
640 column_names = taxon_cols
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
641 rows = taxa.values()
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
642 for peptide,vals in taxa.iteritems():
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
643 rows.append(vals)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
644 if options.tsv:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
645 with open(options.tsv,'w') as outputFile:
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
646 if column_names:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
647 outputFile.write("#%s\n"% '\t'.join(column_names))
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
648 for vals in rows:
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
649 outputFile.write("%s\n"% '\t'.join(vals))
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
650 if options.csv:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
651 with open(options.csv,'w') as outputFile:
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
652 if column_names:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
653 outputFile.write("%s\n"% ','.join(column_names))
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
654 for vals in rows:
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
655 outputFile.write("%s\n"% ','.join(['"%s"' % (v if v else '') for v in vals]))
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
656 if options.ec_tsv and options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
657 column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
658 write_ec_table(options.ec_tsv, resp, column_order)
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
659 if options.go_tsv and options.unipept in ['pept2go', 'pept2funct', 'peptinfo']:
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
660 column_order = pept2go_extra_column_order if options.extra else pept2go_column_order
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
661 write_go_table(options.go_tsv, resp, column_order)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
662
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
663 if __name__ == "__main__" : __main__()  # entry point: call __main__() only when run as a script, not when imported