Mercurial > repos > galaxyp > unipept
annotate unipept.py @ 4:4953dcd7dd39 draft
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
author | galaxyp |
---|---|
date | Wed, 23 Jan 2019 09:16:38 -0500 |
parents | 34758ab8aaa4 |
children | 917fd3ebc223 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 # | |
4 #------------------------------------------------------------------------------ | |
5 # University of Minnesota | |
6 # Copyright 2015, Regents of the University of Minnesota | |
7 #------------------------------------------------------------------------------ | |
8 # Author: | |
9 # | |
10 # James E Johnson | |
11 # | |
12 #------------------------------------------------------------------------------ | |
13 """ | |
14 | |
15 import json | |
16 import logging | |
17 import optparse | |
18 from optparse import OptionParser | |
19 import os | |
20 import sys | |
21 import re | |
22 import urllib | |
23 import urllib2 | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
24 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
25 """ |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
26 pept2taxa json |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
27 pept2lca json |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
28 pept2prot |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
29 pept2ec ecjson ec |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
30 pept2go go |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
31 pept2funct go ec |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
32 peptinfo json ecjson ec go |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
33 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
34 """ |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
35 |
0 | 36 try: |
37 import xml.etree.cElementTree as ET | |
38 except ImportError: | |
39 import xml.etree.ElementTree as ET | |
40 | |
def warn_err(msg, exit_code=1):
    """Write ``msg`` to stderr and optionally terminate the program.

    msg       -- text written verbatim to ``sys.stderr`` (no newline is added).
    exit_code -- when truthy, the process exits with this status via
                 ``sys.exit``; when falsy (0 or None) the call just returns.
    """
    sys.stderr.write(msg)
    if not exit_code:
        # Warning only: the caller keeps running.
        return
    sys.exit(exit_code)
45 | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
# The three Gene Ontology namespaces, in the order used by this script.
go_types = [
    'biological process',
    'molecular function',
    'cellular component',
]

# Descriptive names for EC numbers, keyed by the EC class ('1') or
# class.subclass ('1.1') prefix.
ec_name_dict = {
    # 1: oxidoreductases
    '1': 'Oxidoreductase',
    '1.1': 'act on the CH-OH group of donors',
    '1.2': 'act on the aldehyde or oxo group of donors',
    '1.3': 'act on the CH-CH group of donors',
    '1.4': 'act on the CH-NH2 group of donors',
    '1.5': 'act on CH-NH group of donors',
    '1.6': 'act on NADH or NADPH',
    '1.7': 'act on other nitrogenous compounds as donors',
    '1.8': 'act on a sulfur group of donors',
    '1.9': 'act on a heme group of donors',
    '1.10': 'act on diphenols and related substances as donors',
    '1.11': 'act on peroxide as an acceptor -- peroxidases',
    '1.12': 'act on hydrogen as a donor',
    '1.13': 'act on single donors with incorporation of molecular oxygen',
    '1.14': 'act on paired donors with incorporation of molecular oxygen',
    '1.15': 'act on superoxide radicals as acceptors',
    '1.16': 'oxidize metal ions',
    '1.17': 'act on CH or CH2 groups',
    '1.18': 'act on iron-sulfur proteins as donors',
    '1.19': 'act on reduced flavodoxin as donor',
    '1.20': 'act on phosphorus or arsenic as donors',
    '1.21': 'act on X-H and Y-H to form an X-Y bond',
    '1.97': 'other oxidoreductases',
    # 2: transferases
    '2': 'Transferase',
    '2.1': 'transfer one-carbon groups, Methylase',
    '2.2': 'transfer aldehyde or ketone groups',
    '2.3': 'acyltransferases',
    '2.4': 'glycosyltransferases',
    '2.5': 'transfer alkyl or aryl groups, other than methyl groups',
    '2.6': 'transfer nitrogenous groups',
    '2.7': 'transfer phosphorus-containing groups',
    '2.8': 'transfer sulfur-containing groups',
    '2.9': 'transfer selenium-containing groups',
    # 3: hydrolases
    '3': 'Hydrolase',
    '3.1': 'act on ester bonds',
    '3.2': 'act on sugars - glycosylases',
    '3.3': 'act on ether bonds',
    '3.4': 'act on peptide bonds - Peptidase',
    '3.5': 'act on carbon-nitrogen bonds, other than peptide bonds',
    '3.6': 'act on acid anhydrides',
    '3.7': 'act on carbon-carbon bonds',
    '3.8': 'act on halide bonds',
    '3.9': 'act on phosphorus-nitrogen bonds',
    '3.10': 'act on sulfur-nitrogen bonds',
    '3.11': 'act on carbon-phosphorus bonds',
    '3.12': 'act on sulfur-sulfur bonds',
    '3.13': 'act on carbon-sulfur bonds',
    # 4: lyases
    '4': 'Lyase',
    '4.1': 'carbon-carbon lyases',
    '4.2': 'carbon-oxygen lyases',
    '4.3': 'carbon-nitrogen lyases',
    '4.4': 'carbon-sulfur lyases',
    '4.5': 'carbon-halide lyases',
    '4.6': 'phosphorus-oxygen lyases',
    # 5: isomerases
    '5': 'Isomerase',
    '5.1': 'racemases and epimerases',
    '5.2': 'cis-trans-isomerases',
    '5.3': 'intramolecular oxidoreductases',
    '5.4': 'intramolecular transferases -- mutases',
    '5.5': 'intramolecular lyases',
    '5.99': 'other isomerases',
    # 6: ligases
    '6': 'Ligase',
    '6.1': 'form carbon-oxygen bonds',
    '6.2': 'form carbon-sulfur bonds',
    '6.3': 'form carbon-nitrogen bonds',
    '6.4': 'form carbon-carbon bonds',
    '6.5': 'form phosphoric ester bonds',
    '6.6': 'form nitrogen-metal bonds',
}
# Column layouts for the tabular outputs, one set per query type.

# pept2lca: basic columns, then the per-rank columns listed from the
# broadest rank (superkingdom) to the most specific (forma).
pept2lca_column_order = [
    'peptide',
    'taxon_rank',
    'taxon_id',
    'taxon_name',
]
pept2lca_extra_column_order = [
    'peptide',
    'superkingdom', 'kingdom', 'subkingdom',
    'superphylum', 'phylum', 'subphylum',
    'superclass', 'class', 'subclass', 'infraclass',
    'superorder', 'order', 'suborder', 'infraorder', 'parvorder',
    'superfamily', 'family', 'subfamily',
    'tribe', 'subtribe',
    'genus', 'subgenus',
    'species_group', 'species_subgroup', 'species', 'subspecies',
    'varietas', 'forma',
]
# Basic columns followed by every rank column ('peptide' is not repeated).
pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[1:]

# pept2prot: basic columns, and the basic columns plus the extra fields.
pept2prot_column_order = [
    'peptide',
    'uniprot_id',
    'taxon_id',
]
pept2prot_extra_column_order = pept2prot_column_order + [
    'taxon_name',
    'ec_references',
    'go_references',
    'refseq_ids',
    'refseq_protein_ids',
    'insdc_ids',
    'insdc_protein_ids',
]

# pept2ec / pept2go: a pair of column lists -- the first names the
# peptide-level columns, the second the per-EC / per-GO-term columns.
pept2ec_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count'],
]
pept2ec_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['ec_number', 'protein_count', 'name'],
]
pept2go_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count'],
]
pept2go_extra_column_order = [
    ['peptide', 'total_protein_count'],
    ['go_term', 'protein_count', 'name'],
]

# pept2funct: a single flat column list.
pept2funct_column_order = [
    'peptide',
    'total_protein_count',
    'ec',
    'go',
]
1 | 127 |
128 def __main__(): | |
3 | 129 version = '2.0' |
1 | 130 pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$' |
131 | |
def read_tabular(filepath, col):
    """Collect the peptide sequences found in one column of a tabular file.

    filepath -- path of a tab-separated text file.
    col      -- index into the tab-split fields of each line (passed straight
                to list indexing, so it is zero-based).

    Blank lines and lines starting with '#' are skipped.  Every remaining
    row contributes the value of column ``col``.  A value that does not
    match ``pep_pat`` is reported through ``warn_err`` and is still kept if
    ``warn_err`` returns.  A row that has no column ``col`` is reported the
    same way and skipped.  Both reports pass ``invalid_ec`` as the exit code;
    ``pep_pat`` and ``invalid_ec`` are defined outside this function.

    Returns the list of peptide strings in file order.
    """
    peptides = []
    with open(filepath) as fp:
        # Number lines from 1 so the reported position matches what a text
        # editor shows (comment and blank lines are counted too).
        for line_num, line in enumerate(fp, 1):
            if line.strip() == '' or line.startswith('#'):
                continue
            # Strip '\r' as well so CRLF files do not leave a carriage
            # return on the last field and fail the peptide pattern.
            fields = line.rstrip('\r\n').split('\t')
            try:
                peptide = fields[col]
            except IndexError:
                # Short row: report it instead of dying with a traceback.
                warn_err('line %d of tabular file %s has no column %d\n' % (line_num, filepath, col), exit_code=invalid_ec)
                continue
            if not re.match(pep_pat, peptide):
                warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide, line_num, col, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
144 | |
def get_fasta_entries(fp):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    fp -- any iterable of text lines (e.g. an open file).

    A line starting with '>' opens a new record; the whole right-stripped
    line, including the '>', is the header.  All following lines up to the
    next header are right-stripped and concatenated into the sequence.
    Lines that appear before the first header are discarded.
    """
    header = None
    chunks = []
    for raw_line in fp:
        text = raw_line.rstrip()
        if not text.startswith(">"):
            chunks.append(text)
            continue
        # A new header closes the record collected so far, if there is one.
        if header:
            yield (header, ''.join(chunks))
        header = text
        chunks = []
    # Emit the final record, which has no following header to close it.
    if header:
        yield (header, ''.join(chunks))
155 | |
def read_fasta(filepath):
    """Collect the sequences of all records in a FASTA file.

    filepath -- path of the FASTA file to read.

    Records are produced by ``get_fasta_entries``.  A sequence that does not
    match ``pep_pat`` is reported through ``warn_err`` (with ``invalid_ec``
    as the exit code) and is still kept if ``warn_err`` returns.  ``pep_pat``
    and ``invalid_ec`` are defined outside this function.

    Returns the list of sequence strings in file order.
    """
    sequences = []
    with open(filepath) as handle:
        for header, sequence in get_fasta_entries(handle):
            looks_like_peptide = re.match(pep_pat, sequence)
            if not looks_like_peptide:
                warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (sequence, header, filepath), exit_code=invalid_ec)
            sequences.append(sequence)
    return sequences
164 | |
def read_mzid(fp):
    """Extract peptide sequences from an XML stream.

    fp -- file name or file object accepted by ``ET.iterparse``.

    Returns, in document order, the text of every element whose tag contains
    'PeptideSequence' (a substring test, so namespace-qualified tags match).
    """
    return [
        node.text
        for event, node in ET.iterparse(fp)
        if event == 'end' and 'PeptideSequence' in node.tag
    ]
0 | 172 |
def read_pepxml(fp):
    """Extract peptide sequences from an XML stream.

    fp -- file name or file object accepted by ``ET.iterparse``.

    Returns, in document order, the ``peptide`` attribute of every element
    whose tag contains 'search_hit' (None where the attribute is absent).
    """
    return [
        node.get('peptide')
        for event, node in ET.iterparse(fp)
        if event == 'end' and 'search_hit' in node.tag
    ]
0 | 180 |
def best_match(peptide, matches):
    """Pick one entry out of ``matches`` and return a shallow copy of it.

    peptide -- not used by the selection logic.
    matches -- list of dicts (may be empty or None).

    Selection rules, in order:
      * no matches             -> None
      * exactly one match      -> that match
      * first match has no 'taxon_rank' key
                               -> the match with the longest 'peptide' value
                                  (the last one in list order among ties)
      * otherwise              -> walk the ranks of
                                  ``pept2lca_extra_column_order`` from last to
                                  first and return the first match whose
                                  'taxon_rank' equals the rank or that has a
                                  truthy value in that rank's column
                                  (``rank + "_id"`` when ``options.extra`` is
                                  set, else ``rank``); None if nothing matches.

    ``pept2lca_extra_column_order`` and ``options`` come from the enclosing
    scopes.
    """
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].copy()
    if 'taxon_rank' not in matches[0]:
        # Stable sort + last element: among equally long peptides the one
        # latest in the input list wins.
        by_length = sorted(matches, key=lambda m: len(m['peptide']))
        return by_length[-1].copy()
    # find the most specific match (peptide is always the first column order field)
    suffix = "_id" if options.extra else ""
    for rank in reversed(pept2lca_extra_column_order[1:]):
        rank_key = rank + suffix
        for candidate in matches:
            if candidate.get('taxon_rank') == rank or candidate.get(rank_key):
                return candidate.copy()
    return None
198 | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def get_taxon_json(resp):
    """Fold a list of per-peptide dicts into one nested tree of taxon nodes.

    resp -- sequence of dicts; it is iterated twice.  Each dict may carry
            'peptide' (or 'tryptic_peptide') plus, per rank, a
            ``<rank>_id`` key and a name stored under either ``<rank>`` or
            ``<rank>_name``.

    Every node is a dict with the keys 'id', 'name', 'children' (child node
    dicts), 'kids' (ids of those children) and 'data' ('count',
    'self_count', 'valid_taxon', 'rank', 'sequences').

    Returns the root node (id 1, name 'root', rank 'no rank').  The root is
    created through the same helper as every other node, so it starts with
    ``count`` and ``self_count`` of 1 before any entry is processed.

    ``pept2lca_extra_column_order`` comes from module scope.
    """
    # Union of all keys present anywhere in the response.
    seen_keys = set()
    for entry in resp:
        seen_keys.update(entry.keys())

    # Ranks that actually occur, from the last rank in the column order
    # back to the first ('peptide' at index 0 is excluded by the slice).
    ranks = [rank for rank in pept2lca_extra_column_order[-1:0:-1]
             if rank + '_id' in seen_keys]

    nodes_by_id = {}

    def get_node(node_id, name, rank, child, seq):
        """Fetch or create the node for ``node_id`` and record one visit."""
        node = nodes_by_id.get(node_id)
        if node is None:
            data = {'count': 0, 'self_count': 0, 'valid_taxon': 1, 'rank': rank, 'sequences': []}
            node = {'id': node_id, 'name': name, 'children': [], 'kids': [], 'data': data}
            nodes_by_id[node_id] = node
        node_data = node['data']
        node_data['count'] += 1
        if seq is not None and seq not in node_data['sequences']:
            node_data['sequences'].append(seq)
        if child is None:
            # Visited without a child below it.
            node_data['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(1, 'root', 'no rank', None, None)
    for entry in resp:
        # Only the first node created for this entry receives the sequence.
        pending_seq = entry.get('peptide', entry.get('tryptic_peptide', None))
        lineage = None
        for rank in ranks:
            taxon_id = entry.get(rank + '_id', None)
            if not taxon_id:
                continue
            name_key = rank if rank in seen_keys else rank + '_name'
            lineage = get_node(taxon_id, entry.get(name_key, ''), rank, lineage, pending_seq)
            pending_seq = None
        if lineage:
            # Hang the top of this entry's chain under the root.
            get_node(1, 'root', 'no rank', lineage, None)
    return root
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
238 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def get_ec_json(resp):
    """Build a nested tree of EC numbers from peptide records.

    ``resp`` is an iterable of dicts.  The peptide sequence is read from the
    ``peptide`` key, falling back to ``tryptic_peptide``; the optional ``ec``
    key holds a list of dicts, each with an ``ec_number`` string.

    Returns the root node (id ``0``, name ``'-.-.-.-'``).  Every node is a
    dict with the keys ``id``, ``name``, ``children`` (list of child nodes),
    ``kids`` (ids of those children) and ``data`` (a dict with ``count``,
    ``self_count`` and ``sequences``).

    The names of the six pre-created top-level classes ('1'..'6') are looked
    up in the module-level ``ec_name_dict``.

    ``resp`` is traversed exactly once, so a one-shot iterator is processed
    correctly (an earlier unused pre-pass consumed it before the tree was
    built).
    """
    def get_ids(ec):
        # Full number first, then every dotted prefix:
        # '1.2.3.4' -> ['1.2.3.4', '1.2.3', '1.2', '1'].
        ids = []
        i = len(ec)
        while i >= 0:
            ids.append(ec[:i])
            i = ec.rfind('.', 0, i - 1)
        return ids

    id_to_node = dict()

    def get_node(node_id, name, child, seq):
        # Fetch the node for node_id, creating it on first use; the name of an
        # existing node is never overwritten.
        if node_id not in id_to_node:
            data = {'count': 0, 'self_count': 0, 'sequences': []}
            node = {'id': node_id, 'name': name, 'children': [], 'kids': [], 'data': data}
            id_to_node[node_id] = node
        else:
            node = id_to_node[node_id]
        # Every call counts as one hit on this node.
        node['data']['count'] += 1
        if seq is not None and seq not in node['data']['sequences']:
            node['data']['sequences'].append(seq)
        if child is None:
            # No child passed in: the hit belongs to this node itself.
            node['data']['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(0, '-.-.-.-', None, None)
    # Pre-create the top-level classes so they are present even without hits.
    for i in range(1, 7):
        child = get_node(str(i), '%s\n%s' % (str(i), ec_name_dict[str(i)]), None, None)
        get_node(0, '-.-.-.-', child, None)
    for pdict in resp:
        seq = pdict.get('peptide', pdict.get('tryptic_peptide', None))
        if 'ec' in pdict:
            for ec in pdict['ec']:
                child = None
                # Walk from the full EC number up through its prefixes,
                # chaining each node as the child of the next one.
                for ec_id in get_ids(ec['ec_number']):
                    child = get_node(ec_id, str(ec_id), child, seq)
                    # NOTE(review): seq is cleared after the first node and is
                    # not restored for later entries of the same record, so
                    # only the leaf of a record's first EC number receives the
                    # sequence - confirm this is intended.
                    seq = None
                if child:
                    get_node(0, '-.-.-.-', child, None)
    return root
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
293 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def get_taxon_dict(resp, column_order, extra=False, names=False):
    """Flatten peptide records into per-peptide rows of taxonomy values.

    ``resp`` is an iterable of dicts and ``column_order`` the ordered list of
    candidate columns.  A column is used only if a matching key occurs in at
    least one record:

    * the column itself, when present as a key;
    * otherwise, with ``names`` set: ``<col>_id`` (only when ``extra`` is also
      set, headed ``<col>_id``) followed by ``<col>_name`` (headed ``<col>``);
    * otherwise: ``<col>_name`` if present, else ``<col>_id`` (both headed
      ``<col>``).

    Returns ``(taxa, column_names)`` where ``taxa`` maps each peptide to its
    list of string values and ``column_names`` holds the matching headers.
    Records without a truthy ``peptide`` are skipped and only the first
    record seen for a peptide is kept.  Missing or falsy values become ``''``.
    """
    # Materialise once: the records are scanned twice and resp may be an
    # iterator.
    results = list(resp)
    found_keys = set()
    for pdict in results:
        found_keys.update(pdict)

    column_names = []
    column_keys = []
    for col in column_order:
        if col in found_keys:
            column_names.append(col)
            column_keys.append(col)
            continue
        col_id = col + '_id'
        col_name = col + '_name'
        if names:
            if extra and col_id in found_keys:
                column_names.append(col_id)
                column_keys.append(col_id)
            if col_name in found_keys:
                column_names.append(col)
                column_keys.append(col_name)
        elif col_name in found_keys:
            column_names.append(col)
            column_keys.append(col_name)
        elif col_id in found_keys:
            column_names.append(col)
            column_keys.append(col_id)

    taxa = dict()  # peptide : [taxonomy]
    for pdict in results:
        peptide = pdict.get('peptide')
        if peptide and peptide not in taxa:
            taxa[peptide] = [str(pdict[key]) if pdict.get(key) else ''
                             for key in column_keys]
    return (taxa, column_names)
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
333 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def get_ec_dict(resp, extra=False):
    """Collapse each record's EC annotations into comma-joined columns.

    ``resp`` is an iterable of dicts, each with a ``peptide`` key and an
    optional ``ec`` list whose items carry ``ec_number`` and
    ``protein_count`` (and optionally ``name``).

    Returns ``(ec_dict, ec_cols)``: ``ec_dict`` maps peptide to
    ``[ec_numbers, protein_counts]`` as comma-joined strings, with a third
    comma-joined names string when ``extra`` is set; ``ec_cols`` lists the
    matching column headers.  A later record for the same peptide replaces
    an earlier one.

    Raises ``KeyError`` when a record lacks ``peptide`` or an item lacks
    ``ec_number`` / ``protein_count``.
    """
    ec_cols = ['ec_numbers', 'ec_protein_counts']
    if extra:
        ec_cols.append('ec_names')
    ec_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        # A missing or null 'ec' entry yields empty columns.
        ec_list = list(pdict.get('ec') or [])
        vals = [
            ','.join(ec['ec_number'] for ec in ec_list),
            ','.join(str(ec['protein_count']) for ec in ec_list),
        ]
        if extra:
            # A missing or null name becomes '' so the columns stay aligned
            # and the join cannot fail on None.
            vals.append(','.join(ec.get('name') or '' for ec in ec_list))
        ec_dict[peptide] = vals
    return (ec_dict, ec_cols)
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
355 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def get_go_dict(resp, extra=False):
    """Collapse each record's GO annotations into comma-joined columns.

    ``resp`` is an iterable of dicts, each with a ``peptide`` key and an
    optional ``go`` list.  An item of that list is either a flat annotation
    (it has a ``go_term`` key) or a grouping dict; for a grouping dict the
    annotation lists stored under each key named in the module-level
    ``go_types`` are used, in ``go_types`` order.

    Returns ``(go_dict, go_cols)``: ``go_dict`` maps peptide to
    ``[go_terms, protein_counts]`` as comma-joined strings, with a third
    comma-joined names string when ``extra`` is set; ``go_cols`` lists the
    matching column headers.  A later record for the same peptide replaces
    an earlier one.

    Raises ``KeyError`` when a record lacks ``peptide`` or an annotation
    lacks ``go_term`` / ``protein_count``.
    """
    go_cols = ['go_terms', 'go_protein_counts']
    if extra:
        go_cols.append('go_names')
    go_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        # Gather flat and grouped annotations into one ordered list so the
        # column building below is written only once.
        annotations = []
        for go in pdict.get('go') or []:
            if 'go_term' in go:
                annotations.append(go)
            else:
                for go_type in go_types:
                    if go_type in go:
                        annotations.extend(go[go_type])
        vals = [
            ','.join(entry['go_term'] for entry in annotations),
            ','.join(str(entry['protein_count']) for entry in annotations),
        ]
        if extra:
            # A missing or null name becomes '' so the columns stay aligned
            # and the join cannot fail on None.
            vals.append(','.join(entry.get('name') or '' for entry in annotations))
        go_dict[peptide] = vals
    return (go_dict, go_cols)
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
386 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def write_ec_table(outfile, resp, column_order):
    """Write one tab-separated line per (record, EC annotation) pair.

    ``outfile`` is the path to write (opened in ``'w'`` mode), ``resp`` an
    iterable of record dicts and ``column_order`` a sequence of key lists:
    its first element names the record-level keys, its last element the keys
    read from each item of the record's ``ec`` list.

    Records without an ``ec`` key produce no output.  Missing or falsy
    values are written as empty fields.  No header line is written.
    """
    def cells(record, keys):
        # One string per key; absent or falsy values become ''.
        return [str(record[key]) if record.get(key) else '' for key in keys]

    with open(outfile, 'w') as fh:
        for pdict in resp:
            if 'ec' not in pdict:
                continue
            tvals = cells(pdict, column_order[0])
            for ec in pdict['ec']:
                fh.write('%s\n' % '\t'.join(tvals + cells(ec, column_order[-1])))
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
395 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
def write_go_table(outfile, resp, column_order):
    """Write the GO annotations found in resp to outfile as tab-separated rows.

    outfile      -- path of the file to create (opened with mode 'w').
    resp         -- iterable of per-peptide dicts; entries without a 'go'
                    key produce no output.
    column_order -- sequence of key lists: column_order[0] names the keys
                    read from each peptide dict, column_order[-1] names the
                    keys read from each GO record.

    One line is written per GO record: the peptide-level cells followed by
    the GO-level cells.  No header line is written.

    Each item of pdict['go'] is handled in one of two ways:
      * it contains 'go_term': it is itself a GO record;
      * otherwise it is treated as a mapping from a GO type name to a list
        of GO records, and the type name is appended as a trailing column.
        The type names come from go_types, which is not defined in this
        function -- NOTE(review): presumably defined in the enclosing
        scope earlier in the file; confirm.
    """
    def cells(record, keys):
        # A missing key and a falsy value (None, 0, '') both give an empty cell.
        return [str(record[k]) if k in record and record[k] else '' for k in keys]

    with open(outfile, 'w') as fh:
        for pdict in resp:
            if 'go' not in pdict:
                continue
            tvals = cells(pdict, column_order[0])
            for go in pdict['go']:
                if 'go_term' in go:
                    vals = cells(go, column_order[-1])
                    fh.write('%s\n' % '\t'.join(tvals + vals))
                    continue
                # Grouped form: one list of GO records per GO type name.
                for go_type in go_types:
                    if go_type not in go:
                        continue
                    for _go in go[go_type]:
                        vals = cells(_go, column_order[-1])
                        vals.append(go_type)
                        fh.write('%s\n' % '\t'.join(tvals + vals))
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
412 |
0 | 413 #Parse Command Line |
414 parser = optparse.OptionParser() | |
1 | 415 # unipept API choice |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
416 parser.add_option( '-a', '--api', dest='unipept', default='pept2lca', choices=['pept2lca','pept2taxa','pept2prot', 'pept2ec', 'pept2go', 'pept2funct', 'peptinfo'], |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
417 help='The unipept application: pept2lca, pept2taxa, pept2prot, pept2ec, pept2go, pept2funct, or peptinfo' ) |
1 | 418 # input files |
0 | 419 parser.add_option( '-t', '--tabular', dest='tabular', default=None, help='A tabular file that contains a peptide column' ) |
420 parser.add_option( '-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains peptide sequences' ) | |
421 parser.add_option( '-f', '--fasta', dest='fasta', default=None, help='A fasta file containing peptide sequences' ) | |
422 parser.add_option( '-m', '--mzid', dest='mzid', default=None, help='A mxIdentML file containing peptide sequences' ) | |
423 parser.add_option( '-p', '--pepxml', dest='pepxml', default=None, help='A pepxml file containing peptide sequences' ) | |
424 # Unipept Flags | |
425 parser.add_option( '-e', '--equate_il', dest='equate_il', action='store_true', default=False, help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records' ) | |
426 parser.add_option( '-x', '--extra', dest='extra', action='store_true', default=False, help='return the complete lineage of the taxonomic lowest common ancestor' ) | |
427 parser.add_option( '-n', '--names', dest='names', action='store_true', default=False, help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor' ) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
428 parser.add_option( '-D', '--domains', dest='domains', action='store_true', default=False, help='group response by GO namaspace: biological process, molecular function, cellular component' ) |
3 | 429 parser.add_option( '-M', '--max_request', dest='max_request', type='int', default=200, help='The maximum number of entries per unipept request' ) |
430 | |
1 | 431 # output fields |
432 parser.add_option( '-A', '--allfields', dest='allfields', action='store_true', default=False, help='inlcude fields: taxon_rank,taxon_id,taxon_name csv and tsv outputs' ) | |
0 | 433 # Warn vs Error Flag |
434 parser.add_option( '-S', '--strict', dest='strict', action='store_true', default=False, help='Print exit on invalid peptide' ) | |
1 | 435 # output files |
0 | 436 parser.add_option( '-J', '--json', dest='json', default=None, help='Output file path for json formatted results') |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
437 parser.add_option( '-j', '--ec_json', dest='ec_json', default=None, help='Output file path for json formatted results') |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
438 parser.add_option( '-E', '--ec_tsv', dest='ec_tsv', default=None, help='Output file path for EC TAB-separated-values (.tsv) formatted results') |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
439 parser.add_option( '-G', '--go_tsv', dest='go_tsv', default=None, help='Output file path for GO TAB-separated-values (.tsv) formatted results') |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
440 parser.add_option( '-L', '--lineage_tsv', dest='lineage_tsv', default=None, help='Output file path for Lineage TAB-separated-values (.tsv) formatted results') |
0 | 441 parser.add_option( '-T', '--tsv', dest='tsv', default=None, help='Output file path for TAB-separated-values (.tsv) formatted results') |
442 parser.add_option( '-C', '--csv', dest='csv', default=None, help='Output file path for Comma-separated-values (.csv) formatted results') | |
1 | 443 parser.add_option( '-U', '--unmatched', dest='unmatched', default=None, help='Output file path for peptide with no matches' ) |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
444 parser.add_option( '-u', '--url', dest='url', default='http://api.unipept.ugent.be/api/v1/', help='unipept url http://api.unipept.ugent.be/api/v1/' ) |
1 | 445 # debug |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
446 parser.add_option( '-g', '--get', dest='get', action='store_true', default=False, help='Use GET instead of POST' ) |
1 | 447 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turning on debugging' ) |
448 parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='pring version and exit' ) | |
0 | 449 (options, args) = parser.parse_args() |
1 | 450 if options.version: |
451 print >> sys.stdout,"%s" % version | |
452 sys.exit(0) | |
0 | 453 invalid_ec = 2 if options.strict else None |
454 peptides = [] | |
455 ## Get peptide sequences | |
456 if options.mzid: | |
457 peptides += read_mzid(options.mzid) | |
458 if options.pepxml: | |
459 peptides += read_pepxml(options.pepxml) | |
460 if options.tabular: | |
1 | 461 peptides += read_tabular(options.tabular,options.column) |
0 | 462 if options.fasta: |
1 | 463 peptides += read_fasta(options.fasta) |
0 | 464 if args and len(args) > 0: |
465 for i,peptide in enumerate(args): | |
466 if not re.match(pep_pat,peptide): | |
467 warn_err('"%s" is not a peptide (arg %d)\n' % (peptide,i),exit_code=invalid_ec) | |
468 peptides.append(peptide) | |
469 if len(peptides) < 1: | |
470 warn_err("No peptides input!",exit_code=1) | |
1 | 471 column_order = pept2lca_column_order |
472 if options.unipept == 'pept2prot': | |
473 column_order = pept2prot_extra_column_order if options.extra else pept2prot_column_order | |
474 else: | |
475 if options.extra or options.names: | |
476 column_order = pept2lca_all_column_order if options.allfields else pept2lca_extra_column_order | |
477 else: | |
478 column_order = pept2lca_column_order | |
479 ## map to tryptic peptides | |
480 pepToParts = {p: re.split("\n", re.sub(r'(?<=[RK])(?=[^P])','\n', p)) for p in peptides} | |
481 partToPeps = {} | |
482 for peptide, parts in pepToParts.iteritems(): | |
483 if options.debug: print >> sys.stdout, "peptide: %s\ttryptic: %s\n" % (peptide, parts) | |
484 for part in parts: | |
485 if len(part) > 50: | |
486 warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide,len(part),part),exit_code=None) | |
487 if 5 <= len(part) <= 50: | |
488 partToPeps.setdefault(part,[]).append(peptide) | |
489 trypticPeptides = partToPeps.keys() | |
0 | 490 ## unipept |
3 | 491 unipept_resp = [] |
492 idx = range(0,len(trypticPeptides),options.max_request) | |
493 idx.append(len(trypticPeptides)) | |
494 for i in range(len(idx)-1): | |
495 post_data = [] | |
496 if options.equate_il: | |
497 post_data.append(("equate_il","true")) | |
498 if options.names or options.json: | |
499 post_data.append(("extra","true")) | |
500 post_data.append(("names","true")) | |
501 elif options.extra or options.json: | |
502 post_data.append(("extra","true")) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
503 if options.domains: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
504 post_data.append(("domains","true")) |
3 | 505 post_data += [('input[]', x) for x in trypticPeptides[idx[i]:idx[i+1]]] |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
506 if options.debug: print >> sys.stdout, "post_data: %s\n" % (str(post_data)) |
3 | 507 headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'} |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
508 ## headers = {'Accept': 'application/json'} |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
509 url = '%s/%s' % (options.url.rstrip('/'),options.unipept) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
510 if options.get: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
511 params = '&'.join(['%s=%s' % (i[0],i[1]) for i in post_data]) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
512 url = '%s.json?%s' % (url,params) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
513 req = urllib2.Request( url ) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
514 else: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
515 url = '%s.json' % (url) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
516 req = urllib2.Request( url, headers = headers, data = urllib.urlencode(post_data) ) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
517 if options.debug: print >> sys.stdout, "url: %s\n" % (str(url)) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
518 try: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
519 resp = urllib2.urlopen( req ) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
520 if options.debug: print >> sys.stdout,"%s %s\n" % (url,str(resp.getcode())) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
521 if resp.getcode() == 200: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
522 unipept_resp += json.loads( urllib2.urlopen( req ).read() ) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
523 except Exception, e: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
524 warn_err('HTTP Error %s\n' % (str(e)),exit_code=None) |
1 | 525 unmatched_peptides = [] |
526 peptideMatches = [] | |
527 if options.debug: print >> sys.stdout,"unipept response: %s\n" % str(unipept_resp) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
528 if options.unipept in ['pept2prot', 'pept2taxa']: |
1 | 529 dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' ## should only keep one of these per input peptide |
530 ## multiple entries per trypticPeptide for pep2prot or pep2taxa | |
531 mapping = {} | |
532 for match in unipept_resp: | |
533 mapping.setdefault(match['peptide'],[]).append(match) | |
534 for peptide in peptides: | |
535 # Get the intersection of matches to the tryptic parts | |
536 keyToMatch = None | |
537 for part in pepToParts[peptide]: | |
538 if part in mapping: | |
539 temp = {match[dupkey] : match for match in mapping[part]} | |
540 if keyToMatch: | |
541 dkeys = set(keyToMatch.keys()) - set(temp.keys()) | |
542 for k in dkeys: | |
543 del keyToMatch[k] | |
544 else: | |
545 keyToMatch = temp | |
546 ## keyToMatch = keyToMatch.fromkeys([x for x in keyToMatch if x in temp]) if keyToMatch else temp | |
547 if not keyToMatch: | |
548 unmatched_peptides.append(peptide) | |
549 else: | |
550 for key,match in keyToMatch.iteritems(): | |
551 match['tryptic_peptide'] = match['peptide'] | |
552 match['peptide'] = peptide | |
553 peptideMatches.append(match) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
554 elif options.unipept in ['pept2lca', 'peptinfo']: |
1 | 555 ## should be one response per trypticPeptide for pep2lca |
556 respMap = {v['peptide']:v for v in unipept_resp} | |
557 ## map resp back to peptides | |
558 for peptide in peptides: | |
559 matches = list() | |
560 for part in pepToParts[peptide]: | |
561 if part in respMap: | |
562 matches.append(respMap[part]) | |
563 match = best_match(peptide,matches) | |
564 if not match: | |
565 unmatched_peptides.append(peptide) | |
566 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] | |
567 match = {'peptide' : longest_tryptic_peptide} | |
568 match['tryptic_peptide'] = match['peptide'] | |
569 match['peptide'] = peptide | |
570 peptideMatches.append(match) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
571 else: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
572 respMap = {v['peptide']:v for v in unipept_resp} |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
573 ## map resp back to peptides |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
574 for peptide in peptides: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
575 matches = list() |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
576 for part in pepToParts[peptide]: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
577 if part in respMap and 'total_protein_count' in respMap[part]: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
578 matches.append(respMap[part]) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
579 match = best_match(peptide,matches) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
580 if not match: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
581 unmatched_peptides.append(peptide) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
582 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
583 match = {'peptide' : longest_tryptic_peptide} |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
584 match['tryptic_peptide'] = match['peptide'] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
585 match['peptide'] = peptide |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
586 peptideMatches.append(match) |
1 | 587 resp = peptideMatches |
588 if options.debug: print >> sys.stdout,"\nmapped response: %s\n" % str(resp) | |
0 | 589 ## output results |
1 | 590 if not (options.unmatched or options.json or options.tsv or options.csv): |
0 | 591 print >> sys.stdout, str(resp) |
1 | 592 if options.unmatched: |
593 with open(options.unmatched,'w') as outputFile: | |
0 | 594 for peptide in peptides: |
1 | 595 if peptide in unmatched_peptides: |
0 | 596 outputFile.write("%s\n" % peptide) |
597 if options.json: | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
598 if options.unipept in ['pept2lca', 'pept2taxa', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
599 root = get_taxon_json(resp) |
3 | 600 with open(options.json,'w') as outputFile: |
601 outputFile.write(json.dumps(root)) | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
602 elif options.unipept in ['pept2prot', 'pept2ec', 'pept2go', 'pept2funct']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
603 with open(options.json,'w') as outputFile: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
604 outputFile.write(str(resp)) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
605 if options.ec_json: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
606 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
607 root = get_ec_json(resp) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
608 with open(options.ec_json,'w') as outputFile: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
609 outputFile.write(json.dumps(root)) |
0 | 610 if options.tsv or options.csv: |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
611 rows = [] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
612 column_names = None |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
613 if options.unipept in ['pept2ec', 'pept2go', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
614 taxa = None |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
615 ec_dict = None |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
616 go_dict = None |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
617 if options.unipept in ['peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
618 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
619 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
620 (ec_dict,ec_cols) = get_ec_dict(resp, extra=options.extra) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
621 if options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
622 (go_dict,go_cols) = get_go_dict(resp, extra=options.extra) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
623 for i,pdict in enumerate(resp): |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
624 peptide = pdict['peptide'] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
625 total_protein_count = str(pdict['total_protein_count']) if 'total_protein_count' in pdict else '0' |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
626 column_names = ['peptide', 'total_protein_count'] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
627 vals = [peptide,total_protein_count] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
628 if ec_dict: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
629 vals += ec_dict[peptide] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
630 column_names += ec_cols |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
631 if go_dict: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
632 vals += go_dict[peptide] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
633 column_names += go_cols |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
634 if taxa: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
635 vals += taxa[peptide][1:] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
636 column_names += taxon_cols[1:] |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
637 rows.append(vals) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
638 elif options.unipept in ['pept2lca', 'pept2taxa', 'pept2prot']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
639 (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
640 column_names = taxon_cols |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
641 rows = taxa.values() |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
642 for peptide,vals in taxa.iteritems(): |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
643 rows.append(vals) |
0 | 644 if options.tsv: |
645 with open(options.tsv,'w') as outputFile: | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
646 if column_names: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
647 outputFile.write("#%s\n"% '\t'.join(column_names)) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
648 for vals in rows: |
0 | 649 outputFile.write("%s\n"% '\t'.join(vals)) |
650 if options.csv: | |
651 with open(options.csv,'w') as outputFile: | |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
652 if column_names: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
653 outputFile.write("%s\n"% ','.join(column_names)) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
654 for vals in rows: |
0 | 655 outputFile.write("%s\n"% ','.join(['"%s"' % (v if v else '') for v in vals])) |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
656 if options.ec_tsv and options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
657 column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
658 write_ec_table(options.ec_tsv, resp, column_order) |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
659 if options.go_tsv and options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
660 column_order = pept2go_extra_column_order if options.extra else pept2go_column_order |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
661 write_go_table(options.go_tsv, resp, column_order) |
0 | 662 |
# Script entry point: run the command-line driver only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()