Mercurial > repos > galaxyp > unipept
annotate unipept.py @ 9:21a560af5913 draft default tip
planemo upload for repository https://unipept.ugent.be/apidocs commit 19735e85caae264d98562f6fdb3b213841087fc7
author | galaxyp |
---|---|
date | Tue, 12 Mar 2024 11:44:08 +0000 |
parents | 7863f1abcdda |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 # | |
4 # Author: | |
5 # | |
6 # James E Johnson | |
7 # | |
8 #------------------------------------------------------------------------------ | |
9 """ | |
10 import json | |
11 import optparse | |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
12 import re |
0 | 13 import sys |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
14 import urllib.error |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
15 import urllib.parse |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
16 import urllib.request |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
17 |
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
18 |
0 | 19 try: |
20 import xml.etree.cElementTree as ET | |
21 except ImportError: | |
22 import xml.etree.ElementTree as ET | |
23 | |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
24 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def warn_err(msg, exit_code=1):
    """Write ``msg`` to stderr and, if ``exit_code`` is truthy, exit with it.

    Args:
        msg: Text written verbatim to ``sys.stderr``; no newline is appended.
        exit_code: Status handed to ``sys.exit``. A falsy value (``0`` or
            ``None``) turns the call into a warning only and the function
            returns normally.

    Raises:
        SystemExit: When ``exit_code`` is truthy.
    """
    sys.stderr.write(msg)
    if exit_code:
        sys.exit(exit_code)
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
29 |
0 | 30 |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
# Category names for GO terms and InterPro entries.
# NOTE(review): presumably these mirror the category labels returned by the
# Unipept API -- confirm against the code that consumes them.
go_types = ['biological process', 'molecular function', 'cellular component']
ipr_types = ['Domain', 'Family', 'Homologous_superfamily', 'Repeat', 'Conserved_site', 'Active_site', 'Binding_site', 'PTM']
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
# Human-readable names keyed by EC number prefix: one-component keys ('1'..'6')
# name the enzyme class, two-component keys ('1.1', ...) name the subclass.
ec_name_dict = {
    '1': 'Oxidoreductase',
    '1.1': 'act on the CH-OH group of donors',
    '1.2': 'act on the aldehyde or oxo group of donors',
    '1.3': 'act on the CH-CH group of donors',
    '1.4': 'act on the CH-NH2 group of donors',
    '1.5': 'act on CH-NH group of donors',
    '1.6': 'act on NADH or NADPH',
    '1.7': 'act on other nitrogenous compounds as donors',
    '1.8': 'act on a sulfur group of donors',
    '1.9': 'act on a heme group of donors',
    '1.10': 'act on diphenols and related substances as donors',
    '1.11': 'act on peroxide as an acceptor -- peroxidases',
    '1.12': 'act on hydrogen as a donor',
    '1.13': 'act on single donors with incorporation of molecular oxygen',
    '1.14': 'act on paired donors with incorporation of molecular oxygen',
    '1.15': 'act on superoxide radicals as acceptors',
    '1.16': 'oxidize metal ions',
    '1.17': 'act on CH or CH2 groups',
    '1.18': 'act on iron-sulfur proteins as donors',
    '1.19': 'act on reduced flavodoxin as donor',
    '1.20': 'act on phosphorus or arsenic as donors',
    '1.21': 'act on X-H and Y-H to form an X-Y bond',
    '1.97': 'other oxidoreductases',
    '2': 'Transferase',
    '2.1': 'transfer one-carbon groups, Methylase',
    '2.2': 'transfer aldehyde or ketone groups',
    '2.3': 'acyltransferases',
    '2.4': 'glycosyltransferases',
    '2.5': 'transfer alkyl or aryl groups, other than methyl groups',
    '2.6': 'transfer nitrogenous groups',
    '2.7': 'transfer phosphorus-containing groups',
    '2.8': 'transfer sulfur-containing groups',
    '2.9': 'transfer selenium-containing groups',
    '3': 'Hydrolase',
    '3.1': 'act on ester bonds',
    '3.2': 'act on sugars - glycosylases',
    '3.3': 'act on ether bonds',
    '3.4': 'act on peptide bonds - Peptidase',
    '3.5': 'act on carbon-nitrogen bonds, other than peptide bonds',
    '3.6': 'act on acid anhydrides',
    '3.7': 'act on carbon-carbon bonds',
    '3.8': 'act on halide bonds',
    '3.9': 'act on phosphorus-nitrogen bonds',
    '3.10': 'act on sulfur-nitrogen bonds',
    '3.11': 'act on carbon-phosphorus bonds',
    '3.12': 'act on sulfur-sulfur bonds',
    '3.13': 'act on carbon-sulfur bonds',
    '4': 'Lyase',
    '4.1': 'carbon-carbon lyases',
    '4.2': 'carbon-oxygen lyases',
    '4.3': 'carbon-nitrogen lyases',
    '4.4': 'carbon-sulfur lyases',
    '4.5': 'carbon-halide lyases',
    '4.6': 'phosphorus-oxygen lyases',
    '5': 'Isomerase',
    '5.1': 'racemases and epimerases',
    '5.2': 'cis-trans-isomerases',
    '5.3': 'intramolecular oxidoreductases',
    '5.4': 'intramolecular transferases -- mutases',
    '5.5': 'intramolecular lyases',
    '5.99': 'other isomerases',
    '6': 'Ligase',
    '6.1': 'form carbon-oxygen bonds',
    '6.2': 'form carbon-sulfur bonds',
    '6.3': 'form carbon-nitrogen bonds',
    '6.4': 'form carbon-carbon bonds',
    '6.5': 'form phosphoric ester bonds',
    '6.6': 'form nitrogen-metal bonds',
}
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
# Column orders, one "basic" and one "extra" list per pept2* kind.
# NOTE(review): presumably these fix the column layout of the tabular output
# for each Unipept API call -- confirm against the code that consumes them.
pept2lca_column_order = ['peptide', 'taxon_rank', 'taxon_id', 'taxon_name']
pept2lca_extra_column_order = ['peptide', 'superkingdom', 'kingdom', 'subkingdom', 'superphylum', 'phylum', 'subphylum', 'superclass', 'class', 'subclass', 'infraclass', 'superorder', 'order', 'suborder', 'infraorder', 'parvorder', 'superfamily', 'family', 'subfamily', 'tribe', 'subtribe', 'genus', 'subgenus', 'species_group', 'species_subgroup', 'species', 'subspecies', 'varietas', 'forma']
# NOTE(review): the [2:] slice drops both 'peptide' and 'superkingdom', so the
# combined list starts its rank columns at 'kingdom' -- confirm that leaving
# out 'superkingdom' is intended.
pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[2:]
pept2prot_column_order = ['peptide', 'uniprot_id', 'taxon_id']
pept2prot_extra_column_order = pept2prot_column_order + ['taxon_name', 'ec_references', 'go_references', 'refseq_ids', 'refseq_protein_ids', 'insdc_ids', 'insdc_protein_ids']
# The ec/go/interpro orders are two-part: [peptide-level columns, per-annotation columns].
pept2ec_column_order = [['peptide', 'total_protein_count'], ['ec_number', 'protein_count']]
pept2ec_extra_column_order = [['peptide', 'total_protein_count'], ['ec_number', 'protein_count', 'name']]
pept2go_column_order = [['peptide', 'total_protein_count'], ['go_term', 'protein_count']]
pept2go_extra_column_order = [['peptide', 'total_protein_count'], ['go_term', 'protein_count', 'name']]
pept2interpro_column_order = [['peptide', 'total_protein_count'], ['code', 'protein_count']]
pept2interpro_extra_column_order = [['peptide', 'total_protein_count'], ['code', 'protein_count', 'type', 'name']]
pept2funct_column_order = ['peptide', 'total_protein_count', 'ec', 'go', 'ipr']
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
115 |
1 | 116 |
117 def __main__(): | |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
118 version = '4.3' |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
119 pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$' |
1 | 120 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def read_tabular(filepath, col):
    """Collect the value in column ``col`` from each data line of a tabular file.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    split on tabs and the field at index ``col`` is taken as the peptide.

    Args:
        filepath: Path of the tab-separated file to read.
        col: Index into the tab-split fields of each line.

    Returns:
        list: The selected field of every data line, in file order.

    Note:
        ``pep_pat`` and ``invalid_ec`` are read from the surrounding scope.
        A value that does not match ``pep_pat`` is reported through
        ``warn_err(..., exit_code=invalid_ec)``; if that call returns, the
        value is still appended to the result.
    """
    peptides = []
    with open(filepath) as fp:
        for i, line in enumerate(fp):
            if line.strip() == '' or line.startswith('#'):
                continue
            fields = line.rstrip('\n').split('\t')
            peptide = fields[col]
            if not re.match(pep_pat, peptide):
                # i is the enumerate() index of the line, i.e. counted from 0.
                warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide, i, col, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
1 | 133 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_fasta_entries(fp):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    Args:
        fp: Iterable of text lines (for example an open file).

    Yields:
        tuple: ``(name, seq)`` where ``name`` is the full header line including
        the leading ``>`` (trailing whitespace removed) and ``seq`` is the
        concatenation of the right-stripped lines that follow it, up to the
        next header or the end of input.

    Lines that appear before the first header are dropped, because the
    accumulator is reset when the first header is seen.
    """
    name, seq = None, []
    for line in fp:
        line = line.rstrip()
        if line.startswith(">"):
            # A new header closes the previous entry, if there was one.
            if name:
                yield (name, ''.join(seq))
            name, seq = line, []
        else:
            seq.append(line)
    # Emit the final entry, which has no following header to trigger it.
    if name:
        yield (name, ''.join(seq))
0 | 146 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def read_fasta(filepath):
    """Return the sequence of every entry in a FASTA file.

    Args:
        filepath: Path of the FASTA file to read.

    Returns:
        list: One sequence string per entry, in file order, as produced by
        ``get_fasta_entries``.

    Note:
        ``pep_pat`` and ``invalid_ec`` are read from the surrounding scope.
        A sequence that does not match ``pep_pat`` is reported through
        ``warn_err(..., exit_code=invalid_ec)``; if that call returns, the
        sequence is still appended to the result.
    """
    peptides = []
    with open(filepath) as fp:
        # entry_id is the full header line of the entry (including '>').
        for entry_id, peptide in get_fasta_entries(fp):
            if not re.match(pep_pat, peptide):
                warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (peptide, entry_id, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
1 | 155 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def read_mzid(fp):
    """Return the text of every element whose tag contains ``PeptideSequence``.

    The input is parsed incrementally with ``ET.iterparse`` (an mzIdentML
    document, going by the function name).

    Args:
        fp: Filename or binary file object accepted by ``ET.iterparse``.

    Returns:
        list: ``elem.text`` of each matching element, in document order. An
        empty matching element contributes ``None``.
    """
    peptides = []
    for event, elem in ET.iterparse(fp):
        if event == 'end':
            # Substring search rather than equality, so namespace-qualified
            # tags such as '{uri}PeptideSequence' match too.
            if re.search('PeptideSequence', elem.tag):
                peptides.append(elem.text)
            # The element has been fully handled; drop its text, attributes
            # and children so a large file is not held in memory as a tree.
            elem.clear()
    return peptides
0 | 163 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def read_pepxml(fp):
    """Return the ``peptide`` attribute of every element whose tag contains ``search_hit``.

    The input is parsed incrementally with ``ET.iterparse`` (a pepXML
    document, going by the function name).

    Args:
        fp: Filename or binary file object accepted by ``ET.iterparse``.

    Returns:
        list: The ``peptide`` attribute of each matching element, in document
        order. A matching element without that attribute contributes ``None``.
    """
    peptides = []
    for event, elem in ET.iterparse(fp):
        if event == 'end':
            # Substring search rather than equality, so namespace-qualified
            # tags such as '{uri}search_hit' match too.
            if re.search('search_hit', elem.tag):
                peptides.append(elem.get('peptide'))
            # The element has been fully handled; drop its text, attributes
            # and children so a large file is not held in memory as a tree.
            elem.clear()
    return peptides
0 | 171 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def best_match(peptide, matches):
    """
    Pick one entry out of a list of match dicts and return a copy of it.

    peptide: not used by this function.
    matches: list of dicts (may be empty or None).

    Returns None when there is nothing to choose from, otherwise a shallow
    copy of the selected dict:
      * a single match is returned as is;
      * when the first match has a 'taxon_rank' key, the ranks in
        pept2lca_extra_column_order (first entry skipped) are walked from the
        last to the first and the first match that either has that
        'taxon_rank' or a non-empty value for the rank column wins;
      * otherwise the match with the longest 'peptide' wins (the last one
        among equally long peptides).
    """
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].copy()
    if 'taxon_rank' not in matches[0]:
        by_length = sorted(matches, key=lambda m: len(m['peptide']))
        return by_length[-1].copy()
    # find the most specific match (peptide is always the first column order field)
    for rank in pept2lca_extra_column_order[:0:-1]:
        # with options.extra the rank columns are looked up as '<rank>_id'
        rank_key = rank + "_id" if options.extra else rank
        for candidate in matches:
            has_rank = 'taxon_rank' in candidate and candidate['taxon_rank'] == rank
            if has_rank or candidate.get(rank_key):
                return candidate.copy()
    return None
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
189 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_taxon_json(resp):
    """
    Build a nested tree of taxon nodes from a list of result dicts.

    resp: list of dicts; each may carry '<rank>_id' keys together with
          '<rank>' or '<rank>_name' keys, and a 'peptide' or
          'tryptic_peptide' key.

    Returns the root node (id 1, name 'root').  Every node is a dict with
    the keys 'id', 'name', 'children' (child node dicts), 'kids' (child
    ids) and 'data' ('count', 'self_count', 'valid_taxon', 'rank',
    'sequences').
    """
    found_keys = set()
    for entry in resp:
        found_keys.update(entry.keys())
    # Rank columns present in the response, walked from the last entry of the
    # column order to the second one (index 0 is skipped).
    taxa_cols = [rank for rank in reversed(pept2lca_extra_column_order[1:])
                 if rank + '_id' in found_keys]
    id_to_node = {}

    def get_node(node_id, name, rank, child, seq):
        # Fetch the node for node_id (creating it on first use), count this
        # visit, and attach seq / child when given.
        node = id_to_node.get(node_id)
        if node is None:
            node = {
                'id': node_id,
                'name': name,
                'children': [],
                'kids': [],
                'data': {'count': 0, 'self_count': 0, 'valid_taxon': 1, 'rank': rank, 'sequences': []},
            }
            id_to_node[node_id] = node
        info = node['data']
        info['count'] += 1
        if seq is not None and seq not in info['sequences']:
            info['sequences'].append(seq)
        if child is None:
            # no child passed: the visit ends at this node
            info['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(1, 'root', 'no rank', None, None)
    for entry in resp:
        # the sequence is recorded only on the first node created for this entry
        pending_seq = entry.get('peptide', entry.get('tryptic_peptide', None))
        lineage = None
        for rank in taxa_cols:
            rank_id = entry.get(rank + '_id')
            if not rank_id:
                continue
            name_key = rank if rank in found_keys else rank + '_name'
            lineage = get_node(rank_id, entry.get(name_key, ''), rank, lineage, pending_seq)
            pending_seq = None
        if lineage is not None:
            get_node(1, 'root', 'no rank', lineage, None)
    return root
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
230 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_ec_json(resp):
    """
    Build a nested tree of EC-number nodes from a list of result dicts.

    resp: list of dicts; an entry may have an 'ec' key holding a list of
          dicts with an 'ec_number' string, and a 'peptide' or
          'tryptic_peptide' key.

    Returns the root node (id 0, name '-.-.-.-').  Every node is a dict with
    the keys 'id', 'name', 'children' (child node dicts), 'kids' (child
    ids) and 'data' ('count', 'self_count', 'sequences').  Nodes '1'..'6'
    are always created under the root; their names are taken from
    ec_name_dict, which is defined outside this function.
    """
    def get_ids(ec):
        # 'a.b.c.d' -> ['a.b.c.d', 'a.b.c', 'a.b', 'a']
        ids = []
        i = len(ec)
        while i >= 0:
            ids.append(ec[:i])
            if i == 0:
                # Stop here: the next search would use end == -1, which
                # str.rfind reads as "one before the end of the string", so
                # a leading '.' would be found again and the loop would
                # never terminate.
                break
            i = ec.rfind('.', 0, i - 1)
        return ids

    id_to_node = dict()

    def get_node(id, name, child, seq):
        # Fetch the node for id (creating it on first use), count this
        # visit, and attach seq / child when given.
        if id not in id_to_node:
            data = {'count': 0, 'self_count': 0, 'sequences': []}
            node = {'id': id, 'name': name, 'children': [], 'kids': [], 'data': data}
            id_to_node[id] = node
        else:
            node = id_to_node[id]
        node['data']['count'] += 1
        if seq is not None and seq not in node['data']['sequences']:
            node['data']['sequences'].append(seq)
        if child is None:
            # no child passed: the visit ends at this node
            node['data']['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(0, '-.-.-.-', None, None)
    # always create the six first-level nodes '1'..'6'
    for i in range(1, 7):
        child = get_node(str(i), '%s\n%s' % (str(i), ec_name_dict[str(i)]), None, None)
        get_node(0, '-.-.-.-', child, None)
    for pdict in resp:
        # the sequence is recorded only on the first node visited for this entry
        seq = pdict.get('peptide', pdict.get('tryptic_peptide', None))
        if 'ec' in pdict:
            for ec in pdict['ec']:
                child = None
                ec_number = ec['ec_number']
                # walk from the full EC number up to its first field
                for ec_id in get_ids(ec_number):
                    ec_name = str(ec_id)
                    child = get_node(ec_id, ec_name, child, seq)
                    seq = None
                if child:
                    get_node(0, '-.-.-.-', child, None)
    return root
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
284 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_taxon_dict(resp, column_order, extra=False, names=False):
    """
    Turn a list of result dicts into per-peptide rows of string values.

    resp:         iterable of dicts, each normally holding a 'peptide' key.
    column_order: candidate column names, in output order.
    extra:        with names=True, also emit the '<col>_id' column.
    names:        emit '<col>_name' values under the plain column name.

    A column is kept only when a matching key occurs in at least one dict.
    Returns (taxa, column_names): taxa maps each peptide to a list of
    strings, one per kept column (empty string for missing or falsy
    values); only the first dict seen for a peptide is used.
    """
    results = list(resp)
    found_keys = set()
    for entry in results:
        found_keys.update(entry.keys())

    column_names = []
    column_keys = []

    def keep(label, key):
        # register one output column: header label and the dict key it reads
        column_names.append(label)
        column_keys.append(key)

    for col in column_order:
        if col in found_keys:
            keep(col, col)
            continue
        id_key = col + '_id'
        name_key = col + '_name'
        if names:
            if extra and id_key in found_keys:
                keep(id_key, id_key)
            if name_key in found_keys:
                keep(col, name_key)
        elif name_key in found_keys:
            keep(col, name_key)
        elif id_key in found_keys:
            keep(col, id_key)

    taxa = {}  # peptide: [taxonomy]
    for entry in results:
        peptide = entry.get('peptide')
        if not peptide or peptide in taxa:
            continue
        taxa[peptide] = [str(entry[key]) if entry.get(key) else '' for key in column_keys]
    return (taxa, column_names)
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
324 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_ec_dict(resp, extra=False):
    """Summarise the EC annotations of each peptide as comma-joined strings.

    resp is iterated as a sequence of per-peptide dicts.  Each one must have
    a 'peptide' key and may have an 'ec' list whose items carry 'ec_number',
    'protein_count' and, optionally, 'name'.

    Returns a tuple (ec_dict, ec_cols):
      ec_dict maps peptide -> [ec_numbers, protein_counts] with the joined
              EC names appended as a third item when extra is true;
      ec_cols is the matching list of column headers.
    """
    columns = ['ec_numbers', 'ec_protein_counts']
    if extra:
        columns += ['ec_names']
    summary = {}
    for entry in resp:
        peptide = entry['peptide']
        numbers, counts, names = [], [], []
        # A peptide without an 'ec' key still gets a row of empty strings.
        for annotation in (entry['ec'] if 'ec' in entry else ()):
            numbers.append(annotation['ec_number'])
            counts.append(str(annotation['protein_count']))
            if extra:
                # Missing or falsy names become '' so the joined lists stay aligned.
                names.append(annotation.get('name') or '')
        joined = [','.join(numbers), ','.join(counts)]
        if extra:
            joined.append(','.join(names))
        summary[peptide] = joined
    return (summary, columns)
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
346 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_go_dict(resp, extra=False):
    """Summarise the GO annotations of each peptide as comma-joined strings.

    resp is iterated as a sequence of per-peptide dicts.  Each one must have
    a 'peptide' key and may have a 'go' list.  An item of that list is either
    a flat annotation (it has a 'go_term' key) or a grouping dict whose keys
    are looked up from go_types and whose values are lists of annotations.
    go_types is not defined in this function; it is read from an outer scope.

    Returns a tuple (go_dict, go_cols):
      go_dict maps peptide -> [go_terms, protein_counts] with the joined
              GO names appended as a third item when extra is true;
      go_cols is the matching list of column headers.
    """
    columns = ['go_terms', 'go_protein_counts']
    if extra:
        columns.append('go_names')
    summary = {}
    for entry in resp:
        peptide = entry['peptide']
        terms, counts, names = [], [], []
        for item in (entry['go'] if 'go' in entry else ()):
            if 'go_term' in item:
                leaves = [item]
            else:
                # Grouped form: collect the annotations in go_types order.
                leaves = []
                for namespace in go_types:
                    if namespace in item:
                        leaves.extend(item[namespace])
            for leaf in leaves:
                terms.append(leaf['go_term'])
                counts.append(str(leaf['protein_count']))
                if extra:
                    # Missing or falsy names become '' so the lists stay aligned.
                    names.append(leaf.get('name') or '')
        joined = [','.join(terms), ','.join(counts)]
        if extra:
            joined.append(','.join(names))
        summary[peptide] = joined
    return (summary, columns)
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
377 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def get_ipr_dict(resp, extra=False):
    """Summarise the InterPro annotations of each peptide as comma-joined strings.

    resp is iterated as a sequence of per-peptide dicts.  Each one must have
    a 'peptide' key and may have an 'ipr' list.  An item of that list is
    either a flat annotation (it has a 'code' key) or a grouping dict whose
    keys are looked up from ipr_types and whose values are lists of
    annotations.  ipr_types is not defined in this function; it is read from
    an outer scope.

    Returns a tuple (ipr_dict, ipr_cols):
      ipr_dict maps peptide -> [ipr_codes, protein_counts] with the joined
               types and names appended (in that order) when extra is true;
      ipr_cols is the matching list of column headers.
    """
    ipr_cols = ['ipr_codes', 'ipr_protein_counts']
    if extra:
        ipr_cols.append('ipr_types')
        ipr_cols.append('ipr_names')
    ipr_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        ipr_codes = []
        protein_counts = []
        ipr_names = []
        # This accumulator must NOT be called ipr_types: a local of that name
        # hides the outer list of InterPro type names, so the grouped branch
        # below would loop over this (initially empty) list and collect nothing.
        ipr_type_vals = []
        if 'ipr' in pdict:
            for ipr in pdict['ipr']:
                if 'code' in ipr:
                    ipr_codes.append(ipr['code'])
                    protein_counts.append(str(ipr['protein_count']))
                    if extra:
                        # Missing or falsy values become '' (as in the EC/GO
                        # summaries) so that ','.join never sees None.
                        ipr_type_vals.append(ipr.get('type') or '')
                        ipr_names.append(ipr.get('name') or '')
                else:
                    # Grouped form: walk the outer ipr_types list in order.
                    for ipr_type in ipr_types:
                        if ipr_type in ipr:
                            for _ipr in ipr[ipr_type]:
                                ipr_codes.append(_ipr['code'])
                                protein_counts.append(str(_ipr['protein_count']))
                                if extra:
                                    ipr_type_vals.append(ipr_type)
                                    ipr_names.append(_ipr.get('name') or '')
        vals = [','.join(ipr_codes), ','.join(protein_counts)]
        if extra:
            vals.append(','.join(ipr_type_vals))
            vals.append(','.join(ipr_names))
        ipr_dict[peptide] = vals
    return (ipr_dict, ipr_cols)
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
413 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def write_ec_table(outfile, resp, column_order):
    """Write one tab-separated line per EC annotation to outfile.

    outfile is opened for writing (truncating any existing content).  For
    every dict in resp that has an 'ec' key, each item of that list yields a
    line made of the peptide-level fields named in column_order[0] followed
    by the annotation fields named in column_order[-1].  Missing or falsy
    values are written as empty strings.
    """
    def cells(record, keys):
        # Render the requested fields, blanking absent or falsy ones.
        rendered = []
        for key in keys:
            value = record[key] if key in record else None
            rendered.append(str(value) if value else '')
        return rendered

    with open(outfile, 'w') as fh:
        for entry in resp:
            if 'ec' not in entry:
                continue
            prefix = cells(entry, column_order[0])
            for annotation in entry['ec']:
                row = prefix + cells(annotation, column_order[-1])
                fh.write('%s\n' % '\t'.join(row))
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
422 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def write_go_table(outfile, resp, column_order):
    """Write one tab-separated line per GO annotation to outfile.

    outfile is opened for writing (truncating any existing content).  For
    every dict in resp that has a 'go' key, each annotation yields a line
    made of the peptide-level fields named in column_order[0] followed by
    the annotation fields named in column_order[-1].  A 'go' item that has a
    'go_term' key is written directly; otherwise it is treated as a grouping
    dict keyed by the names in go_types (read from an outer scope), and the
    matching group name is appended as a final column.  Missing or falsy
    values are written as empty strings.
    """
    def cells(record, keys):
        # Render the requested fields, blanking absent or falsy ones.
        rendered = []
        for key in keys:
            value = record[key] if key in record else None
            rendered.append(str(value) if value else '')
        return rendered

    with open(outfile, 'w') as fh:
        for entry in resp:
            if 'go' not in entry:
                continue
            prefix = cells(entry, column_order[0])
            for item in entry['go']:
                if 'go_term' in item:
                    row = prefix + cells(item, column_order[-1])
                    fh.write('%s\n' % '\t'.join(row))
                    continue
                for namespace in go_types:
                    if namespace not in item:
                        continue
                    for term in item[namespace]:
                        row = prefix + cells(term, column_order[-1]) + [namespace]
                        fh.write('%s\n' % '\t'.join(row))
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
439 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
def write_ipr_table(outfile, resp, column_order):
    """Write one tab-separated line per InterPro annotation to outfile.

    outfile is opened for writing (truncating any existing content).  For
    every dict in resp that has an 'ipr' key, each annotation yields a line
    made of the peptide-level fields named in column_order[0] followed by
    the annotation fields named in column_order[-1].  An 'ipr' item that has
    a 'code' key is written directly; otherwise it is treated as a grouping
    dict keyed by the names in ipr_types (read from an outer scope), and the
    matching group name is appended as a final column.  Missing or falsy
    values are written as empty strings.
    """
    def cells(record, keys):
        # Render the requested fields, blanking absent or falsy ones.
        rendered = []
        for key in keys:
            value = record[key] if key in record else None
            rendered.append(str(value) if value else '')
        return rendered

    with open(outfile, 'w') as fh:
        for entry in resp:
            if 'ipr' not in entry:
                continue
            prefix = cells(entry, column_order[0])
            for item in entry['ipr']:
                if 'code' in item:
                    row = prefix + cells(item, column_order[-1])
                    fh.write('%s\n' % '\t'.join(row))
                    continue
                for group in ipr_types:
                    if group not in item:
                        continue
                    for member in item[group]:
                        row = prefix + cells(member, column_order[-1]) + [group]
                        fh.write('%s\n' % '\t'.join(row))
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
456 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
457 # Parse Command Line |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
458 parser = optparse.OptionParser() |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
459 # unipept API choice |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
460 parser.add_option('-a', '--api', dest='unipept', default='pept2lca', choices=['pept2lca', 'pept2taxa', 'pept2prot', 'pept2ec', 'pept2go', 'pept2interpro', 'pept2funct', 'peptinfo'], |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
461 help='The unipept application: pept2lca, pept2taxa, pept2prot, pept2ec, pept2go, pept2funct, or peptinfo') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
462 # input files |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
463 parser.add_option('-t', '--tabular', dest='tabular', default=None, help='A tabular file that contains a peptide column') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
464 parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains peptide sequences') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
465 parser.add_option('-f', '--fasta', dest='fasta', default=None, help='A fasta file containing peptide sequences') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
466 parser.add_option('-m', '--mzid', dest='mzid', default=None, help='A mxIdentML file containing peptide sequences') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
467 parser.add_option('-p', '--pepxml', dest='pepxml', default=None, help='A pepxml file containing peptide sequences') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
468 # Unipept Flags |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
469 parser.add_option('-e', '--equate_il', dest='equate_il', action='store_true', default=False, help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
470 parser.add_option('-x', '--extra', dest='extra', action='store_true', default=False, help='return the complete lineage of the taxonomic lowest common ancestor') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
471 parser.add_option('-n', '--names', dest='names', action='store_true', default=False, help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
472 parser.add_option('-D', '--domains', dest='domains', action='store_true', default=False, help='group response by GO namaspace: biological process, molecular function, cellular component') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
473 parser.add_option('-M', '--max_request', dest='max_request', type='int', default=200, help='The maximum number of entries per unipept request') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
474 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
475 # output fields |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
476 parser.add_option('-A', '--allfields', dest='allfields', action='store_true', default=False, help='inlcude fields: taxon_rank,taxon_id,taxon_name csv and tsv outputs') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
477 # Warn vs Error Flag |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
478 parser.add_option('-S', '--strict', dest='strict', action='store_true', default=False, help='Print exit on invalid peptide') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
479 # output files |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
480 parser.add_option('-J', '--json', dest='json', default=None, help='Output file path for json formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
481 parser.add_option('-j', '--ec_json', dest='ec_json', default=None, help='Output file path for json formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
482 parser.add_option('-E', '--ec_tsv', dest='ec_tsv', default=None, help='Output file path for EC TAB-separated-values (.tsv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
483 parser.add_option('-G', '--go_tsv', dest='go_tsv', default=None, help='Output file path for GO TAB-separated-values (.tsv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
484 parser.add_option('-I', '--ipr_tsv', dest='ipr_tsv', default=None, help='Output file path for InterPro TAB-separated-values (.tsv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
485 parser.add_option('-L', '--lineage_tsv', dest='lineage_tsv', default=None, help='Output file path for Lineage TAB-separated-values (.tsv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
486 parser.add_option('-T', '--tsv', dest='tsv', default=None, help='Output file path for TAB-separated-values (.tsv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
487 parser.add_option('-C', '--csv', dest='csv', default=None, help='Output file path for Comma-separated-values (.csv) formatted results') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
488 parser.add_option('-U', '--unmatched', dest='unmatched', default=None, help='Output file path for peptide with no matches') |
8
7863f1abcdda
planemo upload for repository https://unipept.ugent.be/apidocs commit 8b14b02c5b11232c0c2cc278e3ac492455f2e0a5
galaxyp
parents:
7
diff
changeset
|
489 parser.add_option('-u', '--url', dest='url', default='https://api.unipept.ugent.be/api/v1/', help='unipept url https://api.unipept.ugent.be/api/v1/') |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
490 parser.add_option('-P', '--peptide_match', dest='peptide_match', choices=['best', 'full', 'report'], default='best', help='Match whole peptide') |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
491 parser.add_option('--unmatched_aa', dest='unmatched_aa', default=None, help='Show unmatched AA in peptide as') |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
492 # debug |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
493 parser.add_option('-g', '--get', dest='get', action='store_true', default=False, help='Use GET instead of POST') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
494 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turning on debugging') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
495 parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
496 (options, args) = parser.parse_args() |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
497 if options.version: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
498 print('%s' % version) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
499 sys.exit(0) |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
500 |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
501 def tryptic_match_string(peptide, tryptic_matches): |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
502 if options.unmatched_aa: |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
503 p = peptide.lower() |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
504 for m in tryptic_matches: |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
505 p = p.replace(m.lower(), m) |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
506 return re.sub('[a-z]', options.unmatched_aa, p) |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
507 else: |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
508 return ','.join(tryptic_matches) |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
509 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
510 invalid_ec = 2 if options.strict else None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
511 peptides = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
512 # Get peptide sequences |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
513 if options.mzid: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
514 peptides += read_mzid(options.mzid) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
515 if options.pepxml: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
516 peptides += read_pepxml(options.pepxml) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
517 if options.tabular: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
518 peptides += read_tabular(options.tabular, options.column) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
519 if options.fasta: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
520 peptides += read_fasta(options.fasta) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
521 if args and len(args) > 0: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
522 for i, peptide in enumerate(args): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
523 if not re.match(pep_pat, peptide): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
524 warn_err('"%s" is not a peptide (arg %d)\n' % (peptide, i), exit_code=invalid_ec) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
525 peptides.append(peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
526 if len(peptides) < 1: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
527 warn_err("No peptides input!", exit_code=1) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
528 column_order = pept2lca_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
529 if options.unipept == 'pept2prot': |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
530 column_order = pept2prot_extra_column_order if options.extra else pept2prot_column_order |
4
4953dcd7dd39
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents:
3
diff
changeset
|
531 else: |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
532 if options.extra or options.names: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
533 column_order = pept2lca_all_column_order if options.allfields else pept2lca_extra_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
534 else: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
535 column_order = pept2lca_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
536 # map to tryptic peptides |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
537 if options.peptide_match == 'full': |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
538 pepToParts = {p: [p] for p in peptides} |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
539 else: |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
540 pepToParts = {p: re.split('\n', re.sub(r'(?<=[RK])(?=[^P])', '\n', p)) for p in peptides} |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
541 if options.debug: |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
542 print("column_order: %s\n" % (column_order), file=sys.stderr) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
543 partToPeps = {} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
544 for peptide, parts in pepToParts.items(): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
545 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
546 print("peptide: %s\ttryptic: %s\n" % (peptide, parts), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
547 for part in parts: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
548 if len(part) > 50: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
549 warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide, len(part), part), exit_code=None) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
550 if 5 <= len(part) <= 50: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
551 partToPeps.setdefault(part, []).append(peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
552 trypticPeptides = list(partToPeps.keys()) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
553 # unipept |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
554 unipept_resp = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
555 idx = list(range(0, len(trypticPeptides), options.max_request)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
556 idx.append(len(trypticPeptides)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
557 for i in range(len(idx) - 1): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
558 post_data = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
559 if options.equate_il: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
560 post_data.append(('equate_il', 'true')) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
561 if options.names or options.json: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
562 post_data.append(('extra', 'true')) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
563 post_data.append(('names', 'true')) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
564 elif options.extra or options.json: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
565 post_data.append(('extra', 'true')) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
566 if options.domains: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
567 post_data.append(('domains', 'true')) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
568 post_data += [('input[]', x) for x in trypticPeptides[idx[i]:idx[i + 1]]] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
569 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
570 print('post_data: %s\n' % (str(post_data)), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
571 headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
572 url = '%s/%s' % (options.url.rstrip('/'), options.unipept) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
573 if options.get: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
574 params = '&'.join(["%s=%s" % (i[0], i[1]) for i in post_data]) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
575 url = '%s.json?%s' % (url, params) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
576 req = urllib.request.Request(url) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
577 else: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
578 url = '%s.json' % (url) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
579 data = urllib.parse.urlencode(post_data).encode() |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
580 params = '&'.join(["%s=%s" % (i[0], i[1]) for i in post_data]) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
581 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
582 print('data:\n%s\n' % (data), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
583 req = urllib.request.Request(url, headers=headers, data=urllib.parse.urlencode(post_data).encode(), method='POST') |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
584 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
585 print("url: %s\n" % (str(url)), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
586 try: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
587 resp = urllib.request.urlopen(req) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
588 rdata = resp.read() |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
589 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
590 print("%s %s\n" % (url, str(resp.getcode())), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
591 if resp.getcode() == 200: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
592 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
593 print("rdata: \n%s\n\n" % (rdata), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
594 unipept_resp += json.loads(rdata) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
595 # unipept_resp += json.loads(urllib.request.urlopen(req).read()) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
596 except Exception as e: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
597 warn_err('HTTP Error %s\n' % (str(e)), exit_code=None) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
598 unmatched_peptides = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
599 peptideMatches = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
600 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
601 print("unipept response: %s\n" % str(unipept_resp), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
602 if options.unipept in ['pept2prot', 'pept2taxa']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
603 dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' # should only keep one of these per input peptide |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
604 # multiple entries per trypticPeptide for pept2prot or pept2taxa |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
605 mapping = {} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
606 for match in unipept_resp: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
607 mapping.setdefault(match['peptide'], []).append(match) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
608 for peptide in peptides: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
609 # Get the intersection of matches to the tryptic parts |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
610 keyToMatch = None |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
611 tryptic_match = [] |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
612 for part in pepToParts[peptide]: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
613 if part in mapping: |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
614 tryptic_match.append(part) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
615 temp = {match[dupkey]: match for match in mapping[part]} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
616 if keyToMatch: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
617 dkeys = set(keyToMatch.keys()) - set(temp.keys()) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
618 for k in dkeys: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
619 del keyToMatch[k] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
620 else: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
621 keyToMatch = temp |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
622 # keyToMatch = keyToMatch.fromkeys([x for x in keyToMatch if x in temp]) if keyToMatch else temp |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
623 if not keyToMatch: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
624 unmatched_peptides.append(peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
625 else: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
626 for key, match in keyToMatch.items(): |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
627 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
628 match['tryptic_peptide'] = match['peptide'] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
629 match['peptide'] = peptide |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
630 peptideMatches.append(match) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
631 elif options.unipept in ['pept2lca', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
632 # should be one response per trypticPeptide for pept2lca |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
633 respMap = {v['peptide']: v for v in unipept_resp} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
634 # map resp back to peptides |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
635 for peptide in peptides: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
636 matches = list() |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
637 tryptic_match = [] |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
638 for part in pepToParts[peptide]: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
639 if part in respMap: |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
640 tryptic_match.append(part) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
641 matches.append(respMap[part]) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
642 match = best_match(peptide, matches) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
643 if not match: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
644 unmatched_peptides.append(peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
645 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
646 match = {'peptide': longest_tryptic_peptide} |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
647 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
648 match['tryptic_peptide'] = match['peptide'] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
649 match['peptide'] = peptide |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
650 peptideMatches.append(match) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
651 else: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
652 respMap = {v['peptide']: v for v in unipept_resp} |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
653 # map resp back to peptides |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
654 for peptide in peptides: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
655 matches = list() |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
656 tryptic_match = [] |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
657 for part in pepToParts[peptide]: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
658 if part in respMap and 'total_protein_count' in respMap[part]: |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
659 tryptic_match.append(part) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
660 matches.append(respMap[part]) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
661 match = best_match(peptide, matches) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
662 if not match: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
663 unmatched_peptides.append(peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
664 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
665 match = {'peptide': longest_tryptic_peptide} |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
666 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match) |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
667 match['tryptic_peptide'] = match['peptide'] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
668 match['peptide'] = peptide |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
669 peptideMatches.append(match) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
670 resp = peptideMatches |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
671 if options.debug: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
672 print("\nmapped response: %s\n" % str(resp), file=sys.stderr) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
673 # output results |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
674 if not (options.unmatched or options.json or options.tsv or options.csv): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
675 print(str(resp)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
676 if options.unmatched: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
677 with open(options.unmatched, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
678 for peptide in peptides: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
679 if peptide in unmatched_peptides: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
680 outputFile.write("%s\n" % peptide) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
681 if options.json: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
682 if options.unipept in ['pept2lca', 'pept2taxa', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
683 root = get_taxon_json(resp) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
684 with open(options.json, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
685 outputFile.write(json.dumps(root)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
686 elif options.unipept in ['pept2prot', 'pept2ec', 'pept2go', 'pept2interpro', 'pept2funct']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
687 with open(options.json, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
688 outputFile.write(str(resp)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
689 if options.ec_json: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
690 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
691 root = get_ec_json(resp) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
692 with open(options.ec_json, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
693 outputFile.write(json.dumps(root)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
694 if options.tsv or options.csv: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
695 rows = [] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
696 column_names = None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
697 if options.unipept in ['pept2ec', 'pept2go', 'pept2interpro', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
698 taxa = None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
699 ec_dict = None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
700 go_dict = None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
701 ipr_dict = None |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
702 if options.unipept in ['peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
703 (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
704 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
705 (ec_dict, ec_cols) = get_ec_dict(resp, extra=options.extra) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
706 if options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
707 (go_dict, go_cols) = get_go_dict(resp, extra=options.extra) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
708 if options.unipept in ['pept2interpro', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
709 (ipr_dict, ipr_cols) = get_ipr_dict(resp, extra=options.extra) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
710 for i, pdict in enumerate(resp): |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
711 peptide = pdict['peptide'] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
712 total_protein_count = str(pdict['total_protein_count']) if 'total_protein_count' in pdict else '0' |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
713 column_names = ['peptide', 'total_protein_count'] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
714 vals = [peptide, total_protein_count] |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
715 if options.peptide_match == 'report': |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
716 column_names = ['peptide', 'tryptic_match', 'total_protein_count'] |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
717 tryptic_match = pdict.get('tryptic_match', '') |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
718 vals = [peptide, tryptic_match, total_protein_count] |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
719 if ec_dict: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
720 vals += ec_dict[peptide] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
721 column_names += ec_cols |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
722 if go_dict: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
723 vals += go_dict[peptide] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
724 column_names += go_cols |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
725 if ipr_dict: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
726 vals += ipr_dict[peptide] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
727 column_names += ipr_cols |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
728 if taxa: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
729 vals += taxa[peptide][1:] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
730 column_names += taxon_cols[1:] |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
731 rows.append(vals) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
732 elif options.unipept in ['pept2lca', 'pept2taxa', 'pept2prot']: |
7
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
733 if options.peptide_match == 'report': |
75b3b3d0adbf
"planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents:
6
diff
changeset
|
734 column_order.insert(1, 'tryptic_match') |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
735 (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
736 column_names = taxon_cols |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
737 rows = list(taxa.values()) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
738 if options.tsv: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
739 with open(options.tsv, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
740 if column_names: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
741 outputFile.write("#%s\n" % '\t'.join(column_names)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
742 for vals in rows: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
743 outputFile.write("%s\n" % '\t'.join(vals)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
744 if options.csv: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
745 with open(options.csv, 'w') as outputFile: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
746 if column_names: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
747 outputFile.write("%s\n" % ','.join(column_names)) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
748 for vals in rows: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
749 outputFile.write("%s\n" % ','.join(['"%s"' % (v if v else '') for v in vals])) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
750 if options.ec_tsv and options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
751 column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
752 write_ec_table(options.ec_tsv, resp, column_order) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
753 if options.go_tsv and options.unipept in ['pept2go', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
754 column_order = pept2go_extra_column_order if options.extra else pept2go_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
755 write_go_table(options.go_tsv, resp, column_order) |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
756 if options.ipr_tsv and options.unipept in ['pept2interpro', 'pept2funct', 'peptinfo']: |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
757 column_order = pept2interpro_extra_column_order if options.extra else pept2interpro_column_order |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
758 write_ipr_table(options.ipr_tsv, resp, column_order) |
0 | 759 |
5
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
760 |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
761 if __name__ == "__main__": |
917fd3ebc223
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents:
4
diff
changeset
|
762 __main__() |