annotate unipept.py @ 9:21a560af5913 draft default tip

planemo upload for repository https://unipept.ugent.be/apidocs commit 19735e85caae264d98562f6fdb3b213841087fc7
author galaxyp
date Tue, 12 Mar 2024 11:44:08 +0000
parents 7863f1abcdda
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
2 """
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
3 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
4 # Author:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
5 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
6 # James E Johnson
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
7 #
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
8 #------------------------------------------------------------------------------
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
9 """
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
10 import json
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
11 import optparse
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
12 import re
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
13 import sys
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
14 import urllib.error
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
15 import urllib.parse
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
16 import urllib.request
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
17
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
18
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
19 try:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
20 import xml.etree.cElementTree as ET
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
21 except ImportError:
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
22 import xml.etree.ElementTree as ET
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
23
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
24
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def warn_err(msg, exit_code=1):
    """Write ``msg`` to standard error and optionally terminate the program.

    Args:
        msg: Text written verbatim to ``sys.stderr``; no newline is appended,
            so callers include their own line ending.
        exit_code: Status handed to ``sys.exit``. Any falsy value (``0`` or
            ``None``) suppresses the exit, turning the call into a warning.

    Raises:
        SystemExit: When ``exit_code`` is truthy.
    """
    sys.stderr.write(msg)
    if exit_code:
        sys.exit(exit_code)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
29
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
30
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
# Category labels for GO and InterPro annotations. How they are consumed is
# outside this part of the file — presumably to group or split result columns
# by category; verify against the code that reads them.
go_types = ['biological process', 'molecular function', 'cellular component']
ipr_types = [
    'Domain',
    'Family',
    'Homologous_superfamily',
    'Repeat',
    'Conserved_site',
    'Active_site',
    'Binding_site',
    'PTM',
]
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
# Descriptive names for Enzyme Commission numbers, keyed by the dotted prefix
# as a string: a single digit ('1'..'6') names the top-level class and
# 'N.M' names a subclass within it. Only these two levels are listed.
ec_name_dict = {
    # 1: oxidoreductases
    '1': 'Oxidoreductase',
    '1.1': 'act on the CH-OH group of donors',
    '1.2': 'act on the aldehyde or oxo group of donors',
    '1.3': 'act on the CH-CH group of donors',
    '1.4': 'act on the CH-NH2 group of donors',
    '1.5': 'act on CH-NH group of donors',
    '1.6': 'act on NADH or NADPH',
    '1.7': 'act on other nitrogenous compounds as donors',
    '1.8': 'act on a sulfur group of donors',
    '1.9': 'act on a heme group of donors',
    '1.10': 'act on diphenols and related substances as donors',
    '1.11': 'act on peroxide as an acceptor -- peroxidases',
    '1.12': 'act on hydrogen as a donor',
    '1.13': 'act on single donors with incorporation of molecular oxygen',
    '1.14': 'act on paired donors with incorporation of molecular oxygen',
    '1.15': 'act on superoxide radicals as acceptors',
    '1.16': 'oxidize metal ions',
    '1.17': 'act on CH or CH2 groups',
    '1.18': 'act on iron-sulfur proteins as donors',
    '1.19': 'act on reduced flavodoxin as donor',
    '1.20': 'act on phosphorus or arsenic as donors',
    '1.21': 'act on X-H and Y-H to form an X-Y bond',
    '1.97': 'other oxidoreductases',
    # 2: transferases
    '2': 'Transferase',
    '2.1': 'transfer one-carbon groups, Methylase',
    '2.2': 'transfer aldehyde or ketone groups',
    '2.3': 'acyltransferases',
    '2.4': 'glycosyltransferases',
    '2.5': 'transfer alkyl or aryl groups, other than methyl groups',
    '2.6': 'transfer nitrogenous groups',
    '2.7': 'transfer phosphorus-containing groups',
    '2.8': 'transfer sulfur-containing groups',
    '2.9': 'transfer selenium-containing groups',
    # 3: hydrolases
    '3': 'Hydrolase',
    '3.1': 'act on ester bonds',
    '3.2': 'act on sugars - glycosylases',
    '3.3': 'act on ether bonds',
    '3.4': 'act on peptide bonds - Peptidase',
    '3.5': 'act on carbon-nitrogen bonds, other than peptide bonds',
    '3.6': 'act on acid anhydrides',
    '3.7': 'act on carbon-carbon bonds',
    '3.8': 'act on halide bonds',
    '3.9': 'act on phosphorus-nitrogen bonds',
    '3.10': 'act on sulfur-nitrogen bonds',
    '3.11': 'act on carbon-phosphorus bonds',
    '3.12': 'act on sulfur-sulfur bonds',
    '3.13': 'act on carbon-sulfur bonds',
    # 4: lyases
    '4': 'Lyase',
    '4.1': 'carbon-carbon lyases',
    '4.2': 'carbon-oxygen lyases',
    '4.3': 'carbon-nitrogen lyases',
    '4.4': 'carbon-sulfur lyases',
    '4.5': 'carbon-halide lyases',
    '4.6': 'phosphorus-oxygen lyases',
    # 5: isomerases
    '5': 'Isomerase',
    '5.1': 'racemases and epimerases',
    '5.2': 'cis-trans-isomerases',
    '5.3': 'intramolecular oxidoreductases',
    '5.4': 'intramolecular transferases -- mutases',
    '5.5': 'intramolecular lyases',
    '5.99': 'other isomerases',
    # 6: ligases
    '6': 'Ligase',
    '6.1': 'form carbon-oxygen bonds',
    '6.2': 'form carbon-sulfur bonds',
    '6.3': 'form carbon-nitrogen bonds',
    '6.4': 'form carbon-carbon bonds',
    '6.5': 'form phosphoric ester bonds',
    '6.6': 'form nitrogen-metal bonds',
}
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
# Column layouts, one pair (basic / extra) per query type. The flat lists are
# plain column names. The nested two-element lists pair peptide-level columns
# with a second list of per-annotation columns; how each layout is consumed
# lies outside this part of the file.
pept2lca_column_order = ['peptide', 'taxon_rank', 'taxon_id', 'taxon_name']
pept2lca_extra_column_order = [
    'peptide',
    'superkingdom', 'kingdom', 'subkingdom',
    'superphylum', 'phylum', 'subphylum',
    'superclass', 'class', 'subclass', 'infraclass',
    'superorder', 'order', 'suborder', 'infraorder', 'parvorder',
    'superfamily', 'family', 'subfamily',
    'tribe', 'subtribe',
    'genus', 'subgenus',
    'species_group', 'species_subgroup', 'species', 'subspecies',
    'varietas', 'forma',
]
# NOTE(review): the [2:] slice drops 'superkingdom' as well as the duplicate
# 'peptide' column, so the combined layout starts its ranks at 'kingdom' —
# confirm that omitting 'superkingdom' is intended.
pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[2:]
pept2prot_column_order = ['peptide', 'uniprot_id', 'taxon_id']
pept2prot_extra_column_order = pept2prot_column_order + [
    'taxon_name',
    'ec_references',
    'go_references',
    'refseq_ids',
    'refseq_protein_ids',
    'insdc_ids',
    'insdc_protein_ids',
]
pept2ec_column_order = [['peptide', 'total_protein_count'], ['ec_number', 'protein_count']]
pept2ec_extra_column_order = [['peptide', 'total_protein_count'], ['ec_number', 'protein_count', 'name']]
pept2go_column_order = [['peptide', 'total_protein_count'], ['go_term', 'protein_count']]
pept2go_extra_column_order = [['peptide', 'total_protein_count'], ['go_term', 'protein_count', 'name']]
pept2interpro_column_order = [['peptide', 'total_protein_count'], ['code', 'protein_count']]
pept2interpro_extra_column_order = [['peptide', 'total_protein_count'], ['code', 'protein_count', 'type', 'name']]
pept2funct_column_order = ['peptide', 'total_protein_count', 'ec', 'go', 'ipr']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
115
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
116
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
117 def __main__():
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
118 version = '4.3'
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
119 pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$'
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
120
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def read_tabular(filepath, col):
    """Collect the value of column ``col`` from each data row of a tabular file.

    Blank lines and lines starting with ``#`` are skipped. Each remaining line
    is split on tabs and the field at index ``col`` is taken as the peptide.
    A value that does not match ``pep_pat`` is reported through ``warn_err``;
    unless that call exits, the value is still appended to the result.

    ``pep_pat`` and ``invalid_ec`` are not parameters: they are resolved from
    the surrounding scope (``invalid_ec`` is bound outside the visible part of
    the file).

    Args:
        filepath: Path of the tab-separated text file to read.
        col: Index into the tab-split fields of each row.

    Returns:
        List of peptide strings in file order.
    """
    peptides = []
    with open(filepath) as fp:
        # Number lines from 1 so the reported position matches what a text
        # editor shows (enumerate defaults to 0).
        for line_no, line in enumerate(fp, 1):
            if line.strip() == '' or line.startswith('#'):
                continue
            # Strip '\r' as well as '\n' so files with CRLF line endings do
            # not leave a carriage return glued to the last column.
            fields = line.rstrip('\r\n').split('\t')
            try:
                peptide = fields[col]
            except IndexError:
                # A short row has no peptide to validate or query: report it
                # the same way as an invalid peptide and move on.
                warn_err('line %d of tabular file %s has no column %d\n' % (line_no, filepath, col), exit_code=invalid_ec)
                continue
            if not re.match(pep_pat, peptide):
                warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide, line_no, col, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
133
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_fasta_entries(fp):
    """Yield ``(header, sequence)`` pairs from an iterable of FASTA lines.

    A line starting with ``>`` opens a new entry; the header is yielded as
    read (leading ``>`` included, trailing whitespace removed). Every other
    line is right-stripped and concatenated onto the current entry's
    sequence. Lines that appear before the first header are discarded, and an
    entry with no sequence lines yields an empty string.

    Args:
        fp: Iterable of text lines, e.g. an open file.

    Yields:
        Tuples of ``(header_line, joined_sequence)``, in input order.
    """
    name, seq = None, []
    for line in fp:
        line = line.rstrip()
        if line.startswith(">"):
            # A new header closes the entry collected so far, if any.
            if name:
                yield (name, ''.join(seq))
            name, seq = line, []
        else:
            seq.append(line)
    # Flush the final entry, which no later header terminates.
    if name:
        yield (name, ''.join(seq))
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
146
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def read_fasta(filepath):
    """Return the sequence of every entry in a FASTA file as a peptide list.

    Entries are produced by ``get_fasta_entries``. A sequence that does not
    match ``pep_pat`` is reported through ``warn_err`` together with its
    header line; unless that call exits, the sequence is still appended.

    ``pep_pat`` and ``invalid_ec`` are not parameters: they are resolved from
    the surrounding scope (``invalid_ec`` is bound outside the visible part of
    the file).

    Args:
        filepath: Path of the FASTA file to read.

    Returns:
        List of sequence strings in file order.
    """
    peptides = []
    with open(filepath) as fp:
        # 'header' rather than 'id', which would shadow the builtin.
        for header, peptide in get_fasta_entries(fp):
            if not re.match(pep_pat, peptide):
                warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (peptide, header, filepath), exit_code=invalid_ec)
            peptides.append(peptide)
    return peptides
1
0c1ee95282fa Uploaded
galaxyp
parents: 0
diff changeset
155
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def read_mzid(fp):
    """Extract peptide sequences from an XML document read with ``ET.iterparse``.

    The text of every element whose tag contains ``PeptideSequence`` is
    collected, in document order. The substring test lets the match succeed
    whether or not the tag carries an XML namespace prefix. Elements with no
    text are skipped so the result never contains ``None``.

    Args:
        fp: Filename or binary file object accepted by ``ET.iterparse``.

    Returns:
        List of peptide sequence strings.
    """
    peptides = []
    # With no ``events`` argument iterparse reports only 'end' events, so each
    # element is complete (text included) when it is seen here.
    for _event, elem in ET.iterparse(fp):
        if 'PeptideSequence' in elem.tag and elem.text is not None:
            peptides.append(elem.text)
        # Release the element's text, attributes and children once handled;
        # nothing below a finished element is read again.
        elem.clear()
    return peptides
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
163
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def read_pepxml(fp):
    """Extract peptides from an XML document read with ``ET.iterparse``.

    The ``peptide`` attribute of every element whose tag contains
    ``search_hit`` is collected, in document order. The substring test lets
    the match succeed whether or not the tag carries an XML namespace prefix.
    Elements lacking the attribute are skipped so the result never contains
    ``None``.

    Args:
        fp: Filename or binary file object accepted by ``ET.iterparse``.

    Returns:
        List of peptide strings.
    """
    peptides = []
    # With no ``events`` argument iterparse reports only 'end' events.
    for _event, elem in ET.iterparse(fp):
        if 'search_hit' in elem.tag:
            peptide = elem.get('peptide')
            if peptide is not None:
                peptides.append(peptide)
        # Release the element's text, attributes and children once handled;
        # the attribute above has already been read.
        elem.clear()
    return peptides
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
171
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def best_match(peptide, matches):
    """Choose one record from *matches* and return a shallow copy of it.

    peptide is accepted for the caller's convenience; the selection itself
    does not read it.

    Selection rules, in order:
      * no matches           -> None
      * exactly one match    -> that match
      * first match carries 'taxon_rank' -> walk the rank columns of
        pept2lca_extra_column_order from the last back to the second and
        return the first record whose 'taxon_rank' equals the rank, or that
        holds a truthy value under the rank key (suffixed with '_id' when
        options.extra is set); None if nothing qualifies
      * otherwise            -> the record with the longest 'peptide'
        (the last such record when several share that length)
    """
    if not matches:
        return None
    if len(matches) == 1:
        return matches[0].copy()
    if 'taxon_rank' not in matches[0]:
        # Scanning in reverse makes max() settle on the last of the longest
        # peptides, which is what a stable ascending sort followed by [-1] yields.
        longest = max(reversed(matches), key=lambda rec: len(rec['peptide']))
        return longest.copy()
    # find the most specific match (peptide is always the first column order field)
    for rank in reversed(pept2lca_extra_column_order[1:]):
        key = rank + "_id" if options.extra else rank
        for candidate in matches:
            rank_hit = 'taxon_rank' in candidate and candidate['taxon_rank'] == rank
            if rank_hit or (key in candidate and candidate[key]):
                return candidate.copy()
    return None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
189
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_taxon_json(resp):
    """Assemble a taxonomy tree of nested dicts from per-peptide records.

    resp is iterated twice (once to collect keys, once to build the tree), so
    it must be re-iterable. Each record is a dict; rank columns are the
    entries of pept2lca_extra_column_order after the first, visited from the
    last entry back to the second, and only those whose '<rank>_id' key shows
    up in at least one record are considered.

    For every record the ranks with a truthy '<rank>_id' are chained
    child -> parent in that order and the final node is attached to the root
    (id 1, name 'root', rank 'no rank'). The record's peptide ('peptide',
    falling back to 'tryptic_peptide') is stored only on the first node
    visited for that record.

    Returns the root node. Every node has the keys 'id', 'name', 'children'
    (child nodes), 'kids' (child ids) and 'data' ('count', 'self_count',
    'valid_taxon', 'rank', 'sequences').
    """
    seen_keys = set()
    for record in resp:
        seen_keys.update(record.keys())
    ranks = [rank for rank in pept2lca_extra_column_order[-1:0:-1]
             if rank + '_id' in seen_keys]
    nodes_by_id = {}

    def get_node(node_id, name, rank, child, seq):
        # Fetch (or create on first sight) the node for node_id, count this
        # visit, and link child beneath it if it is not linked already.
        node = nodes_by_id.get(node_id)
        if node is None:
            node = {
                'id': node_id,
                'name': name,
                'children': [],
                'kids': [],
                'data': {'count': 0, 'self_count': 0, 'valid_taxon': 1,
                         'rank': rank, 'sequences': []},
            }
            nodes_by_id[node_id] = node
        stats = node['data']
        stats['count'] += 1
        if seq is not None and seq not in stats['sequences']:
            stats['sequences'].append(seq)
        if child is None:
            # A visit without a child means the record ends at this node.
            stats['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(1, 'root', 'no rank', None, None)
    for record in resp:
        pending_seq = record.get('peptide', record.get('tryptic_peptide', None))
        lineage = None
        for rank in ranks:
            id_key = rank + '_id'
            if not (id_key in record and record.get(id_key)):
                continue
            name_key = rank if rank in seen_keys else rank + '_name'
            lineage = get_node(record.get(id_key, None), record.get(name_key, ''),
                               rank, lineage, pending_seq)
            # Only the first node reached for this record keeps the peptide.
            pending_seq = None
        if lineage is not None:
            get_node(1, 'root', 'no rank', lineage, None)
    return root
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
230
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_ec_json(resp):
    """Assemble an EC-number tree of nested dicts from per-peptide records.

    The root node has id 0 and name '-.-.-.-'. It is pre-populated with the
    six top-level classes '1'..'6', each named with its number, a newline and
    the matching ec_name_dict entry. For every entry in a record's 'ec' list
    the 'ec_number' is expanded into itself plus each shorter dotted prefix
    ('1.2.3.4', '1.2.3', '1.2', '1'); those nodes are chained child -> parent
    and the last one is attached to the root.

    The record's peptide ('peptide', falling back to 'tryptic_peptide') is
    stored only on the first node visited for that record, i.e. the full EC
    number of its first 'ec' entry; later entries of the same record do not
    receive it.

    Returns the root node. Every node has the keys 'id', 'name', 'children'
    (child nodes), 'kids' (child ids) and 'data' ('count', 'self_count',
    'sequences').
    """
    def get_ids(ec):
        # Return ec followed by each shorter dotted prefix, longest first.
        ids = []
        end = len(ec)
        while end >= 0:
            ids.append(ec[:end])
            if end == 0:
                # Stop here: rfind('.', 0, -1) would treat -1 as a
                # wrap-around slice end and could find index 0 forever when
                # the string starts with a dot.
                break
            end = ec.rfind('.', 0, end - 1)
        return ids

    nodes_by_id = {}

    def get_node(node_id, name, child, seq):
        # Fetch (or create on first sight) the node for node_id, count this
        # visit, and link child beneath it if it is not linked already.
        node = nodes_by_id.get(node_id)
        if node is None:
            node = {
                'id': node_id,
                'name': name,
                'children': [],
                'kids': [],
                'data': {'count': 0, 'self_count': 0, 'sequences': []},
            }
            nodes_by_id[node_id] = node
        stats = node['data']
        stats['count'] += 1
        if seq is not None and seq not in stats['sequences']:
            stats['sequences'].append(seq)
        if child is None:
            # A visit without a child means the chain starts at this node.
            stats['self_count'] += 1
        elif child['id'] not in node['kids']:
            node['kids'].append(child['id'])
            node['children'].append(child)
        return node

    root = get_node(0, '-.-.-.-', None, None)
    for i in range(1, 7):
        child = get_node(str(i), '%s\n%s' % (str(i), ec_name_dict[str(i)]), None, None)
        get_node(0, '-.-.-.-', child, None)
    for pdict in resp:
        seq = pdict.get('peptide', pdict.get('tryptic_peptide', None))
        if 'ec' not in pdict:
            continue
        for ec in pdict['ec']:
            child = None
            for ec_id in get_ids(ec['ec_number']):
                child = get_node(ec_id, str(ec_id), child, seq)
                # Only the first node reached for this record keeps the peptide.
                seq = None
            if child is not None:
                get_node(0, '-.-.-.-', child, None)
    return root
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
284
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_taxon_dict(resp, column_order, extra=False, names=False):
    """Flatten per-peptide records into rows keyed by peptide.

    Columns are chosen by walking column_order against the union of keys
    found in all records:
      * a column present verbatim is emitted under its own name;
      * otherwise, with names set, '<col>_id' is emitted (header '<col>_id')
        when extra is also set, followed by '<col>_name' (header '<col>');
      * otherwise '<col>_name' is emitted if present, else '<col>_id', both
        under the header '<col>'.

    Returns a tuple (taxa, column_names): taxa maps each peptide to its list
    of stringified values ('' for missing or falsy values) and column_names
    lists the headers in output order. Records without a truthy 'peptide'
    are skipped, and only the first record seen for a peptide is kept.
    """
    records = list(resp)
    seen_keys = set()
    for record in records:
        seen_keys.update(record.keys())

    headers = []
    lookup_keys = []

    def emit(header, key):
        # Register one output column: its header and the record key feeding it.
        headers.append(header)
        lookup_keys.append(key)

    for col in column_order:
        id_key = col + '_id'
        name_key = col + '_name'
        if col in seen_keys:
            emit(col, col)
        elif names:
            if extra and id_key in seen_keys:
                emit(id_key, id_key)
            if name_key in seen_keys:
                emit(col, name_key)
        elif name_key in seen_keys:
            emit(col, name_key)
        elif id_key in seen_keys:
            emit(col, id_key)

    taxa = {}  # peptide: [taxonomy]
    for record in records:
        peptide = record['peptide'] if 'peptide' in record else None
        if not peptide or peptide in taxa:
            continue
        taxa[peptide] = [
            str(record[key]) if key in record and record[key] else ''
            for key in lookup_keys
        ]
    return (taxa, headers)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
324
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_ec_dict(resp, extra=False):
    """Collapse each peptide's EC annotations into comma-joined columns.

    Args:
        resp: iterable of per-peptide dicts. Each must carry a 'peptide'
            key and may carry an 'ec' list whose items provide
            'ec_number', 'protein_count' and, optionally, 'name'.
        extra: when true, a third column holding the EC names is produced.

    Returns:
        A ``(ec_dict, ec_cols)`` tuple. ``ec_dict`` maps each peptide to
        its list of comma-joined column values; ``ec_cols`` lists the
        column names in the same order.
    """
    headers = ['ec_numbers', 'ec_protein_counts']
    if extra:
        headers.append('ec_names')
    per_peptide = {}
    for entry in resp:
        peptide = entry['peptide']
        numbers, counts, names = [], [], []
        for hit in entry.get('ec', ()):
            numbers.append(hit['ec_number'])
            counts.append(str(hit['protein_count']))
            if extra:
                # A missing or empty name still takes a slot so the three
                # lists stay positionally aligned.
                names.append(hit.get('name') or '')
        row = [','.join(numbers), ','.join(counts)]
        if extra:
            row.append(','.join(names))
        per_peptide[peptide] = row
    return (per_peptide, headers)
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
346
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_go_dict(resp, extra=False):
    """Collapse each peptide's GO annotations into comma-joined columns.

    Items of a peptide's 'go' list are handled in one of two shapes: a
    flat record that itself has a 'go_term' key, or a grouping dict whose
    keys are looked up among the outer ``go_types`` names and whose values
    are lists of such records.

    Args:
        resp: iterable of per-peptide dicts. Each must carry a 'peptide'
            key and may carry a 'go' list.
        extra: when true, a third column holding the GO names is produced.

    Returns:
        A ``(go_dict, go_cols)`` tuple. ``go_dict`` maps each peptide to
        its list of comma-joined column values; ``go_cols`` lists the
        column names in the same order.
    """
    headers = ['go_terms', 'go_protein_counts']
    if extra:
        headers.append('go_names')

    def _records(annotations):
        # Yield every GO record, expanding grouped items in go_types order.
        for item in annotations:
            if 'go_term' in item:
                yield item
                continue
            for namespace in go_types:
                if namespace in item:
                    for record in item[namespace]:
                        yield record

    per_peptide = {}
    for entry in resp:
        peptide = entry['peptide']
        terms, counts, names = [], [], []
        if 'go' in entry:
            for record in _records(entry['go']):
                terms.append(record['go_term'])
                counts.append(str(record['protein_count']))
                if extra:
                    # Missing or empty names become '' to keep alignment.
                    names.append(record.get('name') or '')
        row = [','.join(terms), ','.join(counts)]
        if extra:
            row.append(','.join(names))
        per_peptide[peptide] = row
    return (per_peptide, headers)
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
377
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def get_ipr_dict(resp, extra=False):
    """Collapse each peptide's InterPro annotations into comma-joined columns.

    Items of a peptide's 'ipr' list are handled in one of two shapes: a
    flat record that itself has a 'code' key, or a grouping dict whose
    keys are looked up among the outer ``ipr_types`` names and whose
    values are lists of such records.

    Args:
        resp: iterable of per-peptide dicts. Each must carry a 'peptide'
            key and may carry an 'ipr' list.
        extra: when true, two further columns are produced: the entry
            types and the entry names.

    Returns:
        A ``(ipr_dict, ipr_cols)`` tuple. ``ipr_dict`` maps each peptide
        to its list of comma-joined column values; ``ipr_cols`` lists the
        column names in the same order.
    """
    ipr_cols = ['ipr_codes', 'ipr_protein_counts']
    if extra:
        ipr_cols.append('ipr_types')
        ipr_cols.append('ipr_names')
    ipr_dict = dict()
    for pdict in resp:
        peptide = pdict['peptide']
        ipr_codes = []
        protein_counts = []
        ipr_names = []
        # This accumulator must NOT be called ``ipr_types``: binding that
        # name here makes it local to the function and hides the outer
        # ``ipr_types`` list that the grouped branch below iterates, so
        # grouped responses would never be expanded.
        type_values = []
        if 'ipr' in pdict:
            for ipr in pdict['ipr']:
                if 'code' in ipr:
                    ipr_codes.append(ipr['code'])
                    protein_counts.append(str(ipr['protein_count']))
                    if extra:
                        # Treat a missing or null value as '' so that
                        # ','.join() below never receives a non-string.
                        type_values.append(ipr.get('type') or '')
                        ipr_names.append(ipr.get('name') or '')
                else:
                    for ipr_type in ipr_types:
                        if ipr_type in ipr:
                            for _ipr in ipr[ipr_type]:
                                ipr_codes.append(_ipr['code'])
                                protein_counts.append(str(_ipr['protein_count']))
                                if extra:
                                    type_values.append(ipr_type)
                                    ipr_names.append(_ipr.get('name') or '')
        vals = [','.join(ipr_codes), ','.join(protein_counts)]
        if extra:
            vals.append(','.join(type_values))
            vals.append(','.join(ipr_names))
        ipr_dict[peptide] = vals
    return (ipr_dict, ipr_cols)
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
413
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def write_ec_table(outfile, resp, column_order):
    """Write one tab-separated line per (peptide, EC annotation) pair.

    Args:
        outfile: path of the file to create or overwrite.
        resp: iterable of per-peptide dicts; entries without an 'ec' key
            produce no output.
        column_order: sequence whose first element lists the keys read
            from the peptide dict and whose last element lists the keys
            read from each EC dict.
    """
    def _cells(record, keys):
        # Missing keys and falsy values are both rendered as ''.
        return [str(record[k]) if k in record and record[k] else '' for k in keys]

    with open(outfile, 'w') as out:
        for entry in resp:
            if 'ec' not in entry:
                continue
            prefix = _cells(entry, column_order[0])
            for annotation in entry['ec']:
                row = prefix + _cells(annotation, column_order[-1])
                out.write('%s\n' % '\t'.join(row))
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
422
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def write_go_table(outfile, resp, column_order):
    """Write one tab-separated line per (peptide, GO annotation) pair.

    A 'go' item that has a 'go_term' key is written directly. Any other
    item is treated as a grouping dict: for each name in the outer
    ``go_types`` that it contains, every record in that group is written
    with the group name appended as an extra trailing column.

    Args:
        outfile: path of the file to create or overwrite.
        resp: iterable of per-peptide dicts; entries without a 'go' key
            produce no output.
        column_order: sequence whose first element lists the keys read
            from the peptide dict and whose last element lists the keys
            read from each GO record.
    """
    def _cells(record, keys):
        # Missing keys and falsy values are both rendered as ''.
        return [str(record[k]) if k in record and record[k] else '' for k in keys]

    def _rows(entry):
        prefix = _cells(entry, column_order[0])
        for item in entry['go']:
            if 'go_term' in item:
                yield prefix + _cells(item, column_order[-1])
                continue
            for namespace in go_types:
                if namespace not in item:
                    continue
                for record in item[namespace]:
                    yield prefix + _cells(record, column_order[-1]) + [namespace]

    with open(outfile, 'w') as out:
        for entry in resp:
            if 'go' in entry:
                for row in _rows(entry):
                    out.write('%s\n' % '\t'.join(row))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
439
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
def write_ipr_table(outfile, resp, column_order):
    """Write one tab-separated line per (peptide, InterPro annotation) pair.

    An 'ipr' item that has a 'code' key is written directly. Any other
    item is treated as a grouping dict: for each name in the outer
    ``ipr_types`` that it contains, every record in that group is written
    with the group name appended as an extra trailing column.

    Args:
        outfile: path of the file to create or overwrite.
        resp: iterable of per-peptide dicts; entries without an 'ipr' key
            produce no output.
        column_order: sequence whose first element lists the keys read
            from the peptide dict and whose last element lists the keys
            read from each InterPro record.
    """
    def _cells(record, keys):
        # Missing keys and falsy values are both rendered as ''.
        return [str(record[k]) if k in record and record[k] else '' for k in keys]

    def _rows(entry):
        prefix = _cells(entry, column_order[0])
        for item in entry['ipr']:
            if 'code' in item:
                yield prefix + _cells(item, column_order[-1])
                continue
            for kind in ipr_types:
                if kind not in item:
                    continue
                for record in item[kind]:
                    yield prefix + _cells(record, column_order[-1]) + [kind]

    with open(outfile, 'w') as out:
        for entry in resp:
            if 'ipr' in entry:
                for row in _rows(entry):
                    out.write('%s\n' % '\t'.join(row))
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
456
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
# Parse Command Line
parser = optparse.OptionParser()
# unipept API choice (the help text lists every accepted choice)
parser.add_option('-a', '--api', dest='unipept', default='pept2lca', choices=['pept2lca', 'pept2taxa', 'pept2prot', 'pept2ec', 'pept2go', 'pept2interpro', 'pept2funct', 'peptinfo'],
                  help='The unipept application: pept2lca, pept2taxa, pept2prot, pept2ec, pept2go, pept2interpro, pept2funct, or peptinfo')
# input files
parser.add_option('-t', '--tabular', dest='tabular', default=None, help='A tabular file that contains a peptide column')
parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains peptide sequences')
parser.add_option('-f', '--fasta', dest='fasta', default=None, help='A fasta file containing peptide sequences')
parser.add_option('-m', '--mzid', dest='mzid', default=None, help='A mzIdentML file containing peptide sequences')
parser.add_option('-p', '--pepxml', dest='pepxml', default=None, help='A pepxml file containing peptide sequences')
# Unipept Flags
parser.add_option('-e', '--equate_il', dest='equate_il', action='store_true', default=False, help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records')
parser.add_option('-x', '--extra', dest='extra', action='store_true', default=False, help='return the complete lineage of the taxonomic lowest common ancestor')
parser.add_option('-n', '--names', dest='names', action='store_true', default=False, help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor')
parser.add_option('-D', '--domains', dest='domains', action='store_true', default=False, help='group response by GO namespace: biological process, molecular function, cellular component')
parser.add_option('-M', '--max_request', dest='max_request', type='int', default=200, help='The maximum number of entries per unipept request')

# output fields
parser.add_option('-A', '--allfields', dest='allfields', action='store_true', default=False, help='include fields: taxon_rank,taxon_id,taxon_name csv and tsv outputs')
# Warn vs Error Flag
parser.add_option('-S', '--strict', dest='strict', action='store_true', default=False, help='Print exit on invalid peptide')
# output files
parser.add_option('-J', '--json', dest='json', default=None, help='Output file path for json formatted results')
parser.add_option('-j', '--ec_json', dest='ec_json', default=None, help='Output file path for json formatted results')
parser.add_option('-E', '--ec_tsv', dest='ec_tsv', default=None, help='Output file path for EC TAB-separated-values (.tsv) formatted results')
parser.add_option('-G', '--go_tsv', dest='go_tsv', default=None, help='Output file path for GO TAB-separated-values (.tsv) formatted results')
parser.add_option('-I', '--ipr_tsv', dest='ipr_tsv', default=None, help='Output file path for InterPro TAB-separated-values (.tsv) formatted results')
parser.add_option('-L', '--lineage_tsv', dest='lineage_tsv', default=None, help='Output file path for Lineage TAB-separated-values (.tsv) formatted results')
parser.add_option('-T', '--tsv', dest='tsv', default=None, help='Output file path for TAB-separated-values (.tsv) formatted results')
parser.add_option('-C', '--csv', dest='csv', default=None, help='Output file path for Comma-separated-values (.csv) formatted results')
parser.add_option('-U', '--unmatched', dest='unmatched', default=None, help='Output file path for peptide with no matches')
parser.add_option('-u', '--url', dest='url', default='https://api.unipept.ugent.be/api/v1/', help='unipept url https://api.unipept.ugent.be/api/v1/')
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
490 parser.add_option('-P', '--peptide_match', dest='peptide_match', choices=['best', 'full', 'report'], default='best', help='Match whole peptide')
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
491 parser.add_option('--unmatched_aa', dest='unmatched_aa', default=None, help='Show unmatched AA in peptide as')
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
492 # debug
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
493 parser.add_option('-g', '--get', dest='get', action='store_true', default=False, help='Use GET instead of POST')
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
494 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turning on debugging')
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
495 parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit')
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
496 (options, args) = parser.parse_args()
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
497 if options.version:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
498 print('%s' % version)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
499 sys.exit(0)
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
500
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
def tryptic_match_string(peptide, tryptic_matches):
    """Describe which tryptic fragments of ``peptide`` were matched.

    Reads ``options.unmatched_aa`` from the enclosing scope.

    :param peptide: the input peptide sequence.
    :param tryptic_matches: the tryptic fragments of ``peptide`` that had a match.
    :returns: when ``options.unmatched_aa`` is set, ``peptide`` with every
        character outside the matched fragments replaced by that value;
        otherwise the matched fragments joined with commas.
    """
    if options.unmatched_aa:
        # Lower-case the whole peptide, then put each matched fragment back
        # as given, so that only unmatched residues remain lower-case.
        # NOTE(review): presumably fragments are upper-case; any lower-case
        # letter inside a fragment would be masked as well.
        masked = peptide.lower()
        for fragment in tryptic_matches:
            masked = masked.replace(fragment.lower(), fragment)
        # Use a callable replacement: a plain string is parsed as a regex
        # replacement template, so a value containing a backslash (e.g. '\\'
        # or '\\g<0>') would raise re.error or be expanded instead of being
        # inserted literally.
        return re.sub('[a-z]', lambda _m: options.unmatched_aa, masked)
    return ','.join(tryptic_matches)
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
509
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
510 invalid_ec = 2 if options.strict else None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
511 peptides = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
512 # Get peptide sequences
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
513 if options.mzid:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
514 peptides += read_mzid(options.mzid)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
515 if options.pepxml:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
516 peptides += read_pepxml(options.pepxml)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
517 if options.tabular:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
518 peptides += read_tabular(options.tabular, options.column)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
519 if options.fasta:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
520 peptides += read_fasta(options.fasta)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
521 if args and len(args) > 0:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
522 for i, peptide in enumerate(args):
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
523 if not re.match(pep_pat, peptide):
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
524 warn_err('"%s" is not a peptide (arg %d)\n' % (peptide, i), exit_code=invalid_ec)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
525 peptides.append(peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
526 if len(peptides) < 1:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
527 warn_err("No peptides input!", exit_code=1)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
528 column_order = pept2lca_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
529 if options.unipept == 'pept2prot':
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
530 column_order = pept2prot_extra_column_order if options.extra else pept2prot_column_order
4
4953dcd7dd39 planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty
galaxyp
parents: 3
diff changeset
531 else:
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
532 if options.extra or options.names:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
533 column_order = pept2lca_all_column_order if options.allfields else pept2lca_extra_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
534 else:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
535 column_order = pept2lca_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
536 # map to tryptic peptides
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
537 if options.peptide_match == 'full':
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
538 pepToParts = {p: [p] for p in peptides}
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
539 else:
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
540 pepToParts = {p: re.split('\n', re.sub(r'(?<=[RK])(?=[^P])', '\n', p)) for p in peptides}
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
541 if options.debug:
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
542 print("column_order: %s\n" % (column_order), file=sys.stderr)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
543 partToPeps = {}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
544 for peptide, parts in pepToParts.items():
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
545 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
546 print("peptide: %s\ttryptic: %s\n" % (peptide, parts), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
547 for part in parts:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
548 if len(part) > 50:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
549 warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide, len(part), part), exit_code=None)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
550 if 5 <= len(part) <= 50:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
551 partToPeps.setdefault(part, []).append(peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
552 trypticPeptides = list(partToPeps.keys())
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
553 # unipept
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
554 unipept_resp = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
555 idx = list(range(0, len(trypticPeptides), options.max_request))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
556 idx.append(len(trypticPeptides))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
557 for i in range(len(idx) - 1):
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
558 post_data = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
559 if options.equate_il:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
560 post_data.append(('equate_il', 'true'))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
561 if options.names or options.json:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
562 post_data.append(('extra', 'true'))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
563 post_data.append(('names', 'true'))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
564 elif options.extra or options.json:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
565 post_data.append(('extra', 'true'))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
566 if options.domains:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
567 post_data.append(('domains', 'true'))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
568 post_data += [('input[]', x) for x in trypticPeptides[idx[i]:idx[i + 1]]]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
569 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
570 print('post_data: %s\n' % (str(post_data)), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
571 headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
572 url = '%s/%s' % (options.url.rstrip('/'), options.unipept)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
573 if options.get:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
574 params = '&'.join(["%s=%s" % (i[0], i[1]) for i in post_data])
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
575 url = '%s.json?%s' % (url, params)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
576 req = urllib.request.Request(url)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
577 else:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
578 url = '%s.json' % (url)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
579 data = urllib.parse.urlencode(post_data).encode()
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
580 params = '&'.join(["%s=%s" % (i[0], i[1]) for i in post_data])
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
581 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
582 print('data:\n%s\n' % (data), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
583 req = urllib.request.Request(url, headers=headers, data=urllib.parse.urlencode(post_data).encode(), method='POST')
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
584 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
585 print("url: %s\n" % (str(url)), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
586 try:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
587 resp = urllib.request.urlopen(req)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
588 rdata = resp.read()
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
589 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
590 print("%s %s\n" % (url, str(resp.getcode())), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
591 if resp.getcode() == 200:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
592 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
593 print("rdata: \n%s\n\n" % (rdata), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
594 unipept_resp += json.loads(rdata)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
595 # unipept_resp += json.loads(urllib.request.urlopen(req).read())
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
596 except Exception as e:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
597 warn_err('HTTP Error %s\n' % (str(e)), exit_code=None)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
598 unmatched_peptides = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
599 peptideMatches = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
600 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
601 print("unipept response: %s\n" % str(unipept_resp), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
602 if options.unipept in ['pept2prot', 'pept2taxa']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
603 dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' # should only keep one of these per input peptide
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
604 # multiple entries per trypticPeptide for pep2prot or pep2taxa
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
605 mapping = {}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
606 for match in unipept_resp:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
607 mapping.setdefault(match['peptide'], []).append(match)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
608 for peptide in peptides:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
609 # Get the intersection of matches to the tryptic parts
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
610 keyToMatch = None
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
611 tryptic_match = []
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
612 for part in pepToParts[peptide]:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
613 if part in mapping:
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
614 tryptic_match.append(part)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
615 temp = {match[dupkey]: match for match in mapping[part]}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
616 if keyToMatch:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
617 dkeys = set(keyToMatch.keys()) - set(temp.keys())
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
618 for k in dkeys:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
619 del keyToMatch[k]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
620 else:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
621 keyToMatch = temp
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
622 # keyToMatch = keyToMatch.fromkeys([x for x in keyToMatch if x in temp]) if keyToMatch else temp
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
623 if not keyToMatch:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
624 unmatched_peptides.append(peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
625 else:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
626 for key, match in keyToMatch.items():
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
627 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
628 match['tryptic_peptide'] = match['peptide']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
629 match['peptide'] = peptide
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
630 peptideMatches.append(match)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
631 elif options.unipept in ['pept2lca', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
632 # should be one response per trypticPeptide for pep2lca
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
633 respMap = {v['peptide']: v for v in unipept_resp}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
634 # map resp back to peptides
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
635 for peptide in peptides:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
636 matches = list()
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
637 tryptic_match = []
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
638 for part in pepToParts[peptide]:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
639 if part in respMap:
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
640 tryptic_match.append(part)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
641 matches.append(respMap[part])
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
642 match = best_match(peptide, matches)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
643 if not match:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
644 unmatched_peptides.append(peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
645 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
646 match = {'peptide': longest_tryptic_peptide}
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
647 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
648 match['tryptic_peptide'] = match['peptide']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
649 match['peptide'] = peptide
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
650 peptideMatches.append(match)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
651 else:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
652 respMap = {v['peptide']: v for v in unipept_resp}
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
653 # map resp back to peptides
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
654 for peptide in peptides:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
655 matches = list()
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
656 tryptic_match = []
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
657 for part in pepToParts[peptide]:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
658 if part in respMap and 'total_protein_count' in respMap[part]:
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
659 tryptic_match.append(part)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
660 matches.append(respMap[part])
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
661 match = best_match(peptide, matches)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
662 if not match:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
663 unmatched_peptides.append(peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
664 longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
665 match = {'peptide': longest_tryptic_peptide}
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
666 match['tryptic_match'] = tryptic_match_string(peptide, tryptic_match)
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
667 match['tryptic_peptide'] = match['peptide']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
668 match['peptide'] = peptide
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
669 peptideMatches.append(match)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
670 resp = peptideMatches
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
671 if options.debug:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
672 print("\nmapped response: %s\n" % str(resp), file=sys.stderr)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
673 # output results
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
674 if not (options.unmatched or options.json or options.tsv or options.csv):
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
675 print(str(resp))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
676 if options.unmatched:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
677 with open(options.unmatched, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
678 for peptide in peptides:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
679 if peptide in unmatched_peptides:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
680 outputFile.write("%s\n" % peptide)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
681 if options.json:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
682 if options.unipept in ['pept2lca', 'pept2taxa', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
683 root = get_taxon_json(resp)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
684 with open(options.json, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
685 outputFile.write(json.dumps(root))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
686 elif options.unipept in ['pept2prot', 'pept2ec', 'pept2go', 'pept2interpro', 'pept2funct']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
687 with open(options.json, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
688 outputFile.write(str(resp))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
689 if options.ec_json:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
690 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
691 root = get_ec_json(resp)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
692 with open(options.ec_json, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
693 outputFile.write(json.dumps(root))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
694 if options.tsv or options.csv:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
695 rows = []
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
696 column_names = None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
697 if options.unipept in ['pept2ec', 'pept2go', 'pept2interpro', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
698 taxa = None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
699 ec_dict = None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
700 go_dict = None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
701 ipr_dict = None
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
702 if options.unipept in ['peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
703 (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
704 if options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
705 (ec_dict, ec_cols) = get_ec_dict(resp, extra=options.extra)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
706 if options.unipept in ['pept2go', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
707 (go_dict, go_cols) = get_go_dict(resp, extra=options.extra)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
708 if options.unipept in ['pept2interpro', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
709 (ipr_dict, ipr_cols) = get_ipr_dict(resp, extra=options.extra)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
710 for i, pdict in enumerate(resp):
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
711 peptide = pdict['peptide']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
712 total_protein_count = str(pdict['total_protein_count']) if 'total_protein_count' in pdict else '0'
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
713 column_names = ['peptide', 'total_protein_count']
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
714 vals = [peptide, total_protein_count]
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
715 if options.peptide_match == 'report':
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
716 column_names = ['peptide', 'tryptic_match', 'total_protein_count']
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
717 tryptic_match = pdict.get('tryptic_match', '')
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
718 vals = [peptide, tryptic_match, total_protein_count]
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
719 if ec_dict:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
720 vals += ec_dict[peptide]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
721 column_names += ec_cols
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
722 if go_dict:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
723 vals += go_dict[peptide]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
724 column_names += go_cols
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
725 if ipr_dict:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
726 vals += ipr_dict[peptide]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
727 column_names += ipr_cols
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
728 if taxa:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
729 vals += taxa[peptide][1:]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
730 column_names += taxon_cols[1:]
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
731 rows.append(vals)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
732 elif options.unipept in ['pept2lca', 'pept2taxa', 'pept2prot']:
7
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
733 if options.peptide_match == 'report':
75b3b3d0adbf "planemo upload for repository http://unipept.ugent.be/apidocs commit b6707ea113b2a89b0bb8072dfcc9ceeef4a1b708"
galaxyp
parents: 6
diff changeset
734 column_order.insert(1, 'tryptic_match')
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
735 (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
736 column_names = taxon_cols
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
737 rows = list(taxa.values())
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
738 if options.tsv:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
739 with open(options.tsv, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
740 if column_names:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
741 outputFile.write("#%s\n" % '\t'.join(column_names))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
742 for vals in rows:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
743 outputFile.write("%s\n" % '\t'.join(vals))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
744 if options.csv:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
745 with open(options.csv, 'w') as outputFile:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
746 if column_names:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
747 outputFile.write("%s\n" % ','.join(column_names))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
748 for vals in rows:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
749 outputFile.write("%s\n" % ','.join(['"%s"' % (v if v else '') for v in vals]))
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
750 if options.ec_tsv and options.unipept in ['pept2ec', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
751 column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
752 write_ec_table(options.ec_tsv, resp, column_order)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
753 if options.go_tsv and options.unipept in ['pept2go', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
754 column_order = pept2go_extra_column_order if options.extra else pept2go_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
755 write_go_table(options.go_tsv, resp, column_order)
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
756 if options.ipr_tsv and options.unipept in ['pept2interpro', 'pept2funct', 'peptinfo']:
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
757 column_order = pept2interpro_extra_column_order if options.extra else pept2interpro_column_order
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
758 write_ipr_table(options.ipr_tsv, resp, column_order)
0
6430407e5869 Uploaded
galaxyp
parents:
diff changeset
759
5
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
760
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
# Script entry point: call __main__() only when this file is executed directly, not when it is imported as a module.
761 if __name__ == "__main__":
917fd3ebc223 "planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty"
galaxyp
parents: 4
diff changeset
762 __main__()