Mercurial > repos > gga > apollo_export
comparison create_features_from_gff3.py @ 10:c3b5bc8b4080 draft
"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 08015be1ee8a784e0619f961aaa724857debfd6f"
author | gga |
---|---|
date | Mon, 02 Dec 2019 05:43:20 -0500 |
parents | 950fb2bf116d |
children | b763acecfb57 |
comparison
equal
deleted
inserted
replaced
9:f45ad96c9e08 | 10:c3b5bc8b4080 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 import argparse | 2 import argparse |
3 import logging | 3 import logging |
4 import sys | |
5 import time | |
6 | 4 |
7 from BCBio import GFF | 5 from apollo import accessible_organisms |
6 from apollo.util import GuessOrg, OrgOrGuess | |
8 | 7 |
9 from six.moves.builtins import str | 8 from arrow.apollo import get_apollo_instance |
10 | 9 |
11 from webapollo import GuessOrg, OrgOrGuess, PermissionCheck, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry | 10 from webapollo import UserObj, handle_credentials |
12 logging.basicConfig(level=logging.INFO) | 11 logging.basicConfig(level=logging.INFO) |
13 log = logging.getLogger(__name__) | 12 log = logging.getLogger(__name__) |
14 | 13 |
15 | 14 |
16 if __name__ == '__main__': | 15 if __name__ == '__main__': |
17 parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') | 16 parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') |
18 WAAuth(parser) | |
19 parser.add_argument('email', help='User Email') | 17 parser.add_argument('email', help='User Email') |
20 parser.add_argument('--source', help='URL where the input dataset can be found.') | 18 parser.add_argument('--source', help='URL where the input dataset can be found.') |
21 OrgOrGuess(parser) | 19 OrgOrGuess(parser) |
22 | 20 |
23 parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file') | 21 parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file') |
24 args = parser.parse_args() | 22 args = parser.parse_args() |
25 | 23 |
26 wa = WebApolloInstance(args.apollo, args.username, args.password) | 24 wa = get_apollo_instance() |
27 # User must have an account | 25 # User must have an account |
28 gx_user = wa.users.assertOrCreateUser(args.email) | 26 gx_user = UserObj(**wa.users._assert_or_create_user(args.email)) |
27 handle_credentials(gx_user) | |
29 | 28 |
30 # Get organism | 29 # Get organism |
31 org_cn = GuessOrg(args, wa) | 30 org_cn = GuessOrg(args, wa) |
32 if isinstance(org_cn, list): | 31 if isinstance(org_cn, list): |
33 org_cn = org_cn[0] | 32 org_cn = org_cn[0] |
34 | 33 |
35 if not PermissionCheck(gx_user, org_cn, "WRITE"): | 34 all_orgs = wa.organisms.get_organisms() |
36 raise Exception("Action not permitted") | 35 if 'error' in all_orgs: |
37 org = wa.organisms.findOrganismByCn(org_cn) | 36 all_orgs = [] |
37 all_orgs = [org['commonName'] for org in all_orgs] | |
38 if org_cn not in all_orgs: | |
39 raise Exception("Could not find organism %s" % org_cn) | |
38 | 40 |
39 bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID'] | 41 orgs = accessible_organisms(gx_user, [org_cn], 'WRITE') |
42 if not orgs: | |
43 raise Exception("You do not have write permission on this organism") | |
40 | 44 |
41 sys.stdout.write('# ') | 45 wa.annotations.load_gff3(org_cn, args.gff3, args.source) |
42 sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages'])) | |
43 sys.stdout.write('\n') | |
44 # print(wa.annotations.getFeatures()) | |
45 for rec in GFF.parse(args.gff3): | |
46 wa.annotations.setSequence(rec.id, org['id']) | |
47 for feature in rec.features: | |
48 # We can only handle genes and terminators right now | |
49 if feature.type not in ('gene', 'terminator'): | |
50 continue | |
51 # Convert the feature into a presentation that Apollo will accept | |
52 featureData = featuresToFeatureSchema([feature]) | |
53 if 'children' in featureData[0] and any([child['type']['name'] == 'tRNA' for child in featureData[0]['children']]): | |
54 # We're experiencing a (transient?) problem where gene_001 to | |
55 # gene_025 will be rejected. Thus, hardcode to a known working | |
56 # gene name and update later. | |
57 | |
58 featureData[0]['name'] = 'tRNA_000' | |
59 tRNA_sf = [child for child in feature.sub_features if child.type == 'tRNA'][0] | |
60 tRNA_type = 'tRNA-' + tRNA_sf.qualifiers.get('Codon', ["Unk"])[0] | |
61 | |
62 if 'Name' in feature.qualifiers: | |
63 if feature.qualifiers['Name'][0].startswith('tRNA-'): | |
64 tRNA_type = feature.qualifiers['Name'][0] | |
65 | |
66 newfeature = wa.annotations.addFeature(featureData, trustme=True) | |
67 | |
68 def func0(): | |
69 wa.annotations.setName( | |
70 newfeature['features'][0]['uniquename'], | |
71 tRNA_type, | |
72 ) | |
73 retry(func0) | |
74 | |
75 if args.source: | |
76 gene_id = newfeature['features'][0]['parent_id'] | |
77 | |
78 def setSource(): | |
79 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) | |
80 retry(setSource) | |
81 | |
82 sys.stdout.write('\t'.join([ | |
83 feature.id, | |
84 newfeature['features'][0]['uniquename'], | |
85 'success', | |
86 ])) | |
87 elif featureData[0]['type']['name'] == 'terminator': | |
88 # We're experiencing a (transient?) problem where gene_001 to | |
89 # gene_025 will be rejected. Thus, hardcode to a known working | |
90 # gene name and update later. | |
91 featureData[0]['name'] = 'terminator_000' | |
92 newfeature = wa.annotations.addFeature(featureData, trustme=True) | |
93 | |
94 def func0(): | |
95 wa.annotations.setName( | |
96 newfeature['features'][0]['uniquename'], | |
97 'terminator' | |
98 ) | |
99 | |
100 retry(func0) | |
101 | |
102 if args.source: | |
103 gene_id = newfeature['features'][0]['parent_id'] | |
104 | |
105 def setSource(): | |
106 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) | |
107 retry(setSource) | |
108 | |
109 sys.stdout.write('\t'.join([ | |
110 feature.id, | |
111 newfeature['features'][0]['uniquename'], | |
112 'success', | |
113 ])) | |
114 else: | |
115 try: | |
116 # We're experiencing a (transient?) problem where gene_001 to | |
117 # gene_025 will be rejected. Thus, hardcode to a known working | |
118 # gene name and update later. | |
119 featureData[0]['name'] = 'gene_000' | |
120 # Extract CDS feature from the feature data, this will be used | |
121 # to set the CDS location correctly (apollo currently screwing | |
122 # this up (2.0.6)) | |
123 CDS = featureData[0]['children'][0]['children'] | |
124 CDS = [x for x in CDS if x['type']['name'] == 'CDS'][0]['location'] | |
125 # Create the new feature | |
126 newfeature = wa.annotations.addFeature(featureData, trustme=True) | |
127 # Extract the UUIDs that apollo returns to us | |
128 mrna_id = newfeature['features'][0]['uniquename'] | |
129 gene_id = newfeature['features'][0]['parent_id'] | |
130 # Sleep to give it time to actually persist the feature. Apollo | |
131 # is terrible about writing + immediately reading back written | |
132 # data. | |
133 time.sleep(1) | |
134 # Correct the translation start, but with strand-specific logic | |
135 if CDS['strand'] == 1: | |
136 wa.annotations.setTranslationStart(mrna_id, min(CDS['fmin'], CDS['fmax'])) | |
137 else: | |
138 wa.annotations.setTranslationStart(mrna_id, max(CDS['fmin'], CDS['fmax']) - 1) | |
139 | |
140 # Finally we set the name, this should be correct. | |
141 time.sleep(0.5) | |
142 wa.annotations.setName(mrna_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0]) | |
143 time.sleep(0.5) | |
144 | |
145 def func(): | |
146 wa.annotations.setName(gene_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0]) | |
147 retry(func) | |
148 | |
149 if args.source: | |
150 gene_id = newfeature['features'][0]['parent_id'] | |
151 | |
152 def setSource(): | |
153 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) | |
154 retry(setSource) | |
155 extra_attr = {} | |
156 for (key, values) in feature.qualifiers.items(): | |
157 if key in bad_quals: | |
158 continue | |
159 | |
160 if key == 'Note': | |
161 def func2(): | |
162 wa.annotations.addComments(gene_id, values) | |
163 retry(func2) | |
164 else: | |
165 extra_attr[key] = values | |
166 | |
167 def func3(): | |
168 wa.annotations.addAttributes(gene_id, extra_attr) | |
169 retry(func3) | |
170 | |
171 sys.stdout.write('\t'.join([ | |
172 feature.id, | |
173 gene_id, | |
174 'success', | |
175 ])) | |
176 except Exception as e: | |
177 msg = str(e) | |
178 if '\n' in msg: | |
179 msg = msg[0:msg.index('\n')] | |
180 sys.stdout.write('\t'.join([ | |
181 feature.id, | |
182 '', | |
183 'ERROR', | |
184 msg | |
185 ])) | |
186 sys.stdout.write('\n') | |
187 sys.stdout.flush() |