comparison data_manager/funannotate.py @ 1:8dff71edbce5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_funannotate commit d1ebc78155f57c87d8e82c9855b176428e9803ad"
author iuc
date Thu, 18 Nov 2021 21:55:43 +0000
parents ef7f9e2f32f2
children 13018941c6a0
comparison
equal deleted inserted replaced
0:ef7f9e2f32f2 1:8dff71edbce5
4 import json 4 import json
5 import operator 5 import operator
6 import os 6 import os
7 import subprocess 7 import subprocess
8 import sys 8 import sys
9 import tarfile
9 from datetime import datetime 10 from datetime import datetime
11
12 import requests
13
# Additional BUSCO/OrthoDB 10 datasets that can be added to the funannotate db.
# They will probably no longer be needed once a future version of funannotate
# uses a recent BUSCO version.
# URL template for a single dataset archive: the "{dataset}" placeholder is
# filled in with one entry of BUSCO_10_DATASETS through str.format().
BUSCO_10_DATASETS_URL = "https://busco-data.ezlab.org/v5/data/lineages/{dataset}"
# Archive file names substituted into BUSCO_10_DATASETS_URL. The main script
# downloads each one into the output directory, extracts it there and then
# deletes the archive; with the "partial" option only the first entry is used.
BUSCO_10_DATASETS = [
    "acidobacteria_odb10.2020-03-06.tar.gz",
    "aconoidasida_odb10.2020-08-05.tar.gz",
    "actinobacteria_class_odb10.2021-02-23.tar.gz",
    "actinobacteria_phylum_odb10.2021-02-23.tar.gz",
    "actinopterygii_odb10.2021-02-19.tar.gz",
    "agaricales_odb10.2020-08-05.tar.gz",
    "agaricomycetes_odb10.2020-08-05.tar.gz",
    "alphabaculovirus_odb10.2020-11-26.tar.gz",
    "alphaherpesvirinae_odb10.2020-11-26.tar.gz",
    "alphaproteobacteria_odb10.2021-02-23.tar.gz",
    "alteromonadales_odb10.2021-02-23.tar.gz",
    "alveolata_odb10.2020-09-10.tar.gz",
    "apicomplexa_odb10.2020-09-10.tar.gz",
    "aquificae_odb10.2021-02-23.tar.gz",
    "arachnida_odb10.2020-08-05.tar.gz",
    "archaea_odb10.2021-02-23.tar.gz",
    "arthropoda_odb10.2020-09-10.tar.gz",
    "ascomycota_odb10.2020-09-10.tar.gz",
    "aves_odb10.2021-02-19.tar.gz",
    "aviadenovirus_odb10.2020-11-26.tar.gz",
    "bacillales_odb10.2021-02-23.tar.gz",
    "bacilli_odb10.2021-02-23.tar.gz",
    "bacteria_odb10.2020-03-06.tar.gz",
    "bacteroidales_odb10.2021-02-23.tar.gz",
    "bacteroidetes-chlorobi_group_odb10.2021-02-23.tar.gz",
    "bacteroidetes_odb10.2021-02-23.tar.gz",
    "bacteroidia_odb10.2021-02-23.tar.gz",
    "baculoviridae_odb10.2020-11-26.tar.gz",
    "basidiomycota_odb10.2020-09-10.tar.gz",
    "bclasvirinae_odb10.2020-11-26.tar.gz",
    "betabaculovirus_odb10.2020-11-26.tar.gz",
    "betaherpesvirinae_odb10.2020-11-26.tar.gz",
    "betaproteobacteria_odb10.2021-02-23.tar.gz",
    "boletales_odb10.2020-08-05.tar.gz",
    "brassicales_odb10.2020-08-05.tar.gz",
    "burkholderiales_odb10.2021-02-23.tar.gz",
    "campylobacterales_odb10.2020-03-06.tar.gz",
    "capnodiales_odb10.2020-08-05.tar.gz",
    "carnivora_odb10.2021-02-19.tar.gz",
    "cellvibrionales_odb10.2020-03-06.tar.gz",
    "cetartiodactyla_odb10.2021-02-19.tar.gz",
    "chaetothyriales_odb10.2020-08-05.tar.gz",
    "cheoctovirus_odb10.2020-11-26.tar.gz",
    "chlamydiae_odb10.2020-03-06.tar.gz",
    "chlorobi_odb10.2020-03-06.tar.gz",
    "chloroflexi_odb10.2020-03-06.tar.gz",
    "chlorophyta_odb10.2020-08-05.tar.gz",
    "chordopoxvirinae_odb10.2020-11-26.tar.gz",
    "chromatiales_odb10.2020-03-06.tar.gz",
    "chroococcales_odb10.2020-03-06.tar.gz",
    "clostridia_odb10.2020-03-06.tar.gz",
    "clostridiales_odb10.2020-03-06.tar.gz",
    "coccidia_odb10.2020-08-05.tar.gz",
    "coriobacteriales_odb10.2020-03-06.tar.gz",
    "coriobacteriia_odb10.2020-03-06.tar.gz",
    "corynebacteriales_odb10.2020-03-06.tar.gz",
    "cyanobacteria_odb10.2021-02-23.tar.gz",
    "cyprinodontiformes_odb10.2021-02-19.tar.gz",
    "cytophagales_odb10.2021-02-23.tar.gz",
    "cytophagia_odb10.2021-02-23.tar.gz",
    "delta-epsilon-subdivisions_odb10.2021-02-23.tar.gz",
    "deltaproteobacteria_odb10.2021-02-23.tar.gz",
    "desulfobacterales_odb10.2020-03-06.tar.gz",
    "desulfovibrionales_odb10.2021-02-23.tar.gz",
    "desulfurococcales_odb10.2021-02-23.tar.gz",
    "desulfuromonadales_odb10.2020-03-06.tar.gz",
    "diptera_odb10.2020-08-05.tar.gz",
    "dothideomycetes_odb10.2020-08-05.tar.gz",
    "embryophyta_odb10.2020-09-10.tar.gz",
    "endopterygota_odb10.2020-09-10.tar.gz",
    "enquatrovirus_odb10.2021-05-05.tar.gz",
    "enterobacterales_odb10.2021-02-23.tar.gz",
    "entomoplasmatales_odb10.2020-03-06.tar.gz",
    "epsilonproteobacteria_odb10.2020-03-06.tar.gz",
    "euarchontoglires_odb10.2021-02-19.tar.gz",
    "eudicots_odb10.2020-09-10.tar.gz",
    "euglenozoa_odb10.2020-08-05.tar.gz",
    "eukaryota_odb10.2020-09-10.tar.gz",
    "eurotiales_odb10.2020-08-05.tar.gz",
    "eurotiomycetes_odb10.2020-08-05.tar.gz",
    "euryarchaeota_odb10.2021-02-23.tar.gz",
    "eutheria_odb10.2021-02-19.tar.gz",
    "fabales_odb10.2020-08-05.tar.gz",
    "firmicutes_odb10.2021-02-23.tar.gz",
    "flavobacteriales_odb10.2021-02-23.tar.gz",
    "flavobacteriia_odb10.2021-02-23.tar.gz",
    "fromanvirus_odb10.2020-11-26.tar.gz",
    "fungi_odb10.2021-06-28.tar.gz",
    "fusobacteria_odb10.2020-03-06.tar.gz",
    "fusobacteriales_odb10.2020-03-06.tar.gz",
    "gammaherpesvirinae_odb10.2020-11-26.tar.gz",
    "gammaproteobacteria_odb10.2021-02-23.tar.gz",
    "glires_odb10.2021-02-19.tar.gz",
    "glomerellales_odb10.2020-08-05.tar.gz",
    "guernseyvirinae_odb10.2020-11-26.tar.gz",
    "halobacteria_odb10.2021-02-23.tar.gz",
    "halobacteriales_odb10.2021-02-23.tar.gz",
    "haloferacales_odb10.2021-02-23.tar.gz",
    "helotiales_odb10.2020-08-05.tar.gz",
    "hemiptera_odb10.2020-08-05.tar.gz",
    "herpesviridae_odb10.2020-11-26.tar.gz",
    "hymenoptera_odb10.2020-08-05.tar.gz",
    "hypocreales_odb10.2020-08-05.tar.gz",
    "insecta_odb10.2020-09-10.tar.gz",
    "iridoviridae_odb10.2020-11-26.tar.gz",
    "lactobacillales_odb10.2020-03-06.tar.gz",
    "laurasiatheria_odb10.2021-02-19.tar.gz",
    "legionellales_odb10.2020-03-06.tar.gz",
    "leotiomycetes_odb10.2020-08-05.tar.gz",
    "lepidoptera_odb10.2020-08-05.tar.gz",
    "liliopsida_odb10.2020-09-10.tar.gz",
    "mammalia_odb10.2021-02-19.tar.gz",
    "metazoa_odb10.2021-02-24.tar.gz",
    "methanobacteria_odb10.2021-02-23.tar.gz",
    "methanococcales_odb10.2021-02-23.tar.gz",
    "methanomicrobia_odb10.2021-02-23.tar.gz",
    "methanomicrobiales_odb10.2021-02-23.tar.gz",
    "micrococcales_odb10.2021-02-23.tar.gz",
    "microsporidia_odb10.2020-08-05.tar.gz",
    "mollicutes_odb10.2020-03-06.tar.gz",
    "mollusca_odb10.2020-08-05.tar.gz",
    "mucorales_odb10.2020-08-05.tar.gz",
    "mucoromycota_odb10.2020-08-05.tar.gz",
    "mycoplasmatales_odb10.2020-03-06.tar.gz",
    "natrialbales_odb10.2021-02-23.tar.gz",
    "neisseriales_odb10.2021-02-23.tar.gz",
    "nematoda_odb10.2020-08-05.tar.gz",
    "nitrosomonadales_odb10.2020-03-06.tar.gz",
    "nostocales_odb10.2020-03-06.tar.gz",
    "oceanospirillales_odb10.2020-03-06.tar.gz",
    "onygenales_odb10.2020-08-05.tar.gz",
    "oscillatoriales_odb10.2021-02-23.tar.gz",
    "pahexavirus_odb10.2020-11-26.tar.gz",
    "passeriformes_odb10.2021-02-19.tar.gz",
    "pasteurellales_odb10.2021-02-23.tar.gz",
    "peduovirus_odb10.2021-02-23.tar.gz",
    "planctomycetes_odb10.2020-03-06.tar.gz",
    "plasmodium_odb10.2020-08-05.tar.gz",
    "pleosporales_odb10.2020-08-05.tar.gz",
    "poales_odb10.2020-08-05.tar.gz",
    "polyporales_odb10.2020-08-05.tar.gz",
    "poxviridae_odb10.2020-11-26.tar.gz",
    "primates_odb10.2021-02-19.tar.gz",
    "propionibacteriales_odb10.2020-03-06.tar.gz",
    "proteobacteria_odb10.2021-02-23.tar.gz",
    "pseudomonadales_odb10.2020-03-06.tar.gz",
    "rhizobiales_odb10.2020-03-06.tar.gz",
    "rhizobium-agrobacterium_group_odb10.2020-03-06.tar.gz",
    "rhodobacterales_odb10.2021-02-23.tar.gz",
    "rhodospirillales_odb10.2020-03-06.tar.gz",
    "rickettsiales_odb10.2020-03-06.tar.gz",
    "rudiviridae_odb10.2020-11-26.tar.gz",
    "saccharomycetes_odb10.2020-08-05.tar.gz",
    "sauropsida_odb10.2021-02-19.tar.gz",
    "selenomonadales_odb10.2020-03-06.tar.gz",
    "simplexvirus_odb10.2020-11-26.tar.gz",
    "skunavirus_odb10.2020-11-26.tar.gz",
    "solanales_odb10.2020-08-05.tar.gz",
    "sordariomycetes_odb10.2020-08-05.tar.gz",
    "sphingobacteriia_odb10.2020-03-06.tar.gz",
    "sphingomonadales_odb10.2021-02-23.tar.gz",
    "spirochaetales_odb10.2020-03-06.tar.gz",
    "spirochaetes_odb10.2021-02-23.tar.gz",
    "spirochaetia_odb10.2021-02-23.tar.gz",
    "spounavirinae_odb10.2020-11-26.tar.gz",
    "stramenopiles_odb10.2020-08-05.tar.gz",
    "streptomycetales_odb10.2020-03-06.tar.gz",
    "streptosporangiales_odb10.2020-03-06.tar.gz",
    "sulfolobales_odb10.2021-02-23.tar.gz",
    "synechococcales_odb10.2020-03-06.tar.gz",
    "synergistetes_odb10.2020-03-06.tar.gz",
    "tenericutes_odb10.2020-03-06.tar.gz",
    "tequatrovirus_odb10.2020-11-26.tar.gz",
    "teseptimavirus_odb10.2020-11-26.tar.gz",
    "tetrapoda_odb10.2021-02-19.tar.gz",
    "tevenvirinae_odb10.2021-02-23.tar.gz",
    "thaumarchaeota_odb10.2021-02-23.tar.gz",
    "thermoanaerobacterales_odb10.2020-03-06.tar.gz",
    "thermoplasmata_odb10.2021-02-23.tar.gz",
    "thermoproteales_odb10.2021-02-23.tar.gz",
    "thermoprotei_odb10.2021-02-23.tar.gz",
    "thermotogae_odb10.2020-03-06.tar.gz",
    "thiotrichales_odb10.2020-03-06.tar.gz",
    "tissierellales_odb10.2020-03-06.tar.gz",
    "tissierellia_odb10.2020-03-06.tar.gz",
    "tremellomycetes_odb10.2020-08-05.tar.gz",
    "tunavirinae_odb10.2020-11-26.tar.gz",
    "varicellovirus_odb10.2020-11-26.tar.gz",
    "verrucomicrobia_odb10.2020-03-06.tar.gz",
    "vertebrata_odb10.2021-02-19.tar.gz",
    "vibrionales_odb10.2020-03-06.tar.gz",
    "viridiplantae_odb10.2020-09-10.tar.gz",
    "xanthomonadales_odb10.2020-03-06.tar.gz",
]
213
214
def download_file(url, dest, timeout=60):
    """Stream the resource at ``url`` into the local file ``dest``.

    Args:
        url: Address fetched with an HTTP GET request.
        dest: Path of the file to write. It is opened in binary write mode,
            so an existing file at that path is overwritten.
        timeout: Seconds to wait for the connection and for each read from
            the server, passed straight to ``requests.get``. ``None``
            disables the limit, which was the behaviour before this
            parameter existed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without a timeout, a server that stops responding would block the
    # whole data manager run forever.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Checked before opening ``dest`` so an HTTP error creates no file.
        response.raise_for_status()
        with open(dest, 'wb') as out:
            # Copy in 8 KiB chunks instead of reading the whole body at once.
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)
10 221
11 222
12 if __name__ == "__main__": 223 if __name__ == "__main__":
13 224
14 parser = argparse.ArgumentParser() 225 parser = argparse.ArgumentParser()
36 return_code = proc.wait() 247 return_code = proc.wait()
37 if return_code: 248 if return_code:
38 print("Error downloading Funannotate database.", file=sys.stderr) 249 print("Error downloading Funannotate database.", file=sys.stderr)
39 sys.exit(return_code) 250 sys.exit(return_code)
40 251
252 # Download newer busco datasets from orthodb 10
253 if args.partial:
254 BUSCO_10_DATASETS = BUSCO_10_DATASETS[:1]
255
256 for busco_dataset in BUSCO_10_DATASETS:
257 print("Downloading additional busco orthodb10 dataset %s" % busco_dataset)
258 dest_tar = os.path.join(output_directory, busco_dataset)
259 download_file(BUSCO_10_DATASETS_URL.format(dataset=busco_dataset), dest_tar)
260 print("Extracting %s" % busco_dataset)
261 tar = tarfile.open(dest_tar, "r:gz")
262 tar.extractall(output_directory)
263 tar.close()
264 os.remove(dest_tar)
265
41 version_id = datetime.today().strftime('%Y-%m-%d-%H%M%S') 266 version_id = datetime.today().strftime('%Y-%m-%d-%H%M%S')
42 267
43 version = '1.0' 268 version = '1.0'
44 269
45 data_manager_dict["data_tables"][args.datatable_name].append( 270 data_manager_dict["data_tables"][args.datatable_name].append(