Mercurial > repos > iuc > data_manager_dada2
annotate data_manager/data_manager.py @ 3:c29275496061 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit 8533fe71d1d50f09348da2dc34941724407a1ffe"
author | iuc |
---|---|
date | Tue, 14 Jul 2020 07:37:09 -0400 |
parents | a6a81f921701 |
children | 38ccd4589990 |
rev | line source |
---|---|
0
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
1 import argparse |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
2 import json |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
3 import os |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
4 try: |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
5 # For Python 3.0 and later |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
6 from urllib.request import Request, urlopen |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
7 except ImportError: |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
8 # Fall back to Python 2 imports |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
9 from urllib2 import Request, urlopen |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
10 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
# Comma-separated taxonomic ranks recorded for a dataset that has no entry
# of its own in FILE2TAXLEVELS.
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

# Dataset identifier (the value given to --dataset) -> human readable name
# written to the "name" column of the data table entries.
FILE2NAME = {
    "silva_138": "Silva version 138",
    "silva_132": "Silva version 132",
    "silva_128": "Silva version 128",
    "rdp_16": "RDP trainset 16",
    "rdp_14": "RDP trainset 14",
    "greengenes_13.84": "GreenGenes version 13.84",
    "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
    "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
    "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)",
    "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
    "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
    "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1",
}

# Dataset identifier -> download URL of the taxonomy training set.
# The UNITE entries point at zip archives; everything else is a gzipped fasta.
FILE2TAXURL = {
    "silva_138": "https://zenodo.org/record/3731176/files/silva_nr_v138_train_set.fa.gz?download=1",
    "silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
    "unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
    "unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
    "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
    "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
    "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
    "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
    "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
    "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz",
}

# Dataset identifier -> download URL of the species assignment file.
# Only datasets listed here get a "dada2_species" data table entry.
FILE2SPECIESURL = {
    "silva_138": "https://zenodo.org/record/3731176/files/silva_species_assignment_v138.fa.gz?download=1",
    "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
    "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
    "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
    "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1",
}

# Dataset identifier -> taxonomic ranks, for datasets that do not use
# DEFAULT_TAXLEVELS.
FILE2TAXLEVELS = {
    "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species",
}
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
56 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
57 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
def url_download(url, fname, workdir):
    """
    download url to workdir/fname

    ``workdir`` is created (including missing parents) when it does not
    exist yet.

    When ``fname`` starts with "unite" the download is treated as a zip
    archive: it is extracted into ``workdir``, and the single top level
    ``*fasta`` file is gzip-compressed to ``workdir/fname``, overwriting
    the downloaded archive. The extracted files are left in ``workdir``.

    Raises:
        Exception: if an extracted UNITE archive does not provide exactly
            one top level fasta file.
        Errors from opening the URL, the file system or the zip archive
        propagate unchanged.
    """
    # Function-scope imports, as this function already did for the
    # UNITE-only modules; keeps the block independent of the file header.
    import contextlib
    import shutil

    file_path = os.path.join(workdir, fname)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    # closing() guarantees the response is closed without relying on the
    # response object itself supporting the ``with`` statement (the
    # urllib2 fallback import may not).
    with contextlib.closing(urlopen(Request(url))) as src:
        with open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)

    # special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta)
    if fname.startswith("unite"):
        import gzip
        import zipfile

        # unzip download; the context manager closes the archive even if
        # extraction fails
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(workdir)

        # gzip top level fasta file
        # The directory is listed directly instead of globbing
        # "<workdir>/*fasta": glob would interpret characters such as
        # "[" or "*" inside workdir itself as pattern syntax and then
        # find nothing. Dot-files are skipped to keep glob's behaviour.
        fastas = sorted(
            os.path.join(workdir, entry)
            for entry in os.listdir(workdir)
            if entry.endswith("fasta") and not entry.startswith(".")
        )
        if len(fastas) != 1:
            msg = "UNITE download %s contained %d fasta file(s): %s" % (url, len(fastas), " ".join(fastas))
            raise Exception(msg)
        with open(fastas[0], 'rb') as f_in:
            with gzip.open(file_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
99 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
100 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
def remote_dataset(dataset, outjson):
    """
    Download the reference files of ``dataset`` and write the data manager JSON.

    ``outjson`` is first read as JSON to obtain the output directory
    (``params['output_data'][0]['extra_files_path']``). The taxonomy file,
    and the species file when FILE2SPECIESURL has an entry for the
    dataset, are downloaded into that directory. ``outjson`` is then
    overwritten with a ``{"data_tables": ...}`` document that describes
    the downloaded files.

    Args:
        dataset: dataset identifier, a key of FILE2TAXURL.
        outjson: path of the JSON file that is read and then rewritten.

    Raises:
        KeyError: if ``dataset`` is not a key of FILE2TAXURL. This is
            checked before anything is read, created or downloaded.
    """
    if dataset not in FILE2TAXURL:
        raise KeyError("unknown dataset %r (known datasets: %s)"
                       % (dataset, ", ".join(sorted(FILE2TAXURL))))

    with open(outjson) as jf:
        params = json.load(jf)

    workdir = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory and create missing parents;
    # a plain os.mkdir() would fail in both situations.
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    taxonomy_file = dataset + ".taxonomy"
    url_download(FILE2TAXURL[dataset], taxonomy_file, workdir)

    data_tables = {
        "dada2_taxonomy": {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': taxonomy_file,
            'taxlevels': FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS),
        },
    }

    # Species level assignment data only exists for some datasets.
    species_url = FILE2SPECIESURL.get(dataset)
    if species_url:
        species_file = dataset + ".species"
        url_download(species_url, species_file, workdir)
        data_tables["dada2_species"] = {
            'value': dataset,
            'name': FILE2NAME[dataset],
            'path': species_file,
        }

    with open(outjson, 'w') as jf:
        json.dump({"data_tables": data_tables}, jf, sort_keys=True)
0
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
128 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
129 |
f57c13f5878b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command line entry point: parse the two options and run the download.
    parser = argparse.ArgumentParser(description='Create data manager json.')
    # Both options are mandatory: without them remote_dataset() would be
    # called with None and fail with an unrelated looking error.
    parser.add_argument('--out', action='store', required=True, help='JSON filename')
    # Restricting the choices makes argparse reject unknown data set names
    # with a usage message before any work is done.
    parser.add_argument('--dataset', action='store', required=True,
                        choices=sorted(FILE2TAXURL), help='Download data set name')
    args = parser.parse_args()

    remote_dataset(args.dataset, args.out)