Mercurial > repos > iuc > dram_set_database_locations
comparison dram_set_database_locations.py @ 0:6e52f03e612d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dram commit 52575ece22fcdbb6fc3aa3582ea377075aaa4db1
author | iuc |
---|---|
date | Thu, 01 Sep 2022 17:16:35 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6e52f03e612d |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import os | |
5 import subprocess | |
6 | |
# Command line interface of this script.  Every option is optional as far as
# argparse is concerned; an option that is not supplied is left as None
# (False for the --skip_uniref flag).
parser = argparse.ArgumentParser()

parser.add_argument('--db_version', help='Version of DRAM databases')
# This is a "skip" flag: when given, the UniRef database location is left
# untouched by the rest of this script.
parser.add_argument('--skip_uniref', action='store_true', help='Skip downloading and processing UniRef')
parser.add_argument('--galaxy_data_manager_data_path', help='Absolute Galaxy data manager data path')
parser.add_argument('--output', help='Output file')

args = parser.parse_args()
15 | |
16 | |
def get_new_dram_config_entry(db_version, old_entry, new_base_path):
    """Return the relocated path for one DRAM CONFIG entry.

    Only the final path component (the file name) of ``old_entry`` is kept,
    so ``old_entry`` may be either a bare path or a whole CONFIG line such as
    ``KOfam db: /some/job/dir/dataset_4268_files/kofam_profiles.hmm``.

    Args:
        db_version: Version of the DRAM databases; used as a directory name.
        old_entry: Existing CONFIG entry whose file name is reused.
        new_base_path: Base directory under which the databases are placed.

    Returns:
        The path ``new_base_path/DRAM/db_version/file_name``.
    """
    # The directory part of the old entry is discarded; only the file name
    # carries over to the new location.
    file_name = os.path.basename(old_entry)
    return os.path.join(new_base_path, 'DRAM', db_version, file_name)
23 | |
24 | |
25 # At this point the DRAM config will look something like this. | |
26 # Processed search databases | |
27 # KEGG db: None | |
28 # KOfam db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/kofam_profiles.hmm | |
29 # KOfam KO list: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/kofam_ko_list.tsv | |
30 # UniRef db: None | |
31 # Pfam db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/pfam.mmspro | |
32 # dbCAN db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/dbCAN-HMMdb-V10.txt | |
33 # RefSeq Viral db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/refseq_viral.20220707.mmsdb | |
34 # MEROPS peptidase db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/peptidases.20220707.mmsdb | |
35 # VOGDB db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/vog_latest_hmms.txt | |
36 # | |
37 # Descriptions of search database entries | |
38 # Pfam hmm dat: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/Pfam-A.hmm.dat.gz | |
39 # dbCAN family activities: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/CAZyDB.07292021.fam-activities.txt | |
40 # VOG annotations: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/vog_annotations_latest.tsv.gz | |
41 # | |
42 # Description db: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/description_db.sqlite | |
43 # | |
44 # DRAM distillation sheets | |
45 # Genome summary form: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/genome_summary_form.20220707.tsv | |
46 # Module step form: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/module_step_form.20220707.tsv | |
47 # ETC module database: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/etc_mdoule_database.20220707.tsv | |
48 # Function heatmap form: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/function_heatmap_form.20220707.tsv | |
49 # AMG database: /home/galaxies/gvk/jwd/003/3045/working/dataset_4268_files/amg_database.20220707.tsv | |
50 | |
# Write the current DRAM CONFIG to a file for processing.  The command is run
# from an argument list (no shell) and its stdout is redirected in Python.
with open('dram_config.txt', 'w') as config_fh:
    subprocess.check_call(['DRAM-setup.py', 'print_config'], stdout=config_fh)

# Label that starts a DRAM CONFIG line (the text before the first colon)
# mapped to the "DRAM-setup.py set_database_locations" option that sets the
# corresponding location.
option_by_label = {
    'KOfam db': '--kofam_hmm_loc',
    'KOfam KO list': '--kofam_ko_list_loc',
    'UniRef db': '--uniref_db_loc',
    'Pfam db': '--pfam_db_loc',
    'dbCAN db': '--dbcan_db_loc',
    'RefSeq Viral db': '--viral_db_loc',
    'MEROPS peptidase db': '--peptidase_db_loc',
    'VOGDB db': '--vogdb_db_loc',
    'Pfam hmm dat': '--pfam_hmm_dat',
    'dbCAN family activities': '--dbcan_fam_activities',
    'VOG annotations': '--vog_annotations',
    'Description db': '--description_db_loc',
    'Genome summary form': '--genome_summary_form_loc',
    'Module step form': '--module_step_form_loc',
    'ETC module database': '--etc_module_database_loc',
    'Function heatmap form': '--function_heatmap_form_loc',
    'AMG database': '--amg_database_loc',
}

# Update the database locations that DRAM sets in its CONFIG
# to point to the configured GALAXY_DATA_MANAGER_DATA_PATH location
# for the DRAM databases.
cmd = ['DRAM-setup.py', 'set_database_locations']
with open('dram_config.txt', 'r') as fh:
    for line in fh:
        line = line.rstrip('\r\n')
        label, colon, value = line.partition(':')
        # Lines without a colon, or with a label this script does not
        # relocate (section headings, "KEGG db", blank lines), are ignored.
        option = option_by_label.get(label) if colon else None
        if option is None:
            continue
        if label == 'UniRef db' and args.skip_uniref:
            continue
        # An entry printed as "None" has no file to relocate; rewriting it
        # would produce a bogus path ending in "<label>: None".
        if value.strip() == 'None':
            continue
        cmd.append(option)
        cmd.append(get_new_dram_config_entry(args.db_version, line, args.galaxy_data_manager_data_path))
cmd.append('--update_description_db')
# Passing a list keeps each path a single argument even if it contains
# spaces or shell metacharacters.
subprocess.check_call(cmd)

# Write the updated DRAM CONFIG to the requested output file.
with open(args.output, 'w') as output_fh:
    subprocess.check_call(['DRAM-setup.py', 'print_config'], stdout=output_fh)