Mercurial > repos > davidvanzessen > fetch_vep_cache_data
changeset 0:de3516853bde draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 30 Nov 2018 09:56:38 -0500 |
parents | |
children | |
files | data_manager/fetch_vep_cache_data.py data_manager/fetch_vep_cache_data.xml data_manager_conf.xml tool-data/vep_cache_data.loc.sample |
diffstat | 4 files changed, 116 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# data_manager/fetch_vep_cache_data.py
"""Galaxy data manager script that fetches a VEP cache with ``vep_install``.

The script is invoked with three options:

* ``--output-file``: path of the data manager JSON file. On entry it holds
  the JSON parameters of the job; on success it is overwritten with the
  ``vep_cache_data`` data table entry.
* ``--output-dir``: directory the cache is downloaded into (created if it
  does not exist yet).
* ``--assembly``: value handed to ``vep_install --SPECIES`` and used as the
  ``dbkey``/``name``/``value`` of the data table entry.

The process exits with 0 only when ``vep_install`` succeeded; otherwise no
data table entry is written and a non-zero status is returned.
"""
import argparse
import os
import json
import re
import pprint
import subprocess
import sys


def build_vep_install_command(cache_dir, species):
    """Return the ``vep_install`` invocation as an argument list.

    An argument list (instead of a formatted shell string) keeps paths that
    contain spaces or shell metacharacters intact and avoids running the
    command through a shell.

    :param cache_dir: directory passed to ``--CACHEDIR``.
    :param species: value passed to ``--SPECIES``.
    :returns: list of command-line tokens, program name first.
    """
    return [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", cache_dir,
        "--SPECIES", species,
    ]


def build_data_table_entry(cache_dir, assembly):
    """Return the data manager JSON structure for one downloaded cache.

    :param cache_dir: directory that holds the downloaded cache.
    :param assembly: identifier used for the ``dbkey``, ``name`` and
        ``value`` columns.
    :returns: dict with a single ``vep_cache_data`` row.
    """
    return {
        "data_tables": {
            "vep_cache_data": [{
                "path": cache_dir,
                "dbkey": assembly,
                "name": assembly,
                "value": assembly,
            }],
        },
    }


def main(argv=None):
    """Download the cache and write the data table entry.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    :returns: process exit status: 0 on success, 127 when ``vep_install``
        cannot be started, otherwise a non-zero status derived from the
        ``vep_install`` exit code.
    """
    parser = argparse.ArgumentParser(
        description="Use VEP INSTALL.pl to download/process the cache for an assembly"
    )
    # All three options are needed; let argparse report a missing one
    # instead of failing later with an opaque TypeError.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--assembly", required=True)
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    assembly = args.assembly

    # The output file initially carries the job parameters as JSON.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(assembly)
    pprint.pprint(params)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    vep_install_cmd = build_vep_install_command(output_dir, assembly)
    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    # Flush so our log lines appear before the child's output.
    sys.stdout.flush()

    try:
        exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)
    except OSError as err:
        # Raised when the executable is missing or not runnable; 127 is the
        # status a shell reports for "command not found".
        sys.stderr.write("Could not run vep_install: {0}\n".format(err))
        return 127

    print(exit_code)
    if exit_code != 0:
        # Do not register a cache that was not downloaded successfully.
        sys.stderr.write(
            "vep_install failed with exit code {0}\n".format(exit_code)
        )
        # A negative code means the child was killed by a signal.
        return exit_code if exit_code > 0 else 1

    with open(output_file, "w") as output_file_handle:
        output_file_handle.write(
            json.dumps(build_data_table_entry(output_dir, assembly))
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())
<!-- data_manager/fetch_vep_cache_data.xml -->
<!--
    Galaxy data manager tool (tool_type="manage_data") that runs
    fetch_vep_cache_data.py from the tool directory. The script receives the
    output dataset path, the output dataset's extra files directory and an
    assembly string built from the two select parameters below.
-->
<tool id="fetch_vep_cache_data" name="VEP Cache" version="0.0.1" tool_type="manage_data">
    <requirements>
        <requirement type="package" version="94.5">ensembl-vep</requirement>
    </requirements>
    <description>fetching</description>

    <!-- The assembly argument is the "dbkey" value immediately followed by the "extra" suffix. -->
    <command detect_errors="exit_code">
python $__tool_directory__/fetch_vep_cache_data.py
--output-file ${ out_file }
--output-dir ${ out_file.extra_files_path }
--assembly ${ dbkey }${ extra }
    </command>
    <inputs>
        <!-- Species name; forms the first part of the assembly string. -->
        <param name="dbkey" type="select" label="Species">
            <option value="homo_sapiens">Human</option>
            <option value="mus_musculus">Mouse</option>
        </param>
        <!-- Suffix appended to the species name; empty for the default (Ensembl) choice. -->
        <param name="extra" type="select" label="Get the Ensembl, Refseq or merged Ensembl/Refseq">
            <option value="" selected="true">Ensembl</option>
            <option value="_refseq">Refseq</option>
            <option value="_merged">Merged</option>
        </param>
    </inputs>
    <outputs>
        <!-- Data manager JSON file; the script reads it and then overwrites it. -->
        <data name="out_file" format="data_manager_json"/>
    </outputs>
    <!--
    <tests>
        <test>
            DON'T FORGET TO DEFINE SOME TOOL TESTS
        </test>
    </tests>
    -->
    <help>
        There is no help
    </help>
</tool>
<?xml version="1.0"?>
<!-- data_manager_conf.xml -->
<!--
    Registers the fetch_vep_cache_data tool as a data manager and describes how
    its output populates the "vep_cache_data" data table.
-->
<data_managers>
    <data_manager tool_file="data_manager/fetch_vep_cache_data.xml" id="fetch_vep_cache_data">
        <data_table name="vep_cache_data">
            <output>
                <column name="value" />
                <column name="dbkey" />
                <column name="name" />
                <!--
                    The "path" column refers to the tool output "out_file". The
                    directory named by ${path} is moved below
                    ${GALAXY_DATA_MANAGER_DATA_PATH} to ${dbkey}/seq/${path}, and
                    the stored column value is rewritten to that location.
                -->
                <column name="path" output_ref="out_file" >
                    <move type="directory">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/seq/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/seq/${path}</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/vep_cache_data.loc.sample Fri Nov 30 09:56:38 2018 -0500 @@ -0,0 +1,6 @@ +# VEP data cache location +# <value> <dbkey> <name> <path> +# <value> short name of the entry +# <dbkey> Galaxy reference key (dbkey) of the entry +# <name> display name of the entry +# <path> path to the VEP cache directory \ No newline at end of file