Mercurial > repos > scottx611x > data_manager_fetch_gene_annotation
changeset 0:0442068f5c91 draft
Uploaded
author | scottx611x |
---|---|
date | Tue, 26 Apr 2016 13:33:16 -0400 |
parents | |
children | ff7e10e82d89 |
files | data_manager_gene_annotation/data_manager/data_manager.py data_manager_gene_annotation/data_manager/gene_annotation_fetcher.xml data_manager_gene_annotation/data_manager_conf.xml data_manager_gene_annotation/tool-data/gene_annotation.loc.sample data_manager_gene_annotation/tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 124 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# data_manager_gene_annotation/data_manager/data_manager.py
"""Galaxy data manager script that fetches gene annotation files.

The script downloads the file at ``--url`` into ``./gene_annotation``,
unpacks it when it is a tar or zip archive, moves the resulting files into
the ``extra_files_path`` named by the JSON parameter file given as ``--out``
and finally overwrites that same file with the new ``gene_annotation`` data
table entry.
"""
import argparse
import datetime
import json
import os
import shutil
import sys
import tarfile
import zipfile
from contextlib import closing

try:  # Python 2
    from urllib2 import Request, urlopen
except ImportError:  # Python 3
    from urllib.request import Request, urlopen

# Name given to the raw download inside the working directory.
DOWNLOAD_NAME = 'download.dat'

# Number of bytes copied per read while streaming the download to disk.
CHUNK_SIZE = 64 * 1024

# Browser-like headers sent with the download request.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--out', dest='output', action='store', required=True,
                    help='JSON filename')
parser.add_argument('--name', dest='name', action='store',
                    default=str(datetime.date.today()),
                    help='Data table entry unique ID')
parser.add_argument('--url', dest='url', action='store', help='Download URL',
                    default="http://www.scott-ouellette.com/gene_annotations/chr1-hg19_genes.gtf")


def _ensure_tar_members_inside(workdir, member_names):
    """Raise ValueError if a tar member would be written outside *workdir*."""
    base = os.path.realpath(workdir)
    for member_name in member_names:
        destination = os.path.realpath(os.path.join(base, member_name))
        if destination != base and not destination.startswith(base + os.sep):
            raise ValueError(
                'Archive member %r would be extracted outside of %s'
                % (member_name, workdir))


def _extract_if_archive(file_path, workdir):
    """Unpack *file_path* into *workdir* when it is a tar or zip archive.

    The archive itself is deleted after a successful extraction.  A file that
    is neither a tar nor a zip archive is left untouched.
    """
    if tarfile.is_tarfile(file_path):
        with closing(tarfile.open(file_path, 'r:*')) as archive:
            # The archive comes from a user-supplied URL, so refuse members
            # whose names point outside the working directory.
            _ensure_tar_members_inside(workdir, archive.getnames())
            archive.extractall(workdir)
    elif zipfile.is_zipfile(file_path):
        # ZipFile.extractall sanitises absolute paths and ".." components
        # itself (Python >= 2.7.4), so no extra member check is done here.
        with closing(zipfile.ZipFile(file_path, 'r')) as archive:
            archive.extractall(workdir)
    else:
        return
    os.remove(file_path)


def url_download(url, workdir):
    """Download *url* into *workdir* and unpack it if it is an archive.

    The payload is first stored as ``download.dat`` inside *workdir* (which
    is created when missing).  Tar and zip archives are extracted in place and
    the downloaded archive is removed; any other file stays as
    ``download.dat``.

    Any error raised while downloading is reported on stderr and re-raised
    after the partial download has been removed, so the caller never goes on
    to process a missing or truncated file.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    file_path = os.path.join(workdir, DOWNLOAD_NAME)
    request = Request(url, headers=REQUEST_HEADERS)
    try:
        with closing(urlopen(request)) as src, open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst, CHUNK_SIZE)
    except Exception as exc:
        sys.stderr.write('Failed to download %s: %s\n' % (url, exc))
        if os.path.exists(file_path):
            os.remove(file_path)
        raise
    _extract_if_archive(file_path, workdir)


def main(args):
    """Fetch the annotation files and write the data manager JSON.

    *args* must provide ``url``, ``name`` and ``output``.  ``output`` is read
    first as the JSON parameter file (its ``output_data[0].extra_files_path``
    names the directory that receives the fetched files) and is then
    overwritten with the resulting data table entry.
    """
    workdir = os.path.join(os.getcwd(), 'gene_annotation')
    url_download(args.url, workdir)

    # NOTE(review): 'path' is the JSON output file rather than the directory
    # the files are moved to, and no 'dbkey' is emitted although the data
    # table configuration lists that column -- confirm this is intended.
    data_manager_entry = {
        'value': args.name.lower(),
        'name': args.name,
        'path': args.output,
    }
    data_manager_json = {'data_tables': {'gene_annotation': data_manager_entry}}

    with open(args.output) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    for filename in os.listdir(workdir):
        shutil.move(os.path.join(workdir, filename), target_directory)

    with open(args.output, 'w') as handle:
        json.dump(data_manager_json, handle)


if __name__ == '__main__':
    # Parse the command line only when run as a script so that importing this
    # module has no side effects.
    main(parser.parse_args())
<?xml version="1.0"?>
<!-- data_manager_gene_annotation/data_manager/gene_annotation_fetcher.xml -->
<!-- Tool wrapper that runs data_manager.py with the URL and name entered by the user. -->
<tool id="gene_annotation_fetcher_data_manager" name="Gene Annotation Fetch" tool_type="manage_data" version="1.0.0">
    <description>gene annotation fetcher</description>
    <stdio>
        <!-- Exit codes in the range "1:" are treated as fatal errors. -->
        <exit_code description="Error" level="fatal" range="1:" />
    </stdio>
    <command interpreter="python">
    <![CDATA[
        data_manager.py --out "${out_file}"
        #if $gene_annotation_url:
            --url "${gene_annotation_url}"
        #end if
        #if $database_name:
            --name "${database_name}"
        #end if
    ]]>
    </command>
    <inputs>
        <!-- Optional; the script falls back to today's date when no name is passed. -->
        <param help="Enter a unique identifier, or leave blank for today's date" label="Name for this database" name="database_name" type="text" optional="True" />
        <param label="Enter URL for gene annotation files" name="gene_annotation_url" type="text" />
    </inputs>
    <outputs>
        <!-- JSON file handed to data_manager.py as its "out" argument. -->
        <data format="data_manager_json" name="out_file" />
    </outputs>
</tool>
<?xml version="1.0"?>
<!-- data_manager_gene_annotation/data_manager_conf.xml -->
<!-- Registers the gene annotation fetcher tool and the data table columns it fills. -->
<data_managers>

    <data_manager tool_file="data_manager/gene_annotation_fetcher.xml" id="gene_annotation_fetcher" version="1.0.0">
        <data_table name="gene_annotation">
            <!-- Columns of the gene_annotation table, in the same order as tool_data_table_conf.xml.sample. -->
            <output>
                <column name="value" />
                <column name="dbkey" />
                <column name="name" />
                <column name="path"/>
            </output>
        </data_table>
    </data_manager>

</data_managers>
<?xml version="1.0"?>
<!-- data_manager_gene_annotation/tool_data_table_conf.xml.sample -->
<tables>
    <!-- Locations of gene annotation data -->
    <table name="gene_annotation" comment_char="#">
        <!-- Column order of each entry in the .loc file below. -->
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/gene_annotation.loc" />
    </table>
</tables>