# HG changeset patch
# User iuc
# Date 1641321258 0
# Node ID 2a546f92b1ba76c460f444f3047e7c4ce04ccb6c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_packaged_annotation_data commit 339a6c16fb6d944d4e147b5192cbeb0ebd26d18e"
diff -r 000000000000 -r 2a546f92b1ba data_manager/install_packaged_annotation_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/install_packaged_annotation_data.py Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import argparse
+import datetime
+import json
+import os
+import re
+from urllib.request import urlretrieve
+
+import yaml
+
+
+class PackagedAnnotationMeta():  # Validated wrapper around the metadata dict of one annotation data package.
+ @classmethod
+ def from_file(cls, fname):  # Alternate constructor: parse the YAML file at fname and validate its content.
+ meta = yaml.safe_load(open(fname))  # NOTE(review): the handle returned by open() is never closed explicitly.
+ return cls(meta)
+
+ def __init__(self, meta_dict):  # Validates meta_dict and mutates it in place; raises KeyError on missing info.
+ if 'build' not in meta_dict:
+ meta_dict['build'] = datetime.date.today().isoformat()  # Default build: today's date in ISO format.
+ if 'volume' not in meta_dict:
+ meta_dict['volume'] = 1  # Default volume when none is given.
+
+ required_meta = ['name', 'build', 'volume', 'refgenome', 'records']
+ for key in required_meta:
+ if not meta_dict.get(key):  # Falsy values ('', 0, empty list, None) are rejected just like absent keys.
+ raise KeyError(
+ 'Required info "{0}" missing from metadata'
+ .format(key)
+ )
+ required_record_meta = ['id', 'name', 'version', 'format', 'source']
+ for key in required_record_meta:
+ for record in meta_dict['records']:  # Every record is checked for every required key.
+ if not record.get(key):  # Same truthiness rule as for the package-level keys.
+ raise KeyError(
+ '{0}\n'
+ 'Required info "{1}" missing from record metadata'
+ .format(record, key)
+ )
+ self.meta = meta_dict  # Keeps a reference to the caller's dict, not a copy.
+ self.meta['id'] = self._get_id()  # Derived package ID is stored alongside the loaded metadata.
+
+ def _get_id(self):  # Build the package ID from name, refgenome, volume and build.
+ components = [
+ self.meta['name'],  # Used with str.replace below, so expected to be a string.
+ self.meta['refgenome'],  # Likewise expected to be a string.
+ str(self.meta['volume']),
+ str(self.meta['build'])
+ ]
+ return '__'.join(  # Components are joined with a double underscore.
+ [
+ re.sub(r'[^a-zA-Z_0-9\-]', '', i.replace(' ', '_'))  # Spaces become '_'; anything but ASCII letters, digits, '_' and '-' is dropped ('.' included).
+ for i in components
+ ]
+ )
+
+ def records(self, full_record_names=False):  # Generator over copies of the package's records.
+ for record in self.meta['records']:
+ ret = record.copy()  # Copy, so replacing 'name' below does not alter self.meta.
+ if full_record_names:
+ ret['name'] = self._full_record_name(record)  # Swap the short name for the descriptive one.
+ yield ret
+
+ def fullname(self):  # Display name of the package: "<name> (<refgenome>, vol:<volume>/build:<build>)".
+ return '{0} ({1}, vol:{2}/build:{3})'.format(
+ self.meta['name'],
+ self.meta['refgenome'],
+ self.meta['volume'],
+ self.meta['build']
+ )
+
+ def _full_record_name(self, record):  # Display name of one record, including its version and the package it comes from.
+ return '{0} ({1}, {2}; from {3}/vol:{4}/build:{5})'.format(
+ record['name'], record['version'],
+ self.meta['refgenome'],
+ self.meta['name'],
+ self.meta['volume'],
+ self.meta['build']
+ )
+
+ def dump(self, fname):  # Write the metadata, including the derived 'id', as YAML to fname.
+ with open(fname, 'w') as fo:
+ yaml.dump(
+ self.meta, fo, allow_unicode=False, default_flow_style=False  # Block-style YAML output.
+ )
+
+
+def fetch_data(source_url, target_file):  # Download source_url to the local path target_file; returns None.
+ final_file, headers = urlretrieve(source_url, target_file)  # Both returned values are bound but never used.
+
+
+def meta_to_dm_records(meta, dbkey=None):  # Turn a PackagedAnnotationMeta into a list of data table row dicts, one per record.
+ data_table_rows = []
+ for record in meta.records(full_record_names=True):  # Rows carry the descriptive record names.
+ data_table_rows.append(
+ {
+ 'value': '{0}:{1}'.format(meta.meta['id'], record['id']),  # "<package id>:<record id>"
+ 'dbkey': dbkey or meta.meta['refgenome'],  # An explicit dbkey wins; otherwise the package's refgenome is used.
+ 'data_name': record['name'],
+ 'data_id': record['id'],
+ 'data_format': record['format'],
+ 'package_id': meta.meta['id'],
+ 'package_name': meta.fullname(),
+ 'path': '{0}/{1}'.format(  # Relative "<volume>/<build>" path.
+ meta.meta['volume'],
+ meta.meta['build']
+ )
+ }
+ )
+ return data_table_rows
+
+
+if __name__ == "__main__":  # Script entry point: download a package's files and emit the data manager JSON.
+ parser = argparse.ArgumentParser()
+ parser.add_argument('metadata')  # Path of the YAML metadata file describing the package.
+ parser.add_argument(
+ '-o', '--galaxy-datamanager-json',  # Path the resulting JSON is written to.
+ required=True
+ )
+ parser.add_argument('-t', '--target-directory', default=None)  # Download destination; current directory if omitted.
+ parser.add_argument('--dbkey', default=None)  # Optional override for the dbkey column.
+ args = parser.parse_args()
+
+ if args.target_directory:
+ if not os.path.isdir(args.target_directory):
+ os.mkdir(args.target_directory)  # Creates only the last path component; parents must already exist.
+ else:
+ args.target_directory = os.getcwd()
+
+ meta = PackagedAnnotationMeta.from_file(args.metadata)  # Raises KeyError if required metadata is missing.
+
+ for record in meta.records():
+ fetch_data(
+ record['source'],
+ os.path.join(args.target_directory, record['id'])  # Each file is stored under its record ID.
+ )
+
+ meta.dump(os.path.join(args.target_directory, 'meta.yml'))  # Keep the validated metadata next to the data files.
+
+ # Finally, we prepare the metadata for the new data table record ...
+ data_manager_dict = {
+ 'data_tables': {
+ 'packaged_annotation_data': meta_to_dm_records(meta, args.dbkey)
+ }
+ }
+
+ # ... and save it to the json results file
+ with open(args.galaxy_datamanager_json, 'w') as fh:
+ json.dump(data_manager_dict, fh, sort_keys=True)
diff -r 000000000000 -r 2a546f92b1ba data_manager/install_packaged_annotation_data.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/install_packaged_annotation_data.xml Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,40 @@
+
+ fetching
+
+ python
+ pyyaml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool fetches and installs packages of genome annotation datasets that are
+not tightly bound to specific tools, but generic enough to be of use for many
+different tools.
+
+It populates the "packaged_annotation_data" data table.
+
+
diff -r 000000000000 -r 2a546f92b1ba data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 2a546f92b1ba test-data/dbkeys.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbkeys.loc Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,2 @@
+#
+hg19 Human hg19 a_path
diff -r 000000000000 -r 2a546f92b1ba test-data/from_test-meta.data_manager.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/from_test-meta.data_manager.json Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"packaged_annotation_data": [{"data_format": "bed", "data_id": "hotspots.data", "data_name": "CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build:1)", "dbkey": "hg19", "package_id": "Cancer_variant_data__hg19__1__1", "package_name": "Cancer variant data (hg19, vol:1/build:1)", "path": "1/1", "value": "Cancer_variant_data__hg19__1__1:hotspots.data"}, {"data_format": "bed", "data_id": "civic.variants", "data_name": "CIViC variants (01-Feb-2019, hg19; from Cancer variant data/vol:1/build:1)", "dbkey": "hg19", "package_id": "Cancer_variant_data__hg19__1__1", "package_name": "Cancer variant data (hg19, vol:1/build:1)", "path": "1/1", "value": "Cancer_variant_data__hg19__1__1:civic.variants"}]}}
\ No newline at end of file
diff -r 000000000000 -r 2a546f92b1ba test-data/packaged_annotation_data.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/packaged_annotation_data.loc Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,3 @@
+#
+#
+
diff -r 000000000000 -r 2a546f92b1ba test-data/test-meta.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-meta.yml Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,18 @@
+name: Cancer variant data
+build: 1
+refgenome: hg19
+records:
+ - id: hotspots.data
+ name: CancerHotspots
+ version: v2
+ doi: 10.1158/2159-8290.CD-17-0321
+ format: bed
+ source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/hotspots.bed
+ checksum: md5:ec8ec9afd4ae4935ac474e150e4e90aa
+ - id: civic.variants
+ name: CIViC variants
+ version: 01-Feb-2019
+ doi: http://dx.doi.org/10.1038/ng.3774
+ format: bed
+ source: https://zenodo.org/api/files/a89ff3af-261e-4c24-a9fb-5050ce8807b2/01-Feb-2019-CIVic.bed
+ checksum: md5:9e42bb7492be9e0011bf29b7e4f83f41
diff -r 000000000000 -r 2a546f92b1ba tool-data/dbkeys.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dbkeys.loc.sample Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,1 @@
+#
diff -r 000000000000 -r 2a546f92b1ba tool-data/packaged_annotation_data.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/packaged_annotation_data.loc.sample Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,20 @@
+#This file describes genome annotation data packages and their contents
+#available on the server.
+#Such data can consist of any number of individual files in a variety of
+#formats (e.g., bed, vcf, tabular) describing any features with respect to the
+#genome with the associated dbkey.
+#The directory referenced in the <path> column of the table is expected to
+#contain the file listed under <data_id> and a meta.yml file with details about
+#the annotation package volume and all of its contents.
+#This data table has the format (white space characters are TAB characters):
+#
+#<value>	<dbkey>	<data_name>	<data_id>	<data_format>	<package_id>	<package_name>	<path>
+#
+#So, packaged_annotation_data.loc tables could look like this:
+#
+#dbSNP_hg19__1__1:dbSNP.tidy hg19 dbSNP tidy (b147.20160601, hg19; from dbSNP/vol:1/build:1) dbSNP.tidy vcf_bgzip dbSNP__hg19__1__1 dbSNP (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/dbSNP/1/1
+#Cancer_variant_data__hg19__1__1:hotspots.data hg19 CancerHotspots (v2, hg19; from Cancer variant data/vol:1/build:1) hotspots.data bed Cancer_variant_data__hg19__1__1 Cancer variant data (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/Cancer_variant_data/1/1
+#Cancer_gene_data__hg19__1__1:civic.genes hg19 CIViC genes (01-Feb-2019, hg19; from Cancer gene data/vol:1/build:1) civic.genes tabular Cancer_gene_data__hg19__1__1 Cancer gene data (hg19, vol:1/build:1) /path/to/packaged_annotation_data/hg19/Cancer_variant_data/1/1
+#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv3 NC_045512.2 ARTIC (v3, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1) ARTICv3 bed6 SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1 SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1) /path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1
+#SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1:ARTICv4 NC_045512.2 ARTIC (v4, NC_045512.2; from SARS-CoV-2 amplicon primer sets/vol:1/build:1) ARTICv4 bed6 SARS-CoV-2_amplicon_primer_sets__NC_045512.2__1__1 SARS-CoV-2 amplicon primer sets (NC_045512.2, vol:1/build:1) /path/to/packaged_annotation_data/NC_045512.2/SARS-CoV-2_amplicon_primer_sets/1/1
+#
diff -r 000000000000 -r 2a546f92b1ba tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, dbkey, data_name, data_id, data_format, package_id, package_name, path
+
+
+
+
+ value, name, len_path
+
+
+
diff -r 000000000000 -r 2a546f92b1ba tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Jan 04 18:34:18 2022 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, dbkey, data_name, data_id, data_format, package_id, package_name, path
+
+
+
+
+ value, name, len_path
+
+
+