Mercurial > repos > iuc > humann_split_table
diff customizemetadata.py @ 2:44cacbe09bfc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
author | iuc |
---|---|
date | Tue, 07 Feb 2023 19:40:08 +0000 |
parents | 506d4a349aa1 |
children |
line wrap: on
line diff
```diff
--- a/customizemetadata.py Wed May 19 17:02:03 2021 +0000
+++ b/customizemetadata.py Tue Feb 07 19:40:08 2023 +0000
@@ -6,8 +6,12 @@
 import json
 import pickle
 import re
+import sys
+from importlib.metadata import version
 from pathlib import Path
 
+from packaging.version import Version
+
 
 def load_from_json(json_fp):
     '''
@@ -56,6 +60,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons tuple keys to string
     for k in in_metadata['merged_taxon']:
         n = ' , '.join(k)
@@ -65,6 +70,38 @@
     dump_to_json(out_metadata, json_fp)
 
 
+def validate_map_version(infile, file_type):
+    '''
+    Check conformity of a user-provided pkl file to Metaphlan SGB (>= v4.0).
+
+    :param infile: Path to input Pickle/JSON file
+    :param file_type: String definining file type, pkl or JSON. Case-insensitive
+    '''
+    file_type = file_type.lower()
+    if file_type == 'pkl' or file_type == 'pickle':
+        # load metadata from Pickle file
+        with bz2.BZ2File(infile, 'r') as pkl_f:
+            in_metadata = pickle.load(pkl_f)
+    elif file_type == 'json':
+        in_metadata = load_from_json(infile)
+    else:
+        raise ValueError("Unsupported file type to validate.")
+
+    # Get metaphlan version in $PATH
+    metaphlan_version = Version(version('metaphlan'))
+
+    # Ensure that there are 8 taxonomy levels separated with "|"s.
+    # v3 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276', 4404432)
+    # v4 DB release encodes the taxids as: ('2|1224|1236|91347|543|547|354276|', 4404432)
+    for k in in_metadata['taxonomy']:
+        if (in_metadata['taxonomy'][k][0].count('|') != 7 and metaphlan_version >= Version('4')) or (in_metadata['taxonomy'][k][0].count('|') != 6 and metaphlan_version < Version('4')):
+            # raise ValueError("Missing/Extra values in GCA list")
+            print("The input taxonomy mapping file %s is incompatible with Metaphlan v.%s in $PATH." % (infile, metaphlan_version))
+            sys.exit(42)
+
+    print("%s is compatible with Metaphlan v.%s." % (infile, metaphlan_version))
+
+
 def transform_json_to_pkl(json_fp, pkl_fp):
     '''
     Read JSON file and drop it to a Pickle file
@@ -80,6 +117,7 @@
         'taxonomy': in_metadata['taxonomy'],
         'merged_taxon': {}
     }
+
     # transform merged_taxons keys to tuple
     for k in in_metadata['merged_taxon']:
         n = ' , '.split(k)
@@ -448,8 +486,10 @@
     args = parser.parse_args()
 
     if args.function == 'transform_pkl_to_json':
+        validate_map_version(Path(args.pkl), 'pkl')
         transform_pkl_to_json(Path(args.pkl), Path(args.json))
     elif args.function == 'transform_json_to_pkl':
+        validate_map_version(Path(args.json), 'json')
         transform_json_to_pkl(Path(args.json), Path(args.pkl))
     elif args.function == 'add_marker':
         add_marker(
```